anv: set error in all failure paths
mesa.git: src/intel/vulkan/anv_pipeline.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "common/gen_l3_config.h"
32 #include "anv_private.h"
33 #include "compiler/brw_nir.h"
34 #include "anv_nir.h"
35 #include "spirv/nir_spirv.h"
36 #include "vk_util.h"
37
38 /* Needed for SWIZZLE macros */
39 #include "program/prog_instruction.h"
40
41 // Shader functions
42
43 VkResult anv_CreateShaderModule(
44 VkDevice _device,
45 const VkShaderModuleCreateInfo* pCreateInfo,
46 const VkAllocationCallbacks* pAllocator,
47 VkShaderModule* pShaderModule)
48 {
49 ANV_FROM_HANDLE(anv_device, device, _device);
50 struct anv_shader_module *module;
51
52 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
53 assert(pCreateInfo->flags == 0);
54
55 module = vk_alloc2(&device->alloc, pAllocator,
56 sizeof(*module) + pCreateInfo->codeSize, 8,
57 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
58 if (module == NULL)
59 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
60
61 module->size = pCreateInfo->codeSize;
62 memcpy(module->data, pCreateInfo->pCode, module->size);
63
64 _mesa_sha1_compute(module->data, module->size, module->sha1);
65
66 *pShaderModule = anv_shader_module_to_handle(module);
67
68 return VK_SUCCESS;
69 }
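/* Note: the SHA1 computed above is what anv_pipeline_hash_shader() feeds
 * into the pipeline cache key later in this file, so two modules with
 * identical SPIR-V hash identically regardless of their VkShaderModule
 * handles.
 */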
70
71 void anv_DestroyShaderModule(
72 VkDevice _device,
73 VkShaderModule _module,
74 const VkAllocationCallbacks* pAllocator)
75 {
76 ANV_FROM_HANDLE(anv_device, device, _device);
77 ANV_FROM_HANDLE(anv_shader_module, module, _module);
78
79 if (!module)
80 return;
81
82 vk_free2(&device->alloc, pAllocator, module);
83 }
84
85 #define SPIR_V_MAGIC_NUMBER 0x07230203
86
87 static const uint64_t stage_to_debug[] = {
88 [MESA_SHADER_VERTEX] = DEBUG_VS,
89 [MESA_SHADER_TESS_CTRL] = DEBUG_TCS,
90 [MESA_SHADER_TESS_EVAL] = DEBUG_TES,
91 [MESA_SHADER_GEOMETRY] = DEBUG_GS,
92 [MESA_SHADER_FRAGMENT] = DEBUG_WM,
93 [MESA_SHADER_COMPUTE] = DEBUG_CS,
94 };
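/* These map onto the INTEL_DEBUG environment variable, so running with e.g.
 * INTEL_DEBUG=vs (or tcs, tes, gs, fs, cs) should make the stage_to_debug
 * check in anv_shader_compile_to_nir() dump the NIR for that stage.
 */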
95
96 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
97 * we can't do that yet because we don't have the ability to copy nir.
98 */
99 static nir_shader *
100 anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
101 void *mem_ctx,
102 const struct anv_shader_module *module,
103 const char *entrypoint_name,
104 gl_shader_stage stage,
105 const VkSpecializationInfo *spec_info)
106 {
107 const struct anv_device *device = pipeline->device;
108
109 const struct brw_compiler *compiler =
110 device->instance->physicalDevice.compiler;
111 const nir_shader_compiler_options *nir_options =
112 compiler->glsl_compiler_options[stage].NirOptions;
113
114 uint32_t *spirv = (uint32_t *) module->data;
115 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
116 assert(module->size % 4 == 0);
117
118 uint32_t num_spec_entries = 0;
119 struct nir_spirv_specialization *spec_entries = NULL;
120 if (spec_info && spec_info->mapEntryCount > 0) {
121 num_spec_entries = spec_info->mapEntryCount;
122 spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
123 for (uint32_t i = 0; i < num_spec_entries; i++) {
124 VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
125 const void *data = spec_info->pData + entry.offset;
126 assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
127
128 spec_entries[i].id = spec_info->pMapEntries[i].constantID;
129 if (entry.size == 8)
130 spec_entries[i].data64 = *(const uint64_t *)data;
131 else
132 spec_entries[i].data32 = *(const uint32_t *)data;
133 }
134 }
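/* Illustrative example of the specialization path above: a client that
 * specializes a single 32-bit constant with ID 7 would pass roughly
 *
 *    uint32_t value = 4;
 *    VkSpecializationMapEntry entry = {
 *       .constantID = 7, .offset = 0, .size = sizeof(value),
 *    };
 *    VkSpecializationInfo spec = {
 *       .mapEntryCount = 1, .pMapEntries = &entry,
 *       .dataSize = sizeof(value), .pData = &value,
 *    };
 *
 * and the loop above would record { .id = 7, .data32 = 4 }.
 */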
135
136 struct spirv_to_nir_options spirv_options = {
137 .lower_workgroup_access_to_offsets = true,
138 .caps = {
139 .float64 = device->instance->physicalDevice.info.gen >= 8,
140 .int64 = device->instance->physicalDevice.info.gen >= 8,
141 .tessellation = true,
142 .device_group = true,
143 .draw_parameters = true,
144 .image_write_without_format = true,
145 .multiview = true,
146 .variable_pointers = true,
147 .storage_16bit = device->instance->physicalDevice.info.gen >= 8,
148 .int16 = device->instance->physicalDevice.info.gen >= 8,
149 .shader_viewport_index_layer = true,
150 .subgroup_arithmetic = true,
151 .subgroup_basic = true,
152 .subgroup_ballot = true,
153 .subgroup_quad = true,
154 .subgroup_shuffle = true,
155 .subgroup_vote = true,
156 .stencil_export = device->instance->physicalDevice.info.gen >= 9,
157 .storage_8bit = device->instance->physicalDevice.info.gen >= 8,
158 .post_depth_coverage = device->instance->physicalDevice.info.gen >= 9,
159 },
160 };
161
162 nir_function *entry_point =
163 spirv_to_nir(spirv, module->size / 4,
164 spec_entries, num_spec_entries,
165 stage, entrypoint_name, &spirv_options, nir_options);
166 nir_shader *nir = entry_point->shader;
167 assert(nir->info.stage == stage);
168 nir_validate_shader(nir);
169 ralloc_steal(mem_ctx, nir);
170
171 free(spec_entries);
172
173 if (unlikely(INTEL_DEBUG & stage_to_debug[stage])) {
174 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
175 gl_shader_stage_name(stage));
176 nir_print_shader(nir, stderr);
177 }
178
179 /* We have to lower away local constant initializers right before we
180 * inline functions. That way they get properly initialized at the top
181 * of the function and not at the top of its caller.
182 */
183 NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
184 NIR_PASS_V(nir, nir_lower_returns);
185 NIR_PASS_V(nir, nir_inline_functions);
186 NIR_PASS_V(nir, nir_copy_prop);
187
188 /* Pick off the single entrypoint that we want */
189 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
190 if (func != entry_point)
191 exec_node_remove(&func->node);
192 }
193 assert(exec_list_length(&nir->functions) == 1);
194 entry_point->name = ralloc_strdup(entry_point, "main");
195
196 /* Now that we've deleted all but the main function, we can go ahead and
197 * lower the rest of the constant initializers. We do this here so that
198 * nir_remove_dead_variables and split_per_member_structs below see the
199 * corresponding stores.
200 */
201 NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
202
203 /* Split member structs. We do this before lower_io_to_temporaries so that
204 * it doesn't lower system values to temporaries by accident.
205 */
206 NIR_PASS_V(nir, nir_split_var_copies);
207 NIR_PASS_V(nir, nir_split_per_member_structs);
208
209 NIR_PASS_V(nir, nir_remove_dead_variables,
210 nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
211
212 if (stage == MESA_SHADER_FRAGMENT)
213 NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable);
214
215 NIR_PASS_V(nir, nir_propagate_invariant);
216 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
217 entry_point->impl, true, false);
218
219 /* Vulkan uses the separate-shader linking model */
220 nir->info.separate_shader = true;
221
222 nir = brw_preprocess_nir(compiler, nir);
223
224 if (stage == MESA_SHADER_FRAGMENT)
225 NIR_PASS_V(nir, anv_nir_lower_input_attachments);
226
227 return nir;
228 }
229
230 void anv_DestroyPipeline(
231 VkDevice _device,
232 VkPipeline _pipeline,
233 const VkAllocationCallbacks* pAllocator)
234 {
235 ANV_FROM_HANDLE(anv_device, device, _device);
236 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
237
238 if (!pipeline)
239 return;
240
241 anv_reloc_list_finish(&pipeline->batch_relocs,
242 pAllocator ? pAllocator : &device->alloc);
243 if (pipeline->blend_state.map)
244 anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
245
246 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
247 if (pipeline->shaders[s])
248 anv_shader_bin_unref(device, pipeline->shaders[s]);
249 }
250
251 vk_free2(&device->alloc, pAllocator, pipeline);
252 }
253
254 static const uint32_t vk_to_gen_primitive_type[] = {
255 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
256 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
257 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
258 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
259 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
260 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
261 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
262 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
263 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
264 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
265 };
266
267 static void
268 populate_sampler_prog_key(const struct gen_device_info *devinfo,
269 struct brw_sampler_prog_key_data *key)
270 {
271 /* Almost all multisampled textures are compressed. The only time when we
272 * don't compress a multisampled texture is for 16x MSAA with a surface
273 * width greater than 8k, which is a bit of an edge case. Since the sampler
274 * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
275 * to tell the compiler to always assume compression.
276 */
277 key->compressed_multisample_layout_mask = ~0;
278
279 /* SkyLake added support for 16x MSAA. With this came a new message for
280 * reading from a 16x MSAA surface with compression. The new message was
281 * needed because now the MCS data is 64 bits instead of 32 or lower as is
282 * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which
283 * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x
284 * so we can just use it unconditionally. This may not be quite as
285 * efficient but it saves us from recompiling.
286 */
287 if (devinfo->gen >= 9)
288 key->msaa_16 = ~0;
289
290 /* XXX: Handle texture swizzle on HSW- */
291 for (int i = 0; i < MAX_SAMPLERS; i++) {
292 /* Assume color sampler, no swizzling. (Works for BDW+) */
293 key->swizzles[i] = SWIZZLE_XYZW;
294 }
295 }
296
297 static void
298 populate_vs_prog_key(const struct gen_device_info *devinfo,
299 struct brw_vs_prog_key *key)
300 {
301 memset(key, 0, sizeof(*key));
302
303 populate_sampler_prog_key(devinfo, &key->tex);
304
305 /* XXX: Handle vertex input work-arounds */
306
307 /* XXX: Handle sampler_prog_key */
308 }
309
310 static void
311 populate_tcs_prog_key(const struct gen_device_info *devinfo,
312 unsigned input_vertices,
313 struct brw_tcs_prog_key *key)
314 {
315 memset(key, 0, sizeof(*key));
316
317 populate_sampler_prog_key(devinfo, &key->tex);
318
319 key->input_vertices = input_vertices;
320 }
321
322 static void
323 populate_tes_prog_key(const struct gen_device_info *devinfo,
324 struct brw_tes_prog_key *key)
325 {
326 memset(key, 0, sizeof(*key));
327
328 populate_sampler_prog_key(devinfo, &key->tex);
329 }
330
331 static void
332 populate_gs_prog_key(const struct gen_device_info *devinfo,
333 struct brw_gs_prog_key *key)
334 {
335 memset(key, 0, sizeof(*key));
336
337 populate_sampler_prog_key(devinfo, &key->tex);
338 }
339
340 static void
341 populate_wm_prog_key(const struct gen_device_info *devinfo,
342 const struct anv_subpass *subpass,
343 const VkPipelineMultisampleStateCreateInfo *ms_info,
344 struct brw_wm_prog_key *key)
345 {
346 memset(key, 0, sizeof(*key));
347
348 populate_sampler_prog_key(devinfo, &key->tex);
349
350 /* We set this to 0 here and set it to the actual value before we call
351 * brw_compile_fs.
352 */
353 key->input_slots_valid = 0;
354
355 /* Vulkan doesn't specify a default */
356 key->high_quality_derivatives = false;
357
358 /* XXX Vulkan doesn't appear to specify */
359 key->clamp_fragment_color = false;
360
361 assert(subpass->color_count <= MAX_RTS);
362 for (uint32_t i = 0; i < subpass->color_count; i++) {
363 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
364 key->color_outputs_valid |= (1 << i);
365 }
366
367 key->nr_color_regions = _mesa_bitcount(key->color_outputs_valid);
368
369 key->replicate_alpha = key->nr_color_regions > 1 &&
370 ms_info && ms_info->alphaToCoverageEnable;
371
372 if (ms_info) {
373 /* We should probably pull this out of the shader, but it's fairly
374 * harmless to compute it and then let dead-code take care of it.
375 */
376 if (ms_info->rasterizationSamples > 1) {
377 key->persample_interp =
378 (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
379 key->multisample_fbo = true;
380 }
381
382 key->frag_coord_adds_sample_pos = ms_info->sampleShadingEnable;
383 }
384 }
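/* A worked example of the sample-shading math above: with
 * rasterizationSamples = 4 and minSampleShading = 0.5 the product is 2, so
 * persample_interp is set; with minSampleShading = 0.25 the product is
 * exactly 1 and interpolation stays per-pixel.
 */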
385
386 static void
387 populate_cs_prog_key(const struct gen_device_info *devinfo,
388 struct brw_cs_prog_key *key)
389 {
390 memset(key, 0, sizeof(*key));
391
392 populate_sampler_prog_key(devinfo, &key->tex);
393 }
394
395 struct anv_pipeline_stage {
396 gl_shader_stage stage;
397
398 const struct anv_shader_module *module;
399 const char *entrypoint;
400 const VkSpecializationInfo *spec_info;
401
402 union brw_any_prog_key key;
403
404 struct {
405 gl_shader_stage stage;
406 unsigned char sha1[20];
407 } cache_key;
408
409 nir_shader *nir;
410
411 struct anv_pipeline_binding surface_to_descriptor[256];
412 struct anv_pipeline_binding sampler_to_descriptor[256];
413 struct anv_pipeline_bind_map bind_map;
414
415 union brw_any_prog_data prog_data;
416 };
417
418 static void
419 anv_pipeline_hash_shader(struct mesa_sha1 *ctx,
420 struct anv_pipeline_stage *stage)
421 {
422 _mesa_sha1_update(ctx, stage->module->sha1, sizeof(stage->module->sha1));
423 _mesa_sha1_update(ctx, stage->entrypoint, strlen(stage->entrypoint));
424 _mesa_sha1_update(ctx, &stage->stage, sizeof(stage->stage));
425 if (stage->spec_info) {
426 _mesa_sha1_update(ctx, stage->spec_info->pMapEntries,
427 stage->spec_info->mapEntryCount *
428 sizeof(*stage->spec_info->pMapEntries));
429 _mesa_sha1_update(ctx, stage->spec_info->pData,
430 stage->spec_info->dataSize);
431 }
432 _mesa_sha1_update(ctx, &stage->key, brw_prog_key_size(stage->stage));
433 }
434
435 static void
436 anv_pipeline_hash_graphics(struct anv_pipeline *pipeline,
437 struct anv_pipeline_layout *layout,
438 struct anv_pipeline_stage *stages,
439 unsigned char *sha1_out)
440 {
441 struct mesa_sha1 ctx;
442 _mesa_sha1_init(&ctx);
443
444 _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
445 sizeof(pipeline->subpass->view_mask));
446
447 if (layout)
448 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
449
450 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
451 if (stages[s].entrypoint)
452 anv_pipeline_hash_shader(&ctx, &stages[s]);
453 }
454
455 _mesa_sha1_final(&ctx, sha1_out);
456 }
457
458 static void
459 anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
460 struct anv_pipeline_layout *layout,
461 struct anv_pipeline_stage *stage,
462 unsigned char *sha1_out)
463 {
464 struct mesa_sha1 ctx;
465 _mesa_sha1_init(&ctx);
466
467 if (layout)
468 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
469
470 anv_pipeline_hash_shader(&ctx, stage);
471
472 _mesa_sha1_final(&ctx, sha1_out);
473 }
474
475 static nir_shader *
476 anv_pipeline_compile(struct anv_pipeline *pipeline,
477 void *mem_ctx,
478 struct anv_pipeline_layout *layout,
479 struct anv_pipeline_stage *stage,
480 struct brw_stage_prog_data *prog_data,
481 struct anv_pipeline_bind_map *map)
482 {
483 const struct brw_compiler *compiler =
484 pipeline->device->instance->physicalDevice.compiler;
485
486 nir_shader *nir = anv_shader_compile_to_nir(pipeline, mem_ctx,
487 stage->module,
488 stage->entrypoint,
489 stage->stage,
490 stage->spec_info);
491 if (nir == NULL)
492 return NULL;
493
494 NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
495
496 NIR_PASS_V(nir, anv_nir_lower_push_constants);
497
498 if (nir->info.stage != MESA_SHADER_COMPUTE)
499 NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask);
500
501 if (nir->info.stage == MESA_SHADER_COMPUTE)
502 prog_data->total_shared = nir->num_shared;
503
504 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
505
506 if (nir->num_uniforms > 0) {
507 assert(prog_data->nr_params == 0);
508
509 /* If the shader uses any push constants at all, we'll just give
510 * them the maximum possible number
511 */
512 assert(nir->num_uniforms <= MAX_PUSH_CONSTANTS_SIZE);
513 nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE;
514 prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
515 prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);
516
517 /* We now set the param values to be offsets into an
518 * anv_push_constants structure. Since the compiler doesn't actually
519 * dereference any of the pointers stored in the params array, it
520 * doesn't really matter what we put here.
521 */
522 struct anv_push_constants *null_data = NULL;
523 /* Fill out the push constants section of the param array */
524 for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) {
525 prog_data->param[i] = ANV_PARAM_PUSH(
526 (uintptr_t)&null_data->client_data[i * sizeof(float)]);
527 }
528 }
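/* A rough sketch of what the loop above produces: param[i] ends up as
 * ANV_PARAM_PUSH(offsetof(struct anv_push_constants, client_data) +
 * i * sizeof(float)), i.e. each uniform slot is tagged with the byte
 * offset of the corresponding dword in the push constant block.
 */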
529
530 if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
531 pipeline->needs_data_cache = true;
532
533 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
534 if (layout)
535 anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data, map);
536
537 if (nir->info.stage != MESA_SHADER_COMPUTE)
538 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
539
540 assert(nir->num_uniforms == prog_data->nr_params * 4);
541
542 return nir;
543 }
544
545 static void
546 anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
547 {
548 prog_data->binding_table.size_bytes = 0;
549 prog_data->binding_table.texture_start = bias;
550 prog_data->binding_table.gather_texture_start = bias;
551 prog_data->binding_table.ubo_start = bias;
552 prog_data->binding_table.ssbo_start = bias;
553 prog_data->binding_table.image_start = bias;
554 }
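/* The bias reserves binding table slots ahead of the shader's own surfaces:
 * the fragment stage passes its render target count (see
 * anv_pipeline_link_fs) and the compute path passes 1, while every other
 * stage passes 0.
 */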
555
556 static void
557 anv_pipeline_link_vs(const struct brw_compiler *compiler,
558 struct anv_pipeline_stage *vs_stage,
559 struct anv_pipeline_stage *next_stage)
560 {
561 anv_fill_binding_table(&vs_stage->prog_data.vs.base.base, 0);
562
563 if (next_stage)
564 brw_nir_link_shaders(compiler, &vs_stage->nir, &next_stage->nir);
565 }
566
567 static const unsigned *
568 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
569 void *mem_ctx,
570 struct anv_pipeline_stage *vs_stage)
571 {
572 brw_compute_vue_map(compiler->devinfo,
573 &vs_stage->prog_data.vs.base.vue_map,
574 vs_stage->nir->info.outputs_written,
575 vs_stage->nir->info.separate_shader);
576
577 return brw_compile_vs(compiler, NULL, mem_ctx, &vs_stage->key.vs,
578 &vs_stage->prog_data.vs, vs_stage->nir, -1, NULL);
579 }
580
581 static void
582 merge_tess_info(struct shader_info *tes_info,
583 const struct shader_info *tcs_info)
584 {
585 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
586 *
587 * "PointMode. Controls generation of points rather than triangles
588 * or lines. This functionality defaults to disabled, and is
589 * enabled if either shader stage includes the execution mode."
590 *
591 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
592 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
593 * and OutputVertices, it says:
594 *
595 * "One mode must be set in at least one of the tessellation
596 * shader stages."
597 *
598 * So, the fields can be set in either the TCS or TES, but they must
599 * agree if set in both. Our backend looks at TES, so bitwise-or in
600 * the values from the TCS.
601 */
602 assert(tcs_info->tess.tcs_vertices_out == 0 ||
603 tes_info->tess.tcs_vertices_out == 0 ||
604 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
605 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
606
607 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
608 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
609 tcs_info->tess.spacing == tes_info->tess.spacing);
610 tes_info->tess.spacing |= tcs_info->tess.spacing;
611
612 assert(tcs_info->tess.primitive_mode == 0 ||
613 tes_info->tess.primitive_mode == 0 ||
614 tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
615 tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
616 tes_info->tess.ccw |= tcs_info->tess.ccw;
617 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
618 }
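/* For example, if the TCS declares OutputVertices = 3 while the TES only
 * declares Triangles and SpacingEqual, the merged TES info ends up with
 * tcs_vertices_out = 3, triangles as the primitive mode and equal spacing,
 * which is what the backend reads.
 */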
619
620 static void
621 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
622 struct anv_pipeline_stage *tcs_stage,
623 struct anv_pipeline_stage *tes_stage)
624 {
625 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
626
627 anv_fill_binding_table(&tcs_stage->prog_data.tcs.base.base, 0);
628
629 brw_nir_link_shaders(compiler, &tcs_stage->nir, &tes_stage->nir);
630
631 nir_lower_patch_vertices(tes_stage->nir,
632 tcs_stage->nir->info.tess.tcs_vertices_out,
633 NULL);
634
635 /* Copy TCS info into the TES info */
636 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
637
638 anv_fill_binding_table(&tcs_stage->prog_data.tcs.base.base, 0);
639 anv_fill_binding_table(&tes_stage->prog_data.tes.base.base, 0);
640
641 /* Whacking the key after cache lookup is a bit sketchy, but all of
642 * this comes from the SPIR-V, which is part of the hash used for the
643 * pipeline cache. So it should be safe.
644 */
645 tcs_stage->key.tcs.tes_primitive_mode =
646 tes_stage->nir->info.tess.primitive_mode;
647 tcs_stage->key.tcs.outputs_written =
648 tcs_stage->nir->info.outputs_written;
649 tcs_stage->key.tcs.patch_outputs_written =
650 tcs_stage->nir->info.patch_outputs_written;
651 tcs_stage->key.tcs.quads_workaround =
652 compiler->devinfo->gen < 9 &&
653 tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
654 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
655
656 tes_stage->key.tes.inputs_read =
657 tcs_stage->nir->info.outputs_written;
658 tes_stage->key.tes.patch_inputs_read =
659 tcs_stage->nir->info.patch_outputs_written;
660 }
661
662 static const unsigned *
663 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
664 void *mem_ctx,
665 struct anv_pipeline_stage *tcs_stage,
666 struct anv_pipeline_stage *prev_stage)
667 {
668 return brw_compile_tcs(compiler, NULL, mem_ctx, &tcs_stage->key.tcs,
669 &tcs_stage->prog_data.tcs, tcs_stage->nir,
670 -1, NULL);
671 }
672
673 static void
674 anv_pipeline_link_tes(const struct brw_compiler *compiler,
675 struct anv_pipeline_stage *tes_stage,
676 struct anv_pipeline_stage *next_stage)
677 {
678 anv_fill_binding_table(&tes_stage->prog_data.tes.base.base, 0);
679
680 if (next_stage)
681 brw_nir_link_shaders(compiler, &tes_stage->nir, &next_stage->nir);
682 }
683
684 static const unsigned *
685 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
686 void *mem_ctx,
687 struct anv_pipeline_stage *tes_stage,
688 struct anv_pipeline_stage *tcs_stage)
689 {
690 return brw_compile_tes(compiler, NULL, mem_ctx, &tes_stage->key.tes,
691 &tcs_stage->prog_data.tcs.base.vue_map,
692 &tes_stage->prog_data.tes, tes_stage->nir,
693 NULL, -1, NULL);
694 }
695
696 static void
697 anv_pipeline_link_gs(const struct brw_compiler *compiler,
698 struct anv_pipeline_stage *gs_stage,
699 struct anv_pipeline_stage *next_stage)
700 {
701 anv_fill_binding_table(&gs_stage->prog_data.gs.base.base, 0);
702
703 if (next_stage)
704 brw_nir_link_shaders(compiler, &gs_stage->nir, &next_stage->nir);
705 }
706
707 static const unsigned *
708 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
709 void *mem_ctx,
710 struct anv_pipeline_stage *gs_stage,
711 struct anv_pipeline_stage *prev_stage)
712 {
713 brw_compute_vue_map(compiler->devinfo,
714 &gs_stage->prog_data.gs.base.vue_map,
715 gs_stage->nir->info.outputs_written,
716 gs_stage->nir->info.separate_shader);
717
718 return brw_compile_gs(compiler, NULL, mem_ctx, &gs_stage->key.gs,
719 &gs_stage->prog_data.gs, gs_stage->nir,
720 NULL, -1, NULL);
721 }
722
723 static void
724 anv_pipeline_link_fs(const struct brw_compiler *compiler,
725 struct anv_pipeline_stage *stage)
726 {
727 unsigned num_rts = 0;
728 const int max_rt = FRAG_RESULT_DATA7 - FRAG_RESULT_DATA0 + 1;
729 struct anv_pipeline_binding rt_bindings[max_rt];
730 nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
731 int rt_to_bindings[max_rt];
732 memset(rt_to_bindings, -1, sizeof(rt_to_bindings));
733 bool rt_used[max_rt];
734 memset(rt_used, 0, sizeof(rt_used));
735
736 /* Flag used render targets */
737 nir_foreach_variable_safe(var, &stage->nir->outputs) {
738 if (var->data.location < FRAG_RESULT_DATA0)
739 continue;
740
741 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
742 /* Unused or out-of-bounds */
743 if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & (1 << rt)))
744 continue;
745
746 const unsigned array_len =
747 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
748 assert(rt + array_len <= max_rt);
749
750 for (unsigned i = 0; i < array_len; i++)
751 rt_used[rt + i] = true;
752 }
753
754 /* Set new, compacted, location */
755 for (unsigned i = 0; i < max_rt; i++) {
756 if (!rt_used[i])
757 continue;
758
759 rt_to_bindings[i] = num_rts;
760 rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
761 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
762 .binding = 0,
763 .index = i,
764 };
765 num_rts++;
766 }
767
768 bool deleted_output = false;
769 nir_foreach_variable_safe(var, &stage->nir->outputs) {
770 if (var->data.location < FRAG_RESULT_DATA0)
771 continue;
772
773 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
774 if (rt >= MAX_RTS ||
775 !(stage->key.wm.color_outputs_valid & (1 << rt))) {
776 /* Unused or out-of-bounds, throw it away */
777 deleted_output = true;
778 var->data.mode = nir_var_local;
779 exec_node_remove(&var->node);
780 exec_list_push_tail(&impl->locals, &var->node);
781 continue;
782 }
783
784 /* Give it the new location */
785 assert(rt_to_bindings[rt] != -1);
786 var->data.location = rt_to_bindings[rt] + FRAG_RESULT_DATA0;
787 }
788
789 if (deleted_output)
790 nir_fixup_deref_modes(stage->nir);
791
792 if (num_rts == 0) {
793 /* If we have no render targets, we need a null render target */
794 rt_bindings[0] = (struct anv_pipeline_binding) {
795 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
796 .binding = 0,
797 .index = UINT32_MAX,
798 };
799 num_rts = 1;
800 }
801
802 /* Now that we've determined the actual number of render targets, adjust
803 * the key accordingly.
804 */
805 stage->key.wm.nr_color_regions = num_rts;
806 stage->key.wm.color_outputs_valid = (1 << num_rts) - 1;
807
808 assert(num_rts <= max_rt);
809 assert(stage->bind_map.surface_count + num_rts <= 256);
810 memmove(stage->bind_map.surface_to_descriptor + num_rts,
811 stage->bind_map.surface_to_descriptor,
812 stage->bind_map.surface_count *
813 sizeof(*stage->bind_map.surface_to_descriptor));
814 typed_memcpy(stage->bind_map.surface_to_descriptor,
815 rt_bindings, num_rts);
816 stage->bind_map.surface_count += num_rts;
817
818 anv_fill_binding_table(&stage->prog_data.wm.base, num_rts);
819 }
820
821 static const unsigned *
822 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
823 void *mem_ctx,
824 struct anv_pipeline_stage *fs_stage,
825 struct anv_pipeline_stage *prev_stage)
826 {
827 /* TODO: we could set this to 0 based on the information in nir_shader, but
828 * we need this before we call spirv_to_nir.
829 */
830 assert(prev_stage);
831 fs_stage->key.wm.input_slots_valid =
832 prev_stage->prog_data.vue.vue_map.slots_valid;
833
834 const unsigned *code =
835 brw_compile_fs(compiler, NULL, mem_ctx, &fs_stage->key.wm,
836 &fs_stage->prog_data.wm, fs_stage->nir,
837 NULL, -1, -1, -1, true, false, NULL, NULL);
838
839 if (fs_stage->key.wm.nr_color_regions == 0 &&
840 !fs_stage->prog_data.wm.has_side_effects &&
841 !fs_stage->prog_data.wm.uses_kill &&
842 fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
843 !fs_stage->prog_data.wm.computed_stencil) {
844 /* This fragment shader has no outputs and no side effects. Go ahead
845 * and return the code pointer so we don't accidentally think the
846 * compile failed, but zero out prog_data, which will set program_size
847 * to zero and disable the stage.
848 */
849 memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
850 }
851
852 return code;
853 }
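/* anv_pipeline_compile_graphics() below detects this case by checking for
 * program_size == 0 on the fragment shader bin and then drops the stage
 * entirely (see the "done:" block).
 */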
854
855 static VkResult
856 anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
857 struct anv_pipeline_cache *cache,
858 const VkGraphicsPipelineCreateInfo *info)
859 {
860 const struct brw_compiler *compiler =
861 pipeline->device->instance->physicalDevice.compiler;
862 struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
863
864 pipeline->active_stages = 0;
865
866 VkResult result;
867 for (uint32_t i = 0; i < info->stageCount; i++) {
868 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
869 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
870
871 pipeline->active_stages |= sinfo->stage;
872
873 stages[stage].stage = stage;
874 stages[stage].module = anv_shader_module_from_handle(sinfo->module);
875 stages[stage].entrypoint = sinfo->pName;
876 stages[stage].spec_info = sinfo->pSpecializationInfo;
877
878 const struct gen_device_info *devinfo = &pipeline->device->info;
879 switch (stage) {
880 case MESA_SHADER_VERTEX:
881 populate_vs_prog_key(devinfo, &stages[stage].key.vs);
882 break;
883 case MESA_SHADER_TESS_CTRL:
884 populate_tcs_prog_key(devinfo,
885 info->pTessellationState->patchControlPoints,
886 &stages[stage].key.tcs);
887 break;
888 case MESA_SHADER_TESS_EVAL:
889 populate_tes_prog_key(devinfo, &stages[stage].key.tes);
890 break;
891 case MESA_SHADER_GEOMETRY:
892 populate_gs_prog_key(devinfo, &stages[stage].key.gs);
893 break;
894 case MESA_SHADER_FRAGMENT:
895 populate_wm_prog_key(devinfo, pipeline->subpass,
896 info->pMultisampleState,
897 &stages[stage].key.wm);
898 break;
899 default:
900 unreachable("Invalid graphics shader stage");
901 }
902 }
903
904 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
905 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
906
907 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
908
909 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
910
911 unsigned char sha1[20];
912 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
913
914 unsigned found = 0;
915 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
916 if (!stages[s].entrypoint)
917 continue;
918
919 stages[s].cache_key.stage = s;
920 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
921
922 struct anv_shader_bin *bin =
923 anv_device_search_for_kernel(pipeline->device, cache,
924 &stages[s].cache_key,
925 sizeof(stages[s].cache_key));
926 if (bin) {
927 found++;
928 pipeline->shaders[s] = bin;
929 }
930 }
931
932 if (found == __builtin_popcount(pipeline->active_stages)) {
933 /* We found all our shaders in the cache. We're done. */
934 goto done;
935 } else if (found > 0) {
936 /* We found some but not all of our shaders. This shouldn't happen
937 * most of the time but it can if we have a partially populated
938 * pipeline cache.
939 */
940 assert(found < __builtin_popcount(pipeline->active_stages));
941
942 vk_debug_report(&pipeline->device->instance->debug_report_callbacks,
943 VK_DEBUG_REPORT_WARNING_BIT_EXT |
944 VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
945 VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,
946 (uint64_t)(uintptr_t)cache,
947 0, 0, "anv",
948 "Found a partial pipeline in the cache. This is "
949 "most likely caused by an incomplete pipeline cache "
950 "import or export");
951
952 /* We're going to have to recompile anyway, so just throw away our
953 * references to the shaders in the cache. We'll get them out of the
954 * cache again as part of the compilation process.
955 */
956 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
957 if (pipeline->shaders[s]) {
958 anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
959 pipeline->shaders[s] = NULL;
960 }
961 }
962 }
963
964 void *pipeline_ctx = ralloc_context(NULL);
965
966 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
967 if (!stages[s].entrypoint)
968 continue;
969
970 assert(stages[s].stage == s);
971 assert(pipeline->shaders[s] == NULL);
972
973 stages[s].bind_map = (struct anv_pipeline_bind_map) {
974 .surface_to_descriptor = stages[s].surface_to_descriptor,
975 .sampler_to_descriptor = stages[s].sampler_to_descriptor
976 };
977
978 stages[s].nir = anv_pipeline_compile(pipeline, pipeline_ctx, layout,
979 &stages[s],
980 &stages[s].prog_data.base,
981 &stages[s].bind_map);
982 if (stages[s].nir == NULL) {
983 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
984 goto fail;
985 }
986 }
987
988 /* Walk backwards to link */
989 struct anv_pipeline_stage *next_stage = NULL;
990 for (int s = MESA_SHADER_STAGES - 1; s >= 0; s--) {
991 if (!stages[s].entrypoint)
992 continue;
993
994 switch (s) {
995 case MESA_SHADER_VERTEX:
996 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
997 break;
998 case MESA_SHADER_TESS_CTRL:
999 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1000 break;
1001 case MESA_SHADER_TESS_EVAL:
1002 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1003 break;
1004 case MESA_SHADER_GEOMETRY:
1005 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1006 break;
1007 case MESA_SHADER_FRAGMENT:
1008 anv_pipeline_link_fs(compiler, &stages[s]);
1009 break;
1010 default:
1011 unreachable("Invalid graphics shader stage");
1012 }
1013
1014 next_stage = &stages[s];
1015 }
1016
1017 struct anv_pipeline_stage *prev_stage = NULL;
1018 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1019 if (!stages[s].entrypoint)
1020 continue;
1021
1022 void *stage_ctx = ralloc_context(NULL);
1023
1024 const unsigned *code;
1025 switch (s) {
1026 case MESA_SHADER_VERTEX:
1027 code = anv_pipeline_compile_vs(compiler, stage_ctx, &stages[s]);
1028 break;
1029 case MESA_SHADER_TESS_CTRL:
1030 code = anv_pipeline_compile_tcs(compiler, stage_ctx,
1031 &stages[s], prev_stage);
1032 break;
1033 case MESA_SHADER_TESS_EVAL:
1034 code = anv_pipeline_compile_tes(compiler, stage_ctx,
1035 &stages[s], prev_stage);
1036 break;
1037 case MESA_SHADER_GEOMETRY:
1038 code = anv_pipeline_compile_gs(compiler, stage_ctx,
1039 &stages[s], prev_stage);
1040 break;
1041 case MESA_SHADER_FRAGMENT:
1042 code = anv_pipeline_compile_fs(compiler, stage_ctx,
1043 &stages[s], prev_stage);
1044 break;
1045 default:
1046 unreachable("Invalid graphics shader stage");
1047 }
1048 if (code == NULL) {
1049 ralloc_free(stage_ctx);
1050 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1051 goto fail;
1052 }
1053
1054 struct anv_shader_bin *bin =
1055 anv_device_upload_kernel(pipeline->device, cache,
1056 &stages[s].cache_key,
1057 sizeof(stages[s].cache_key),
1058 code, stages[s].prog_data.base.program_size,
1059 stages[s].nir->constant_data,
1060 stages[s].nir->constant_data_size,
1061 &stages[s].prog_data.base,
1062 brw_prog_data_size(s),
1063 &stages[s].bind_map);
1064 if (!bin) {
1065 ralloc_free(stage_ctx);
1066 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1067 goto fail;
1068 }
1069
1070 pipeline->shaders[s] = bin;
1071 ralloc_free(stage_ctx);
1072
1073 prev_stage = &stages[s];
1074 }
1075
1076 ralloc_free(pipeline_ctx);
1077
1078 done:
1079
1080 if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1081 pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1082 /* This can happen if we decided to implicitly disable the fragment
1083 * shader. See anv_pipeline_compile_fs().
1084 */
1085 anv_shader_bin_unref(pipeline->device,
1086 pipeline->shaders[MESA_SHADER_FRAGMENT]);
1087 pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1088 pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1089 }
1090
1091 return VK_SUCCESS;
1092
1093 fail:
1094 ralloc_free(pipeline_ctx);
1095
1096 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1097 if (pipeline->shaders[s])
1098 anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
1099 }
1100
1101 return result;
1102 }
1103
1104 VkResult
1105 anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
1106 struct anv_pipeline_cache *cache,
1107 const VkComputePipelineCreateInfo *info,
1108 const struct anv_shader_module *module,
1109 const char *entrypoint,
1110 const VkSpecializationInfo *spec_info)
1111 {
1112 const struct brw_compiler *compiler =
1113 pipeline->device->instance->physicalDevice.compiler;
1114
1115 struct anv_pipeline_stage stage = {
1116 .stage = MESA_SHADER_COMPUTE,
1117 .module = module,
1118 .entrypoint = entrypoint,
1119 .spec_info = spec_info,
1120 };
1121
1122 struct anv_shader_bin *bin = NULL;
1123
1124 populate_cs_prog_key(&pipeline->device->info, &stage.key.cs);
1125
1126 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1127
1128 unsigned char sha1[20];
1129 anv_pipeline_hash_compute(pipeline, layout, &stage, sha1);
1130 bin = anv_device_search_for_kernel(pipeline->device, cache, sha1, 20);
1131
1132 if (bin == NULL) {
1133 struct brw_cs_prog_data prog_data = {};
1134
1135 stage.bind_map = (struct anv_pipeline_bind_map) {
1136 .surface_to_descriptor = stage.surface_to_descriptor,
1137 .sampler_to_descriptor = stage.sampler_to_descriptor
1138 };
1139
1140 void *mem_ctx = ralloc_context(NULL);
1141
1142 nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx, layout, &stage,
1143 &prog_data.base, &stage.bind_map);
1144 if (nir == NULL) {
1145 ralloc_free(mem_ctx);
1146 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1147 }
1148
1149 NIR_PASS_V(nir, anv_nir_add_base_work_group_id, &prog_data);
1150
1151 anv_fill_binding_table(&prog_data.base, 1);
1152
1153 const unsigned *shader_code =
1154 brw_compile_cs(compiler, NULL, mem_ctx, &stage.key.cs,
1155 &prog_data, nir, -1, NULL);
1156 if (shader_code == NULL) {
1157 ralloc_free(mem_ctx);
1158 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1159 }
1160
1161 const unsigned code_size = prog_data.base.program_size;
1162 bin = anv_device_upload_kernel(pipeline->device, cache, sha1, 20,
1163 shader_code, code_size,
1164 nir->constant_data,
1165 nir->constant_data_size,
1166 &prog_data.base, sizeof(prog_data),
1167 &stage.bind_map);
1168 if (!bin) {
1169 ralloc_free(mem_ctx);
1170 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1171 }
1172
1173 ralloc_free(mem_ctx);
1174 }
1175
1176 pipeline->active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
1177 pipeline->shaders[MESA_SHADER_COMPUTE] = bin;
1178
1179 return VK_SUCCESS;
1180 }
1181
1182 /**
1183 * Copy pipeline state not marked as dynamic.
1184 * Dynamic state is pipeline state which hasn't been provided at pipeline
1185 * creation time, but is dynamically provided afterwards using various
1186 * vkCmdSet* functions.
1187 *
1188 * The set of state considered "non_dynamic" is determined by the pieces of
1189 * state that have their corresponding VkDynamicState enums omitted from
1190 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1191 *
1192 * @param[out] pipeline Destination non_dynamic state.
1193 * @param[in] pCreateInfo Source of non_dynamic state to be copied.
1194 */
1195 static void
1196 copy_non_dynamic_state(struct anv_pipeline *pipeline,
1197 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1198 {
1199 anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
1200 struct anv_subpass *subpass = pipeline->subpass;
1201
1202 pipeline->dynamic_state = default_dynamic_state;
1203
1204 if (pCreateInfo->pDynamicState) {
1205 /* Remove all of the states that are marked as dynamic */
1206 uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
1207 for (uint32_t s = 0; s < count; s++)
1208 states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]);
1209 }
1210
1211 struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
1212
1213 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1214 *
1215 * pViewportState is [...] NULL if the pipeline
1216 * has rasterization disabled.
1217 */
1218 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1219 assert(pCreateInfo->pViewportState);
1220
1221 dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
1222 if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
1223 typed_memcpy(dynamic->viewport.viewports,
1224 pCreateInfo->pViewportState->pViewports,
1225 pCreateInfo->pViewportState->viewportCount);
1226 }
1227
1228 dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
1229 if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
1230 typed_memcpy(dynamic->scissor.scissors,
1231 pCreateInfo->pViewportState->pScissors,
1232 pCreateInfo->pViewportState->scissorCount);
1233 }
1234 }
1235
1236 if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
1237 assert(pCreateInfo->pRasterizationState);
1238 dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
1239 }
1240
1241 if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
1242 assert(pCreateInfo->pRasterizationState);
1243 dynamic->depth_bias.bias =
1244 pCreateInfo->pRasterizationState->depthBiasConstantFactor;
1245 dynamic->depth_bias.clamp =
1246 pCreateInfo->pRasterizationState->depthBiasClamp;
1247 dynamic->depth_bias.slope =
1248 pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
1249 }
1250
1251 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1252 *
1253 * pColorBlendState is [...] NULL if the pipeline has rasterization
1254 * disabled or if the subpass of the render pass the pipeline is
1255 * created against does not use any color attachments.
1256 */
1257 bool uses_color_att = false;
1258 for (unsigned i = 0; i < subpass->color_count; ++i) {
1259 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
1260 uses_color_att = true;
1261 break;
1262 }
1263 }
1264
1265 if (uses_color_att &&
1266 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1267 assert(pCreateInfo->pColorBlendState);
1268
1269 if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS))
1270 typed_memcpy(dynamic->blend_constants,
1271 pCreateInfo->pColorBlendState->blendConstants, 4);
1272 }
1273
1274 /* If there is no depthstencil attachment, then don't read
1275 * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
1276 * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
1277 * no need to override the depthstencil defaults in
1278 * anv_pipeline::dynamic_state when there is no depthstencil attachment.
1279 *
1280 * Section 9.2 of the Vulkan 1.0.15 spec says:
1281 *
1282 * pDepthStencilState is [...] NULL if the pipeline has rasterization
1283 * disabled or if the subpass of the render pass the pipeline is created
1284 * against does not use a depth/stencil attachment.
1285 */
1286 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
1287 subpass->depth_stencil_attachment) {
1288 assert(pCreateInfo->pDepthStencilState);
1289
1290 if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
1291 dynamic->depth_bounds.min =
1292 pCreateInfo->pDepthStencilState->minDepthBounds;
1293 dynamic->depth_bounds.max =
1294 pCreateInfo->pDepthStencilState->maxDepthBounds;
1295 }
1296
1297 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
1298 dynamic->stencil_compare_mask.front =
1299 pCreateInfo->pDepthStencilState->front.compareMask;
1300 dynamic->stencil_compare_mask.back =
1301 pCreateInfo->pDepthStencilState->back.compareMask;
1302 }
1303
1304 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
1305 dynamic->stencil_write_mask.front =
1306 pCreateInfo->pDepthStencilState->front.writeMask;
1307 dynamic->stencil_write_mask.back =
1308 pCreateInfo->pDepthStencilState->back.writeMask;
1309 }
1310
1311 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
1312 dynamic->stencil_reference.front =
1313 pCreateInfo->pDepthStencilState->front.reference;
1314 dynamic->stencil_reference.back =
1315 pCreateInfo->pDepthStencilState->back.reference;
1316 }
1317 }
1318
1319 pipeline->dynamic_state_mask = states;
1320 }
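/* As a rough usage sketch: a pipeline created with VK_DYNAMIC_STATE_VIEWPORT
 * listed in pDynamicStates keeps the default viewport state here, and the
 * client is then expected to supply it at record time, e.g.
 *
 *    VkViewport vp = { 0.0f, 0.0f, 1920.0f, 1080.0f, 0.0f, 1.0f };
 *    vkCmdSetViewport(cmd_buffer, 0, 1, &vp);
 *
 * Everything not listed as dynamic is copied from pCreateInfo above.
 */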
1321
1322 static void
1323 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
1324 {
1325 #ifdef DEBUG
1326 struct anv_render_pass *renderpass = NULL;
1327 struct anv_subpass *subpass = NULL;
1328
1329 /* Assert that all required members of VkGraphicsPipelineCreateInfo are
1330 * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
1331 */
1332 assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1333
1334 renderpass = anv_render_pass_from_handle(info->renderPass);
1335 assert(renderpass);
1336
1337 assert(info->subpass < renderpass->subpass_count);
1338 subpass = &renderpass->subpasses[info->subpass];
1339
1340 assert(info->stageCount >= 1);
1341 assert(info->pVertexInputState);
1342 assert(info->pInputAssemblyState);
1343 assert(info->pRasterizationState);
1344 if (!info->pRasterizationState->rasterizerDiscardEnable) {
1345 assert(info->pViewportState);
1346 assert(info->pMultisampleState);
1347
1348 if (subpass && subpass->depth_stencil_attachment)
1349 assert(info->pDepthStencilState);
1350
1351 if (subpass && subpass->color_count > 0) {
1352 bool all_color_unused = true;
1353 for (int i = 0; i < subpass->color_count; i++) {
1354 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
1355 all_color_unused = false;
1356 }
1357 /* pColorBlendState is ignored if the pipeline has rasterization
1358 * disabled or if the subpass of the render pass the pipeline is
1359 * created against does not use any color attachments.
1360 */
1361 assert(info->pColorBlendState || all_color_unused);
1362 }
1363 }
1364
1365 for (uint32_t i = 0; i < info->stageCount; ++i) {
1366 switch (info->pStages[i].stage) {
1367 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1368 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1369 assert(info->pTessellationState);
1370 break;
1371 default:
1372 break;
1373 }
1374 }
1375 #endif
1376 }
1377
1378 /**
1379 * Calculate the desired L3 partitioning based on the current state of the
1380 * pipeline. For now this simply returns the conservative defaults calculated
1381 * by get_default_l3_weights(), but we could probably do better by gathering
1382 * more statistics from the pipeline state (e.g. guess of expected URB usage
1383 * and bound surfaces), or by using feed-back from performance counters.
1384 */
1385 void
1386 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
1387 {
1388 const struct gen_device_info *devinfo = &pipeline->device->info;
1389
1390 const struct gen_l3_weights w =
1391 gen_get_default_l3_weights(devinfo, pipeline->needs_data_cache, needs_slm);
1392
1393 pipeline->urb.l3_config = gen_get_l3_config(devinfo, w);
1394 pipeline->urb.total_size =
1395 gen_get_l3_config_urb_size(devinfo, pipeline->urb.l3_config);
1396 }
1397
1398 VkResult
1399 anv_pipeline_init(struct anv_pipeline *pipeline,
1400 struct anv_device *device,
1401 struct anv_pipeline_cache *cache,
1402 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1403 const VkAllocationCallbacks *alloc)
1404 {
1405 VkResult result;
1406
1407 anv_pipeline_validate_create_info(pCreateInfo);
1408
1409 if (alloc == NULL)
1410 alloc = &device->alloc;
1411
1412 pipeline->device = device;
1413
1414 ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
1415 assert(pCreateInfo->subpass < render_pass->subpass_count);
1416 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
1417
1418 result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
1419 if (result != VK_SUCCESS)
1420 return result;
1421
1422 pipeline->batch.alloc = alloc;
1423 pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
1424 pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
1425 pipeline->batch.relocs = &pipeline->batch_relocs;
1426 pipeline->batch.status = VK_SUCCESS;
1427
1428 copy_non_dynamic_state(pipeline, pCreateInfo);
1429 pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState &&
1430 pCreateInfo->pRasterizationState->depthClampEnable;
1431
1432 pipeline->sample_shading_enable = pCreateInfo->pMultisampleState &&
1433 pCreateInfo->pMultisampleState->sampleShadingEnable;
1434
1435 pipeline->needs_data_cache = false;
1436
1437 /* When we free the pipeline, we detect stages based on the NULL status
1438 * of various prog_data pointers. Make them NULL by default.
1439 */
1440 memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
1441
1442 result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
1443 if (result != VK_SUCCESS) {
1444 anv_reloc_list_finish(&pipeline->batch_relocs, alloc);
1445 return result;
1446 }
1447
1448 assert(pipeline->shaders[MESA_SHADER_VERTEX]);
1449
1450 anv_pipeline_setup_l3_config(pipeline, false);
1451
1452 const VkPipelineVertexInputStateCreateInfo *vi_info =
1453 pCreateInfo->pVertexInputState;
1454
1455 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
1456
1457 pipeline->vb_used = 0;
1458 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1459 const VkVertexInputAttributeDescription *desc =
1460 &vi_info->pVertexAttributeDescriptions[i];
1461
1462 if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
1463 pipeline->vb_used |= 1 << desc->binding;
1464 }
1465
1466 for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
1467 const VkVertexInputBindingDescription *desc =
1468 &vi_info->pVertexBindingDescriptions[i];
1469
1470 pipeline->vb[desc->binding].stride = desc->stride;
1471
1472 /* Step rate is programmed per vertex element (attribute), not
1473 * binding. Set up a map of which bindings step per instance, for
1474 * reference by vertex element setup. */
1475 switch (desc->inputRate) {
1476 default:
1477 case VK_VERTEX_INPUT_RATE_VERTEX:
1478 pipeline->vb[desc->binding].instanced = false;
1479 break;
1480 case VK_VERTEX_INPUT_RATE_INSTANCE:
1481 pipeline->vb[desc->binding].instanced = true;
1482 break;
1483 }
1484
1485 pipeline->vb[desc->binding].instance_divisor = 1;
1486 }
1487
1488 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
1489 vk_find_struct_const(vi_info->pNext,
1490 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
1491 if (vi_div_state) {
1492 for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
1493 const VkVertexInputBindingDivisorDescriptionEXT *desc =
1494 &vi_div_state->pVertexBindingDivisors[i];
1495
1496 pipeline->vb[desc->binding].instance_divisor = desc->divisor;
1497 }
1498 }
1499
1500 /* Our implementation of VK_KHR_multiview uses instancing to draw the
1501 * different views. If the client asks for instancing, we need to multiply
1502 * the instance divisor by the number of views to ensure that we repeat the
1503 * client's per-instance data once for each view.
1504 */
1505 if (pipeline->subpass->view_mask) {
1506 const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
1507 for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
1508 if (pipeline->vb[vb].instanced)
1509 pipeline->vb[vb].instance_divisor *= view_count;
1510 }
1511 }
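/* For example, with two views and a client-specified divisor of 3, the
 * effective hardware divisor becomes 6: each client instance is drawn
 * twice (once per view), so the per-instance data still advances once
 * every three client instances.
 */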
1512
1513 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1514 pCreateInfo->pInputAssemblyState;
1515 const VkPipelineTessellationStateCreateInfo *tess_info =
1516 pCreateInfo->pTessellationState;
1517 pipeline->primitive_restart = ia_info->primitiveRestartEnable;
1518
1519 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
1520 pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
1521 else
1522 pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
1523
1524 return VK_SUCCESS;
1525 }