anv: Implement the basic form of VK_EXT_transform_feedback
[mesa.git] / src / intel / vulkan / anv_pipeline.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "common/gen_l3_config.h"
32 #include "anv_private.h"
33 #include "compiler/brw_nir.h"
34 #include "anv_nir.h"
35 #include "nir/nir_xfb_info.h"
36 #include "spirv/nir_spirv.h"
37 #include "vk_util.h"
38
39 /* Needed for SWIZZLE macros */
40 #include "program/prog_instruction.h"
41
42 // Shader functions
43
44 VkResult anv_CreateShaderModule(
45 VkDevice _device,
46 const VkShaderModuleCreateInfo* pCreateInfo,
47 const VkAllocationCallbacks* pAllocator,
48 VkShaderModule* pShaderModule)
49 {
50 ANV_FROM_HANDLE(anv_device, device, _device);
51 struct anv_shader_module *module;
52
53 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
54 assert(pCreateInfo->flags == 0);
55
56 module = vk_alloc2(&device->alloc, pAllocator,
57 sizeof(*module) + pCreateInfo->codeSize, 8,
58 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
59 if (module == NULL)
60 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
61
62 module->size = pCreateInfo->codeSize;
63 memcpy(module->data, pCreateInfo->pCode, module->size);
64
65 _mesa_sha1_compute(module->data, module->size, module->sha1);
66
67 *pShaderModule = anv_shader_module_to_handle(module);
68
69 return VK_SUCCESS;
70 }
71
72 void anv_DestroyShaderModule(
73 VkDevice _device,
74 VkShaderModule _module,
75 const VkAllocationCallbacks* pAllocator)
76 {
77 ANV_FROM_HANDLE(anv_device, device, _device);
78 ANV_FROM_HANDLE(anv_shader_module, module, _module);
79
80 if (!module)
81 return;
82
83 vk_free2(&device->alloc, pAllocator, module);
84 }
85
86 #define SPIR_V_MAGIC_NUMBER 0x07230203
87
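/* Maps each shader stage to the INTEL_DEBUG flag that gates the per-stage
 * NIR dump emitted in anv_shader_compile_to_nir() below. (Descriptive
 * comment added for clarity.)
 */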
88 static const uint64_t stage_to_debug[] = {
89 [MESA_SHADER_VERTEX] = DEBUG_VS,
90 [MESA_SHADER_TESS_CTRL] = DEBUG_TCS,
91 [MESA_SHADER_TESS_EVAL] = DEBUG_TES,
92 [MESA_SHADER_GEOMETRY] = DEBUG_GS,
93 [MESA_SHADER_FRAGMENT] = DEBUG_WM,
94 [MESA_SHADER_COMPUTE] = DEBUG_CS,
95 };
96
97 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
98 * we can't do that yet because we don't have the ability to copy nir.
99 */
100 static nir_shader *
101 anv_shader_compile_to_nir(struct anv_device *device,
102 void *mem_ctx,
103 const struct anv_shader_module *module,
104 const char *entrypoint_name,
105 gl_shader_stage stage,
106 const VkSpecializationInfo *spec_info)
107 {
108 const struct anv_physical_device *pdevice =
109 &device->instance->physicalDevice;
110 const struct brw_compiler *compiler = pdevice->compiler;
111 const nir_shader_compiler_options *nir_options =
112 compiler->glsl_compiler_options[stage].NirOptions;
113
114 uint32_t *spirv = (uint32_t *) module->data;
115 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
116 assert(module->size % 4 == 0);
117
118 uint32_t num_spec_entries = 0;
119 struct nir_spirv_specialization *spec_entries = NULL;
120 if (spec_info && spec_info->mapEntryCount > 0) {
121 num_spec_entries = spec_info->mapEntryCount;
122 spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
123 for (uint32_t i = 0; i < num_spec_entries; i++) {
124 VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
125 const void *data = spec_info->pData + entry.offset;
126 assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
127
128 spec_entries[i].id = spec_info->pMapEntries[i].constantID;
129 if (spec_info->dataSize == 8)
130 spec_entries[i].data64 = *(const uint64_t *)data;
131 else
132 spec_entries[i].data32 = *(const uint32_t *)data;
133 }
134 }
135
136 struct spirv_to_nir_options spirv_options = {
137 .lower_workgroup_access_to_offsets = true,
138 .caps = {
139 .device_group = true,
140 .draw_parameters = true,
141 .float64 = pdevice->info.gen >= 8,
142 .geometry_streams = true,
143 .image_write_without_format = true,
144 .int16 = pdevice->info.gen >= 8,
145 .int64 = pdevice->info.gen >= 8,
146 .min_lod = true,
147 .multiview = true,
148 .post_depth_coverage = pdevice->info.gen >= 9,
149 .shader_viewport_index_layer = true,
150 .stencil_export = pdevice->info.gen >= 9,
151 .storage_8bit = pdevice->info.gen >= 8,
152 .storage_16bit = pdevice->info.gen >= 8,
153 .subgroup_arithmetic = true,
154 .subgroup_basic = true,
155 .subgroup_ballot = true,
156 .subgroup_quad = true,
157 .subgroup_shuffle = true,
158 .subgroup_vote = true,
159 .tessellation = true,
160 .transform_feedback = pdevice->info.gen >= 8,
161 .variable_pointers = true,
162 },
163 .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
164 .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
165 .push_const_ptr_type = glsl_uint_type(),
166 .shared_ptr_type = glsl_uint_type(),
167 };
168
169 nir_function *entry_point =
170 spirv_to_nir(spirv, module->size / 4,
171 spec_entries, num_spec_entries,
172 stage, entrypoint_name, &spirv_options, nir_options);
173 nir_shader *nir = entry_point->shader;
174 assert(nir->info.stage == stage);
175 nir_validate_shader(nir, "after spirv_to_nir");
176 ralloc_steal(mem_ctx, nir);
177
178 free(spec_entries);
179
180 if (unlikely(INTEL_DEBUG & stage_to_debug[stage])) {
181 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
182 gl_shader_stage_name(stage));
183 nir_print_shader(nir, stderr);
184 }
185
186 /* We have to lower away local constant initializers right before we
187 * inline functions. That way they get properly initialized at the top
188 * of the function and not at the top of its caller.
189 */
190 NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp);
191 NIR_PASS_V(nir, nir_lower_returns);
192 NIR_PASS_V(nir, nir_inline_functions);
193 NIR_PASS_V(nir, nir_opt_deref);
194
195 /* Pick off the single entrypoint that we want */
196 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
197 if (func != entry_point)
198 exec_node_remove(&func->node);
199 }
200 assert(exec_list_length(&nir->functions) == 1);
201
202 /* Now that we've deleted all but the main function, we can go ahead and
203 * lower the rest of the constant initializers. We do this here so that
204 * nir_remove_dead_variables and split_per_member_structs below see the
205 * corresponding stores.
206 */
207 NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
208
209 /* Split member structs. We do this before lower_io_to_temporaries so that
210 * it doesn't lower system values to temporaries by accident.
211 */
212 NIR_PASS_V(nir, nir_split_var_copies);
213 NIR_PASS_V(nir, nir_split_per_member_structs);
214
215 NIR_PASS_V(nir, nir_remove_dead_variables,
216 nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
217
218 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo | nir_var_mem_ssbo,
219 nir_address_format_vk_index_offset);
220
221 NIR_PASS_V(nir, nir_propagate_invariant);
222 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
223 entry_point->impl, true, false);
224
225 /* Vulkan uses the separate-shader linking model */
226 nir->info.separate_shader = true;
227
228 nir = brw_preprocess_nir(compiler, nir);
229
230 return nir;
231 }
232
233 void anv_DestroyPipeline(
234 VkDevice _device,
235 VkPipeline _pipeline,
236 const VkAllocationCallbacks* pAllocator)
237 {
238 ANV_FROM_HANDLE(anv_device, device, _device);
239 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
240
241 if (!pipeline)
242 return;
243
244 anv_reloc_list_finish(&pipeline->batch_relocs,
245 pAllocator ? pAllocator : &device->alloc);
246 if (pipeline->blend_state.map)
247 anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
248
249 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
250 if (pipeline->shaders[s])
251 anv_shader_bin_unref(device, pipeline->shaders[s]);
252 }
253
254 vk_free2(&device->alloc, pAllocator, pipeline);
255 }
256
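/* Translation from VkPrimitiveTopology to gen 3DPRIM values. Note that
 * VK_PRIMITIVE_TOPOLOGY_PATCH_LIST is not in this table; tessellation
 * pipelines get their topology from _3DPRIM_PATCHLIST() in
 * anv_pipeline_init() instead. (Descriptive comment added for clarity.)
 */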
257 static const uint32_t vk_to_gen_primitive_type[] = {
258 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
259 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
260 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
261 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
262 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
263 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
264 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
265 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
266 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
267 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
268 };
269
270 static void
271 populate_sampler_prog_key(const struct gen_device_info *devinfo,
272 struct brw_sampler_prog_key_data *key)
273 {
274 /* Almost all multisampled textures are compressed. The only time when we
275 * don't compress a multisampled texture is for 16x MSAA with a surface
276 * width greater than 8k which is a bit of an edge case. Since the sampler
277 * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
278 * to tell the compiler to always assume compression.
279 */
280 key->compressed_multisample_layout_mask = ~0;
281
282 /* SkyLake added support for 16x MSAA. With this came a new message for
283 * reading from a 16x MSAA surface with compression. The new message was
284 * needed because now the MCS data is 64 bits instead of 32 or lower as is
285 * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which
286 * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x
287 * so we can just use it unconditionally. This may not be quite as
288 * efficient but it saves us from recompiling.
289 */
290 if (devinfo->gen >= 9)
291 key->msaa_16 = ~0;
292
293 /* XXX: Handle texture swizzle on HSW- */
294 for (int i = 0; i < MAX_SAMPLERS; i++) {
295 /* Assume color sampler, no swizzling. (Works for BDW+) */
296 key->swizzles[i] = SWIZZLE_XYZW;
297 }
298 }
299
300 static void
301 populate_vs_prog_key(const struct gen_device_info *devinfo,
302 struct brw_vs_prog_key *key)
303 {
304 memset(key, 0, sizeof(*key));
305
306 populate_sampler_prog_key(devinfo, &key->tex);
307
308 /* XXX: Handle vertex input work-arounds */
309
310 /* XXX: Handle sampler_prog_key */
311 }
312
313 static void
314 populate_tcs_prog_key(const struct gen_device_info *devinfo,
315 unsigned input_vertices,
316 struct brw_tcs_prog_key *key)
317 {
318 memset(key, 0, sizeof(*key));
319
320 populate_sampler_prog_key(devinfo, &key->tex);
321
322 key->input_vertices = input_vertices;
323 }
324
325 static void
326 populate_tes_prog_key(const struct gen_device_info *devinfo,
327 struct brw_tes_prog_key *key)
328 {
329 memset(key, 0, sizeof(*key));
330
331 populate_sampler_prog_key(devinfo, &key->tex);
332 }
333
334 static void
335 populate_gs_prog_key(const struct gen_device_info *devinfo,
336 struct brw_gs_prog_key *key)
337 {
338 memset(key, 0, sizeof(*key));
339
340 populate_sampler_prog_key(devinfo, &key->tex);
341 }
342
343 static void
344 populate_wm_prog_key(const struct gen_device_info *devinfo,
345 const struct anv_subpass *subpass,
346 const VkPipelineMultisampleStateCreateInfo *ms_info,
347 struct brw_wm_prog_key *key)
348 {
349 memset(key, 0, sizeof(*key));
350
351 populate_sampler_prog_key(devinfo, &key->tex);
352
353 /* We set this to 0 here and set it to the actual value before we call
353 /* We set this to 0 here and set it to the actual value before we call
354 * brw_compile_fs.
355 */
356 key->input_slots_valid = 0;
357
358 /* Vulkan doesn't specify a default */
359 key->high_quality_derivatives = false;
360
361 /* XXX Vulkan doesn't appear to specify */
362 key->clamp_fragment_color = false;
363
364 assert(subpass->color_count <= MAX_RTS);
365 for (uint32_t i = 0; i < subpass->color_count; i++) {
366 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
367 key->color_outputs_valid |= (1 << i);
368 }
369
370 key->nr_color_regions = util_bitcount(key->color_outputs_valid);
371
372 key->replicate_alpha = key->nr_color_regions > 1 &&
373 ms_info && ms_info->alphaToCoverageEnable;
374
375 if (ms_info) {
376 /* We should probably pull this out of the shader, but it's fairly
377 * harmless to compute it and then let dead-code take care of it.
378 */
379 if (ms_info->rasterizationSamples > 1) {
380 key->persample_interp =
381 (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
382 key->multisample_fbo = true;
383 }
384
385 key->frag_coord_adds_sample_pos = ms_info->sampleShadingEnable;
386 }
387 }
388
389 static void
390 populate_cs_prog_key(const struct gen_device_info *devinfo,
391 struct brw_cs_prog_key *key)
392 {
393 memset(key, 0, sizeof(*key));
394
395 populate_sampler_prog_key(devinfo, &key->tex);
396 }
397
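/* Scratch state for one shader stage while a pipeline is being built: the
 * shader module and entrypoint, the program key and cache key, the NIR once
 * it has been generated, and the bind map and prog_data filled in during
 * compilation. Instances live on the stack in
 * anv_pipeline_compile_graphics() and anv_pipeline_compile_cs().
 * (Descriptive comment added for clarity.)
 */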
398 struct anv_pipeline_stage {
399 gl_shader_stage stage;
400
401 const struct anv_shader_module *module;
402 const char *entrypoint;
403 const VkSpecializationInfo *spec_info;
404
405 unsigned char shader_sha1[20];
406
407 union brw_any_prog_key key;
408
409 struct {
410 gl_shader_stage stage;
411 unsigned char sha1[20];
412 } cache_key;
413
414 nir_shader *nir;
415
416 struct anv_pipeline_binding surface_to_descriptor[256];
417 struct anv_pipeline_binding sampler_to_descriptor[256];
418 struct anv_pipeline_bind_map bind_map;
419
420 union brw_any_prog_data prog_data;
421 };
422
423 static void
424 anv_pipeline_hash_shader(const struct anv_shader_module *module,
425 const char *entrypoint,
426 gl_shader_stage stage,
427 const VkSpecializationInfo *spec_info,
428 unsigned char *sha1_out)
429 {
430 struct mesa_sha1 ctx;
431 _mesa_sha1_init(&ctx);
432
433 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
434 _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
435 _mesa_sha1_update(&ctx, &stage, sizeof(stage));
436 if (spec_info) {
437 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
438 spec_info->mapEntryCount *
439 sizeof(*spec_info->pMapEntries));
440 _mesa_sha1_update(&ctx, spec_info->pData,
441 spec_info->dataSize);
442 }
443
444 _mesa_sha1_final(&ctx, sha1_out);
445 }
446
447 static void
448 anv_pipeline_hash_graphics(struct anv_pipeline *pipeline,
449 struct anv_pipeline_layout *layout,
450 struct anv_pipeline_stage *stages,
451 unsigned char *sha1_out)
452 {
453 struct mesa_sha1 ctx;
454 _mesa_sha1_init(&ctx);
455
456 _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
457 sizeof(pipeline->subpass->view_mask));
458
459 if (layout)
460 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
461
462 const bool rba = pipeline->device->robust_buffer_access;
463 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
464
465 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
466 if (stages[s].entrypoint) {
467 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
468 sizeof(stages[s].shader_sha1));
469 _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
470 }
471 }
472
473 _mesa_sha1_final(&ctx, sha1_out);
474 }
475
476 static void
477 anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
478 struct anv_pipeline_layout *layout,
479 struct anv_pipeline_stage *stage,
480 unsigned char *sha1_out)
481 {
482 struct mesa_sha1 ctx;
483 _mesa_sha1_init(&ctx);
484
485 if (layout)
486 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
487
488 const bool rba = pipeline->device->robust_buffer_access;
489 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
490
491 _mesa_sha1_update(&ctx, stage->shader_sha1,
492 sizeof(stage->shader_sha1));
493 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
494
495 _mesa_sha1_final(&ctx, sha1_out);
496 }
497
498 static nir_shader *
499 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
500 struct anv_pipeline_cache *cache,
501 void *mem_ctx,
502 struct anv_pipeline_stage *stage)
503 {
504 const struct brw_compiler *compiler =
505 pipeline->device->instance->physicalDevice.compiler;
506 const nir_shader_compiler_options *nir_options =
507 compiler->glsl_compiler_options[stage->stage].NirOptions;
508 nir_shader *nir;
509
510 nir = anv_device_search_for_nir(pipeline->device, cache,
511 nir_options,
512 stage->shader_sha1,
513 mem_ctx);
514 if (nir) {
515 assert(nir->info.stage == stage->stage);
516 return nir;
517 }
518
519 nir = anv_shader_compile_to_nir(pipeline->device,
520 mem_ctx,
521 stage->module,
522 stage->entrypoint,
523 stage->stage,
524 stage->spec_info);
525 if (nir) {
526 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
527 return nir;
528 }
529
530 return NULL;
531 }
532
533 static void
534 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
535 void *mem_ctx,
536 struct anv_pipeline_stage *stage,
537 struct anv_pipeline_layout *layout)
538 {
539 const struct brw_compiler *compiler =
540 pipeline->device->instance->physicalDevice.compiler;
541
542 struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
543 nir_shader *nir = stage->nir;
544
545 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
546 NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable);
547 NIR_PASS_V(nir, anv_nir_lower_input_attachments);
548 }
549
550 NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
551
552 NIR_PASS_V(nir, anv_nir_lower_push_constants);
553
554 if (nir->info.stage != MESA_SHADER_COMPUTE)
555 NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask);
556
557 if (nir->info.stage == MESA_SHADER_COMPUTE)
558 prog_data->total_shared = nir->num_shared;
559
560 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
561
562 if (nir->num_uniforms > 0) {
563 assert(prog_data->nr_params == 0);
564
565 /* If the shader uses any push constants at all, we'll just give
566 * them the maximum possible number
567 */
568 assert(nir->num_uniforms <= MAX_PUSH_CONSTANTS_SIZE);
569 nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE;
570 prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
571 prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);
572
573 /* We now set the param values to be offsets into an
574 * anv_push_constants structure. Since the compiler doesn't
575 * actually dereference any of these values as pointers, it doesn't
576 * really matter what we put here.
577 */
578 struct anv_push_constants *null_data = NULL;
579 /* Fill out the push constants section of the param array */
580 for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) {
581 prog_data->param[i] = ANV_PARAM_PUSH(
582 (uintptr_t)&null_data->client_data[i * sizeof(float)]);
583 }
584 }
585
586 if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
587 pipeline->needs_data_cache = true;
588
589 NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
590
591 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
592 if (layout) {
593 anv_nir_apply_pipeline_layout(&pipeline->device->instance->physicalDevice,
594 pipeline->device->robust_buffer_access,
595 layout, nir, prog_data,
596 &stage->bind_map);
597 NIR_PASS_V(nir, nir_opt_constant_folding);
598 }
599
600 if (nir->info.stage != MESA_SHADER_COMPUTE)
601 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
602
603 assert(nir->num_uniforms == prog_data->nr_params * 4);
604
605 stage->nir = nir;
606 }
607
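/* Point every section of the stage's binding table at the same base index.
 * Roughly speaking, graphics stages pass a bias of 0 so the shader's binding
 * table indices map directly onto the surfaces recorded in its
 * anv_pipeline_bind_map (for a fragment shader: first the render targets set
 * up in anv_pipeline_link_fs, then whatever anv_nir_apply_pipeline_layout
 * adds); the compute path below passes a bias of 1, so its surfaces start at
 * binding table entry 1. (Descriptive comment added for clarity.)
 */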
608 static void
609 anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
610 {
611 prog_data->binding_table.size_bytes = 0;
612 prog_data->binding_table.texture_start = bias;
613 prog_data->binding_table.gather_texture_start = bias;
614 prog_data->binding_table.ubo_start = bias;
615 prog_data->binding_table.ssbo_start = bias;
616 prog_data->binding_table.image_start = bias;
617 }
618
619 static void
620 anv_pipeline_link_vs(const struct brw_compiler *compiler,
621 struct anv_pipeline_stage *vs_stage,
622 struct anv_pipeline_stage *next_stage)
623 {
624 anv_fill_binding_table(&vs_stage->prog_data.vs.base.base, 0);
625
626 if (next_stage)
627 brw_nir_link_shaders(compiler, &vs_stage->nir, &next_stage->nir);
628 }
629
630 static const unsigned *
631 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
632 void *mem_ctx,
633 struct anv_pipeline_stage *vs_stage)
634 {
635 brw_compute_vue_map(compiler->devinfo,
636 &vs_stage->prog_data.vs.base.vue_map,
637 vs_stage->nir->info.outputs_written,
638 vs_stage->nir->info.separate_shader);
639
640 return brw_compile_vs(compiler, NULL, mem_ctx, &vs_stage->key.vs,
641 &vs_stage->prog_data.vs, vs_stage->nir, -1, NULL);
642 }
643
644 static void
645 merge_tess_info(struct shader_info *tes_info,
646 const struct shader_info *tcs_info)
647 {
648 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
649 *
650 * "PointMode. Controls generation of points rather than triangles
651 * or lines. This functionality defaults to disabled, and is
652 * enabled if either shader stage includes the execution mode."
653 *
654 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
655 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
656 * and OutputVertices, it says:
657 *
658 * "One mode must be set in at least one of the tessellation
659 * shader stages."
660 *
661 * So, the fields can be set in either the TCS or TES, but they must
662 * agree if set in both. Our backend looks at TES, so bitwise-or in
663 * the values from the TCS.
664 */
665 assert(tcs_info->tess.tcs_vertices_out == 0 ||
666 tes_info->tess.tcs_vertices_out == 0 ||
667 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
668 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
669
670 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
671 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
672 tcs_info->tess.spacing == tes_info->tess.spacing);
673 tes_info->tess.spacing |= tcs_info->tess.spacing;
674
675 assert(tcs_info->tess.primitive_mode == 0 ||
676 tes_info->tess.primitive_mode == 0 ||
677 tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
678 tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
679 tes_info->tess.ccw |= tcs_info->tess.ccw;
680 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
681 }
682
683 static void
684 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
685 struct anv_pipeline_stage *tcs_stage,
686 struct anv_pipeline_stage *tes_stage)
687 {
688 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
689
690 anv_fill_binding_table(&tcs_stage->prog_data.tcs.base.base, 0);
691
692 brw_nir_link_shaders(compiler, &tcs_stage->nir, &tes_stage->nir);
693
694 nir_lower_patch_vertices(tes_stage->nir,
695 tcs_stage->nir->info.tess.tcs_vertices_out,
696 NULL);
697
698 /* Copy TCS info into the TES info */
699 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
700
704 /* Whacking the key after cache lookup is a bit sketchy, but all of
705 * this comes from the SPIR-V, which is part of the hash used for the
706 * pipeline cache. So it should be safe.
707 */
708 tcs_stage->key.tcs.tes_primitive_mode =
709 tes_stage->nir->info.tess.primitive_mode;
710 tcs_stage->key.tcs.quads_workaround =
711 compiler->devinfo->gen < 9 &&
712 tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
713 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
714 }
715
716 static const unsigned *
717 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
718 void *mem_ctx,
719 struct anv_pipeline_stage *tcs_stage,
720 struct anv_pipeline_stage *prev_stage)
721 {
722 tcs_stage->key.tcs.outputs_written =
723 tcs_stage->nir->info.outputs_written;
724 tcs_stage->key.tcs.patch_outputs_written =
725 tcs_stage->nir->info.patch_outputs_written;
726
727 return brw_compile_tcs(compiler, NULL, mem_ctx, &tcs_stage->key.tcs,
728 &tcs_stage->prog_data.tcs, tcs_stage->nir,
729 -1, NULL);
730 }
731
732 static void
733 anv_pipeline_link_tes(const struct brw_compiler *compiler,
734 struct anv_pipeline_stage *tes_stage,
735 struct anv_pipeline_stage *next_stage)
736 {
737 anv_fill_binding_table(&tes_stage->prog_data.tes.base.base, 0);
738
739 if (next_stage)
740 brw_nir_link_shaders(compiler, &tes_stage->nir, &next_stage->nir);
741 }
742
743 static const unsigned *
744 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
745 void *mem_ctx,
746 struct anv_pipeline_stage *tes_stage,
747 struct anv_pipeline_stage *tcs_stage)
748 {
749 tes_stage->key.tes.inputs_read =
750 tcs_stage->nir->info.outputs_written;
751 tes_stage->key.tes.patch_inputs_read =
752 tcs_stage->nir->info.patch_outputs_written;
753
754 return brw_compile_tes(compiler, NULL, mem_ctx, &tes_stage->key.tes,
755 &tcs_stage->prog_data.tcs.base.vue_map,
756 &tes_stage->prog_data.tes, tes_stage->nir,
757 NULL, -1, NULL);
758 }
759
760 static void
761 anv_pipeline_link_gs(const struct brw_compiler *compiler,
762 struct anv_pipeline_stage *gs_stage,
763 struct anv_pipeline_stage *next_stage)
764 {
765 anv_fill_binding_table(&gs_stage->prog_data.gs.base.base, 0);
766
767 if (next_stage)
768 brw_nir_link_shaders(compiler, &gs_stage->nir, &next_stage->nir);
769 }
770
771 static const unsigned *
772 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
773 void *mem_ctx,
774 struct anv_pipeline_stage *gs_stage,
775 struct anv_pipeline_stage *prev_stage)
776 {
777 brw_compute_vue_map(compiler->devinfo,
778 &gs_stage->prog_data.gs.base.vue_map,
779 gs_stage->nir->info.outputs_written,
780 gs_stage->nir->info.separate_shader);
781
782 return brw_compile_gs(compiler, NULL, mem_ctx, &gs_stage->key.gs,
783 &gs_stage->prog_data.gs, gs_stage->nir,
784 NULL, -1, NULL);
785 }
786
787 static void
788 anv_pipeline_link_fs(const struct brw_compiler *compiler,
789 struct anv_pipeline_stage *stage)
790 {
791 unsigned num_rts = 0;
792 const int max_rt = FRAG_RESULT_DATA7 - FRAG_RESULT_DATA0 + 1;
793 struct anv_pipeline_binding rt_bindings[max_rt];
794 nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
795 int rt_to_bindings[max_rt];
796 memset(rt_to_bindings, -1, sizeof(rt_to_bindings));
797 bool rt_used[max_rt];
798 memset(rt_used, 0, sizeof(rt_used));
799
800 /* Flag used render targets */
801 nir_foreach_variable_safe(var, &stage->nir->outputs) {
802 if (var->data.location < FRAG_RESULT_DATA0)
803 continue;
804
805 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
806 /* Unused or out-of-bounds */
807 if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & (1 << rt)))
808 continue;
809
810 const unsigned array_len =
811 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
812 assert(rt + array_len <= max_rt);
813
814 for (unsigned i = 0; i < array_len; i++)
815 rt_used[rt + i] = true;
816 }
817
818 /* Set new, compacted, location */
819 for (unsigned i = 0; i < max_rt; i++) {
820 if (!rt_used[i])
821 continue;
822
823 rt_to_bindings[i] = num_rts;
824 rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
825 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
826 .binding = 0,
827 .index = i,
828 };
829 num_rts++;
830 }
831
832 bool deleted_output = false;
833 nir_foreach_variable_safe(var, &stage->nir->outputs) {
834 if (var->data.location < FRAG_RESULT_DATA0)
835 continue;
836
837 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
838 if (rt >= MAX_RTS ||
839 !(stage->key.wm.color_outputs_valid & (1 << rt))) {
840 /* Unused or out-of-bounds, throw it away */
841 deleted_output = true;
842 var->data.mode = nir_var_function_temp;
843 exec_node_remove(&var->node);
844 exec_list_push_tail(&impl->locals, &var->node);
845 continue;
846 }
847
848 /* Give it the new location */
849 assert(rt_to_bindings[rt] != -1);
850 var->data.location = rt_to_bindings[rt] + FRAG_RESULT_DATA0;
851 }
852
853 if (deleted_output)
854 nir_fixup_deref_modes(stage->nir);
855
856 if (num_rts == 0) {
857 /* If we have no render targets, we need a null render target */
858 rt_bindings[0] = (struct anv_pipeline_binding) {
859 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
860 .binding = 0,
861 .index = UINT32_MAX,
862 };
863 num_rts = 1;
864 }
865
866 /* Now that we've determined the actual number of render targets, adjust
867 * the key accordingly.
868 */
869 stage->key.wm.nr_color_regions = num_rts;
870 stage->key.wm.color_outputs_valid = (1 << num_rts) - 1;
871
872 assert(num_rts <= max_rt);
873 assert(stage->bind_map.surface_count == 0);
874 typed_memcpy(stage->bind_map.surface_to_descriptor,
875 rt_bindings, num_rts);
876 stage->bind_map.surface_count += num_rts;
877
878 anv_fill_binding_table(&stage->prog_data.wm.base, 0);
879 }
880
881 static const unsigned *
882 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
883 void *mem_ctx,
884 struct anv_pipeline_stage *fs_stage,
885 struct anv_pipeline_stage *prev_stage)
886 {
887 /* TODO: we could set this to 0 based on the information in nir_shader, but
888 * we need this before we call spirv_to_nir.
889 */
890 assert(prev_stage);
891 fs_stage->key.wm.input_slots_valid =
892 prev_stage->prog_data.vue.vue_map.slots_valid;
893
894 const unsigned *code =
895 brw_compile_fs(compiler, NULL, mem_ctx, &fs_stage->key.wm,
896 &fs_stage->prog_data.wm, fs_stage->nir,
897 NULL, -1, -1, -1, true, false, NULL, NULL);
898
899 if (fs_stage->key.wm.nr_color_regions == 0 &&
900 !fs_stage->prog_data.wm.has_side_effects &&
901 !fs_stage->prog_data.wm.uses_kill &&
902 fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
903 !fs_stage->prog_data.wm.computed_stencil) {
904 /* This fragment shader has no outputs and no side effects. Go ahead
905 * and return the code pointer so we don't accidentally think the
906 * compile failed, but zero out prog_data, which will set program_size
907 * to zero and disable the stage.
908 */
909 memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
910 }
911
912 return code;
913 }
914
915 static VkResult
916 anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
917 struct anv_pipeline_cache *cache,
918 const VkGraphicsPipelineCreateInfo *info)
919 {
920 const struct brw_compiler *compiler =
921 pipeline->device->instance->physicalDevice.compiler;
922 struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
923
924 pipeline->active_stages = 0;
925
926 VkResult result;
927 for (uint32_t i = 0; i < info->stageCount; i++) {
928 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
929 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
930
931 pipeline->active_stages |= sinfo->stage;
932
933 stages[stage].stage = stage;
934 stages[stage].module = anv_shader_module_from_handle(sinfo->module);
935 stages[stage].entrypoint = sinfo->pName;
936 stages[stage].spec_info = sinfo->pSpecializationInfo;
937 anv_pipeline_hash_shader(stages[stage].module,
938 stages[stage].entrypoint,
939 stage,
940 stages[stage].spec_info,
941 stages[stage].shader_sha1);
942
943 const struct gen_device_info *devinfo = &pipeline->device->info;
944 switch (stage) {
945 case MESA_SHADER_VERTEX:
946 populate_vs_prog_key(devinfo, &stages[stage].key.vs);
947 break;
948 case MESA_SHADER_TESS_CTRL:
949 populate_tcs_prog_key(devinfo,
950 info->pTessellationState->patchControlPoints,
951 &stages[stage].key.tcs);
952 break;
953 case MESA_SHADER_TESS_EVAL:
954 populate_tes_prog_key(devinfo, &stages[stage].key.tes);
955 break;
956 case MESA_SHADER_GEOMETRY:
957 populate_gs_prog_key(devinfo, &stages[stage].key.gs);
958 break;
959 case MESA_SHADER_FRAGMENT:
960 populate_wm_prog_key(devinfo, pipeline->subpass,
961 info->pMultisampleState,
962 &stages[stage].key.wm);
963 break;
964 default:
965 unreachable("Invalid graphics shader stage");
966 }
967 }
968
969 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
970 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
971
972 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
973
974 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
975
976 unsigned char sha1[20];
977 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
978
979 unsigned found = 0;
980 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
981 if (!stages[s].entrypoint)
982 continue;
983
984 stages[s].cache_key.stage = s;
985 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
986
987 struct anv_shader_bin *bin =
988 anv_device_search_for_kernel(pipeline->device, cache,
989 &stages[s].cache_key,
990 sizeof(stages[s].cache_key));
991 if (bin) {
992 found++;
993 pipeline->shaders[s] = bin;
994 }
995 }
996
997 if (found == __builtin_popcount(pipeline->active_stages)) {
998 /* We found all our shaders in the cache. We're done. */
999 goto done;
1000 } else if (found > 0) {
1001 /* We found some but not all of our shaders. This shouldn't happen
1002 * most of the time but it can if we have a partially populated
1003 * pipeline cache.
1004 */
1005 assert(found < __builtin_popcount(pipeline->active_stages));
1006
1007 vk_debug_report(&pipeline->device->instance->debug_report_callbacks,
1008 VK_DEBUG_REPORT_WARNING_BIT_EXT |
1009 VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
1010 VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,
1011 (uint64_t)(uintptr_t)cache,
1012 0, 0, "anv",
1013 "Found a partial pipeline in the cache. This is "
1014 "most likely caused by an incomplete pipeline cache "
1015 "import or export");
1016
1017 /* We're going to have to recompile anyway, so just throw away our
1018 * references to the shaders in the cache. We'll get them out of the
1019 * cache again as part of the compilation process.
1020 */
1021 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1022 if (pipeline->shaders[s]) {
1023 anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
1024 pipeline->shaders[s] = NULL;
1025 }
1026 }
1027 }
1028
1029 void *pipeline_ctx = ralloc_context(NULL);
1030
1031 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1032 if (!stages[s].entrypoint)
1033 continue;
1034
1035 assert(stages[s].stage == s);
1036 assert(pipeline->shaders[s] == NULL);
1037
1038 stages[s].bind_map = (struct anv_pipeline_bind_map) {
1039 .surface_to_descriptor = stages[s].surface_to_descriptor,
1040 .sampler_to_descriptor = stages[s].sampler_to_descriptor
1041 };
1042
1043 stages[s].nir = anv_pipeline_stage_get_nir(pipeline, cache,
1044 pipeline_ctx,
1045 &stages[s]);
1046 if (stages[s].nir == NULL) {
1047 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1048 goto fail;
1049 }
1050 }
1051
1052 /* Walk backwards to link */
1053 struct anv_pipeline_stage *next_stage = NULL;
1054 for (int s = MESA_SHADER_STAGES - 1; s >= 0; s--) {
1055 if (!stages[s].entrypoint)
1056 continue;
1057
1058 switch (s) {
1059 case MESA_SHADER_VERTEX:
1060 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1061 break;
1062 case MESA_SHADER_TESS_CTRL:
1063 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1064 break;
1065 case MESA_SHADER_TESS_EVAL:
1066 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1067 break;
1068 case MESA_SHADER_GEOMETRY:
1069 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1070 break;
1071 case MESA_SHADER_FRAGMENT:
1072 anv_pipeline_link_fs(compiler, &stages[s]);
1073 break;
1074 default:
1075 unreachable("Invalid graphics shader stage");
1076 }
1077
1078 next_stage = &stages[s];
1079 }
1080
1081 struct anv_pipeline_stage *prev_stage = NULL;
1082 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1083 if (!stages[s].entrypoint)
1084 continue;
1085
1086 void *stage_ctx = ralloc_context(NULL);
1087
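/* Transform feedback (VK_EXT_transform_feedback): gather the XFB layout
 * from the NIR of the stages that can produce transform feedback output
 * (VS, TES, GS) and pass it to anv_device_upload_kernel() below so it is
 * kept with the compiled shader in the pipeline cache. (Descriptive
 * comment added for clarity.)
 */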
1088 nir_xfb_info *xfb_info = NULL;
1089 if (s == MESA_SHADER_VERTEX ||
1090 s == MESA_SHADER_TESS_EVAL ||
1091 s == MESA_SHADER_GEOMETRY)
1092 xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
1093
1094 anv_pipeline_lower_nir(pipeline, stage_ctx, &stages[s], layout);
1095
1096 const unsigned *code;
1097 switch (s) {
1098 case MESA_SHADER_VERTEX:
1099 code = anv_pipeline_compile_vs(compiler, stage_ctx, &stages[s]);
1100 break;
1101 case MESA_SHADER_TESS_CTRL:
1102 code = anv_pipeline_compile_tcs(compiler, stage_ctx,
1103 &stages[s], prev_stage);
1104 break;
1105 case MESA_SHADER_TESS_EVAL:
1106 code = anv_pipeline_compile_tes(compiler, stage_ctx,
1107 &stages[s], prev_stage);
1108 break;
1109 case MESA_SHADER_GEOMETRY:
1110 code = anv_pipeline_compile_gs(compiler, stage_ctx,
1111 &stages[s], prev_stage);
1112 break;
1113 case MESA_SHADER_FRAGMENT:
1114 code = anv_pipeline_compile_fs(compiler, stage_ctx,
1115 &stages[s], prev_stage);
1116 break;
1117 default:
1118 unreachable("Invalid graphics shader stage");
1119 }
1120 if (code == NULL) {
1121 ralloc_free(stage_ctx);
1122 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1123 goto fail;
1124 }
1125
1126 struct anv_shader_bin *bin =
1127 anv_device_upload_kernel(pipeline->device, cache,
1128 &stages[s].cache_key,
1129 sizeof(stages[s].cache_key),
1130 code, stages[s].prog_data.base.program_size,
1131 stages[s].nir->constant_data,
1132 stages[s].nir->constant_data_size,
1133 &stages[s].prog_data.base,
1134 brw_prog_data_size(s),
1135 xfb_info, &stages[s].bind_map);
1136 if (!bin) {
1137 ralloc_free(stage_ctx);
1138 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1139 goto fail;
1140 }
1141
1142 pipeline->shaders[s] = bin;
1143 ralloc_free(stage_ctx);
1144
1145 prev_stage = &stages[s];
1146 }
1147
1148 ralloc_free(pipeline_ctx);
1149
1150 done:
1151
1152 if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1153 pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1154 /* This can happen if we decided to implicitly disable the fragment
1155 * shader. See anv_pipeline_compile_fs().
1156 */
1157 anv_shader_bin_unref(pipeline->device,
1158 pipeline->shaders[MESA_SHADER_FRAGMENT]);
1159 pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1160 pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1161 }
1162
1163 return VK_SUCCESS;
1164
1165 fail:
1166 ralloc_free(pipeline_ctx);
1167
1168 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1169 if (pipeline->shaders[s])
1170 anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
1171 }
1172
1173 return result;
1174 }
1175
1176 VkResult
1177 anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
1178 struct anv_pipeline_cache *cache,
1179 const VkComputePipelineCreateInfo *info,
1180 const struct anv_shader_module *module,
1181 const char *entrypoint,
1182 const VkSpecializationInfo *spec_info)
1183 {
1184 const struct brw_compiler *compiler =
1185 pipeline->device->instance->physicalDevice.compiler;
1186
1187 struct anv_pipeline_stage stage = {
1188 .stage = MESA_SHADER_COMPUTE,
1189 .module = module,
1190 .entrypoint = entrypoint,
1191 .spec_info = spec_info,
1192 .cache_key = {
1193 .stage = MESA_SHADER_COMPUTE,
1194 }
1195 };
1196 anv_pipeline_hash_shader(stage.module,
1197 stage.entrypoint,
1198 MESA_SHADER_COMPUTE,
1199 stage.spec_info,
1200 stage.shader_sha1);
1201
1202 struct anv_shader_bin *bin = NULL;
1203
1204 populate_cs_prog_key(&pipeline->device->info, &stage.key.cs);
1205
1206 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1207
1208 anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1209 bin = anv_device_search_for_kernel(pipeline->device, cache, &stage.cache_key,
1210 sizeof(stage.cache_key));
1211
1212 if (bin == NULL) {
1213 stage.bind_map = (struct anv_pipeline_bind_map) {
1214 .surface_to_descriptor = stage.surface_to_descriptor,
1215 .sampler_to_descriptor = stage.sampler_to_descriptor
1216 };
1217
1218 void *mem_ctx = ralloc_context(NULL);
1219
1220 stage.nir = anv_pipeline_stage_get_nir(pipeline, cache, mem_ctx, &stage);
1221 if (stage.nir == NULL) {
1222 ralloc_free(mem_ctx);
1223 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1224 }
1225
1226 anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
1227
1228 NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,
1229 &stage.prog_data.cs);
1230
1231 anv_fill_binding_table(&stage.prog_data.cs.base, 1);
1232
1233 const unsigned *shader_code =
1234 brw_compile_cs(compiler, NULL, mem_ctx, &stage.key.cs,
1235 &stage.prog_data.cs, stage.nir, -1, NULL);
1236 if (shader_code == NULL) {
1237 ralloc_free(mem_ctx);
1238 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1239 }
1240
1241 const unsigned code_size = stage.prog_data.base.program_size;
1242 bin = anv_device_upload_kernel(pipeline->device, cache,
1243 &stage.cache_key, sizeof(stage.cache_key),
1244 shader_code, code_size,
1245 stage.nir->constant_data,
1246 stage.nir->constant_data_size,
1247 &stage.prog_data.base,
1248 sizeof(stage.prog_data.cs),
1249 NULL, &stage.bind_map);
1250 if (!bin) {
1251 ralloc_free(mem_ctx);
1252 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1253 }
1254
1255 ralloc_free(mem_ctx);
1256 }
1257
1258 pipeline->active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
1259 pipeline->shaders[MESA_SHADER_COMPUTE] = bin;
1260
1261 return VK_SUCCESS;
1262 }
1263
1264 /**
1265 * Copy pipeline state not marked as dynamic.
1266 * Dynamic state is pipeline state which hasn't been provided at pipeline
1267 * creation time, but is dynamically provided afterwards using various
1268 * vkCmdSet* functions.
1269 *
1270 * The set of state considered "non_dynamic" is determined by the pieces of
1271 * state that have their corresponding VkDynamicState enums omitted from
1272 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1273 *
1274 * @param[out] pipeline Destination non_dynamic state.
1275 * @param[in] pCreateInfo Source of non_dynamic state to be copied.
1276 */
1277 static void
1278 copy_non_dynamic_state(struct anv_pipeline *pipeline,
1279 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1280 {
1281 anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
1282 struct anv_subpass *subpass = pipeline->subpass;
1283
1284 pipeline->dynamic_state = default_dynamic_state;
1285
1286 if (pCreateInfo->pDynamicState) {
1287 /* Remove all of the states that are marked as dynamic */
1288 uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
1289 for (uint32_t s = 0; s < count; s++)
1290 states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]);
1291 }
1292
1293 struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
1294
1295 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1296 *
1297 * pViewportState is [...] NULL if the pipeline
1298 * has rasterization disabled.
1299 */
1300 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1301 assert(pCreateInfo->pViewportState);
1302
1303 dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
1304 if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
1305 typed_memcpy(dynamic->viewport.viewports,
1306 pCreateInfo->pViewportState->pViewports,
1307 pCreateInfo->pViewportState->viewportCount);
1308 }
1309
1310 dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
1311 if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
1312 typed_memcpy(dynamic->scissor.scissors,
1313 pCreateInfo->pViewportState->pScissors,
1314 pCreateInfo->pViewportState->scissorCount);
1315 }
1316 }
1317
1318 if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
1319 assert(pCreateInfo->pRasterizationState);
1320 dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
1321 }
1322
1323 if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
1324 assert(pCreateInfo->pRasterizationState);
1325 dynamic->depth_bias.bias =
1326 pCreateInfo->pRasterizationState->depthBiasConstantFactor;
1327 dynamic->depth_bias.clamp =
1328 pCreateInfo->pRasterizationState->depthBiasClamp;
1329 dynamic->depth_bias.slope =
1330 pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
1331 }
1332
1333 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1334 *
1335 * pColorBlendState is [...] NULL if the pipeline has rasterization
1336 * disabled or if the subpass of the render pass the pipeline is
1337 * created against does not use any color attachments.
1338 */
1339 bool uses_color_att = false;
1340 for (unsigned i = 0; i < subpass->color_count; ++i) {
1341 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
1342 uses_color_att = true;
1343 break;
1344 }
1345 }
1346
1347 if (uses_color_att &&
1348 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1349 assert(pCreateInfo->pColorBlendState);
1350
1351 if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS))
1352 typed_memcpy(dynamic->blend_constants,
1353 pCreateInfo->pColorBlendState->blendConstants, 4);
1354 }
1355
1356 /* If there is no depthstencil attachment, then don't read
1357 * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
1358 * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
1359 * no need to override the depthstencil defaults in
1360 * anv_pipeline::dynamic_state when there is no depthstencil attachment.
1361 *
1362 * Section 9.2 of the Vulkan 1.0.15 spec says:
1363 *
1364 * pDepthStencilState is [...] NULL if the pipeline has rasterization
1365 * disabled or if the subpass of the render pass the pipeline is created
1366 * against does not use a depth/stencil attachment.
1367 */
1368 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
1369 subpass->depth_stencil_attachment) {
1370 assert(pCreateInfo->pDepthStencilState);
1371
1372 if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
1373 dynamic->depth_bounds.min =
1374 pCreateInfo->pDepthStencilState->minDepthBounds;
1375 dynamic->depth_bounds.max =
1376 pCreateInfo->pDepthStencilState->maxDepthBounds;
1377 }
1378
1379 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
1380 dynamic->stencil_compare_mask.front =
1381 pCreateInfo->pDepthStencilState->front.compareMask;
1382 dynamic->stencil_compare_mask.back =
1383 pCreateInfo->pDepthStencilState->back.compareMask;
1384 }
1385
1386 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
1387 dynamic->stencil_write_mask.front =
1388 pCreateInfo->pDepthStencilState->front.writeMask;
1389 dynamic->stencil_write_mask.back =
1390 pCreateInfo->pDepthStencilState->back.writeMask;
1391 }
1392
1393 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
1394 dynamic->stencil_reference.front =
1395 pCreateInfo->pDepthStencilState->front.reference;
1396 dynamic->stencil_reference.back =
1397 pCreateInfo->pDepthStencilState->back.reference;
1398 }
1399 }
1400
1401 pipeline->dynamic_state_mask = states;
1402 }
1403
1404 static void
1405 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
1406 {
1407 #ifdef DEBUG
1408 struct anv_render_pass *renderpass = NULL;
1409 struct anv_subpass *subpass = NULL;
1410
1411 /* Assert that all required members of VkGraphicsPipelineCreateInfo are
1412 * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
1413 */
1414 assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1415
1416 renderpass = anv_render_pass_from_handle(info->renderPass);
1417 assert(renderpass);
1418
1419 assert(info->subpass < renderpass->subpass_count);
1420 subpass = &renderpass->subpasses[info->subpass];
1421
1422 assert(info->stageCount >= 1);
1423 assert(info->pVertexInputState);
1424 assert(info->pInputAssemblyState);
1425 assert(info->pRasterizationState);
1426 if (!info->pRasterizationState->rasterizerDiscardEnable) {
1427 assert(info->pViewportState);
1428 assert(info->pMultisampleState);
1429
1430 if (subpass && subpass->depth_stencil_attachment)
1431 assert(info->pDepthStencilState);
1432
1433 if (subpass && subpass->color_count > 0) {
1434 bool all_color_unused = true;
1435 for (int i = 0; i < subpass->color_count; i++) {
1436 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
1437 all_color_unused = false;
1438 }
1439 /* pColorBlendState is ignored if the pipeline has rasterization
1440 * disabled or if the subpass of the render pass the pipeline is
1441 * created against does not use any color attachments.
1442 */
1443 assert(info->pColorBlendState || all_color_unused);
1444 }
1445 }
1446
1447 for (uint32_t i = 0; i < info->stageCount; ++i) {
1448 switch (info->pStages[i].stage) {
1449 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1450 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1451 assert(info->pTessellationState);
1452 break;
1453 default:
1454 break;
1455 }
1456 }
1457 #endif
1458 }
1459
1460 /**
1461 * Calculate the desired L3 partitioning based on the current state of the
1462 * pipeline. For now this simply returns the conservative defaults calculated
1463 * by gen_get_default_l3_weights(), but we could probably do better by gathering
1464 * more statistics from the pipeline state (e.g. a guess of expected URB usage
1465 * and bound surfaces), or by using feedback from performance counters.
1466 */
1467 void
1468 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
1469 {
1470 const struct gen_device_info *devinfo = &pipeline->device->info;
1471
1472 const struct gen_l3_weights w =
1473 gen_get_default_l3_weights(devinfo, pipeline->needs_data_cache, needs_slm);
1474
1475 pipeline->urb.l3_config = gen_get_l3_config(devinfo, w);
1476 pipeline->urb.total_size =
1477 gen_get_l3_config_urb_size(devinfo, pipeline->urb.l3_config);
1478 }
1479
1480 VkResult
1481 anv_pipeline_init(struct anv_pipeline *pipeline,
1482 struct anv_device *device,
1483 struct anv_pipeline_cache *cache,
1484 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1485 const VkAllocationCallbacks *alloc)
1486 {
1487 VkResult result;
1488
1489 anv_pipeline_validate_create_info(pCreateInfo);
1490
1491 if (alloc == NULL)
1492 alloc = &device->alloc;
1493
1494 pipeline->device = device;
1495
1496 ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
1497 assert(pCreateInfo->subpass < render_pass->subpass_count);
1498 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
1499
1500 result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
1501 if (result != VK_SUCCESS)
1502 return result;
1503
1504 pipeline->batch.alloc = alloc;
1505 pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
1506 pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
1507 pipeline->batch.relocs = &pipeline->batch_relocs;
1508 pipeline->batch.status = VK_SUCCESS;
1509
1510 copy_non_dynamic_state(pipeline, pCreateInfo);
1511 pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState &&
1512 pCreateInfo->pRasterizationState->depthClampEnable;
1513
1514 pipeline->sample_shading_enable = pCreateInfo->pMultisampleState &&
1515 pCreateInfo->pMultisampleState->sampleShadingEnable;
1516
1517 pipeline->needs_data_cache = false;
1518
1519 /* When we free the pipeline, we detect stages based on the NULL status
1520 * of the shader bin pointers. Make them NULL by default.
1521 */
1522 memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
1523
1524 result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
1525 if (result != VK_SUCCESS) {
1526 anv_reloc_list_finish(&pipeline->batch_relocs, alloc);
1527 return result;
1528 }
1529
1530 assert(pipeline->shaders[MESA_SHADER_VERTEX]);
1531
1532 anv_pipeline_setup_l3_config(pipeline, false);
1533
1534 const VkPipelineVertexInputStateCreateInfo *vi_info =
1535 pCreateInfo->pVertexInputState;
1536
1537 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
1538
1539 pipeline->vb_used = 0;
1540 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1541 const VkVertexInputAttributeDescription *desc =
1542 &vi_info->pVertexAttributeDescriptions[i];
1543
1544 if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
1545 pipeline->vb_used |= 1 << desc->binding;
1546 }
1547
1548 for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
1549 const VkVertexInputBindingDescription *desc =
1550 &vi_info->pVertexBindingDescriptions[i];
1551
1552 pipeline->vb[desc->binding].stride = desc->stride;
1553
1554 /* Step rate is programmed per vertex element (attribute), not
1555 * binding. Set up a map of which bindings step per instance, for
1556 * reference by vertex element setup. */
1557 switch (desc->inputRate) {
1558 default:
1559 case VK_VERTEX_INPUT_RATE_VERTEX:
1560 pipeline->vb[desc->binding].instanced = false;
1561 break;
1562 case VK_VERTEX_INPUT_RATE_INSTANCE:
1563 pipeline->vb[desc->binding].instanced = true;
1564 break;
1565 }
1566
1567 pipeline->vb[desc->binding].instance_divisor = 1;
1568 }
1569
1570 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
1571 vk_find_struct_const(vi_info->pNext,
1572 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
1573 if (vi_div_state) {
1574 for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
1575 const VkVertexInputBindingDivisorDescriptionEXT *desc =
1576 &vi_div_state->pVertexBindingDivisors[i];
1577
1578 pipeline->vb[desc->binding].instance_divisor = desc->divisor;
1579 }
1580 }
1581
1582 /* Our implementation of VK_KHR_multiview uses instancing to draw the
1583 * different views. If the client asks for instancing, we need to multiply
1584 * the instance divisor by the number of views to ensure that we repeat the
1585 * client's per-instance data once for each view.
1586 */
1587 if (pipeline->subpass->view_mask) {
1588 const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
1589 for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
1590 if (pipeline->vb[vb].instanced)
1591 pipeline->vb[vb].instance_divisor *= view_count;
1592 }
1593 }
1594
1595 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1596 pCreateInfo->pInputAssemblyState;
1597 const VkPipelineTessellationStateCreateInfo *tess_info =
1598 pCreateInfo->pTessellationState;
1599 pipeline->primitive_restart = ia_info->primitiveRestartEnable;
1600
1601 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
1602 pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
1603 else
1604 pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
1605
1606 return VK_SUCCESS;
1607 }