nir: rename nir_var_ubo to nir_var_mem_ubo
[mesa.git] / src/intel/vulkan/anv_pipeline.c
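
For context: the rename this commit applies (nir_var_ubo -> nir_var_mem_ubo) reaches this file through the nir_lower_explicit_io call in anv_shader_compile_to_nir() below. A minimal before/after sketch of that call site, with the pre-rename spelling taken from the commit title:

    /* Before this commit (pre-rename variable mode name): */
    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_ubo | nir_var_ssbo,
               nir_address_format_vk_index_offset);

    /* After this commit: */
    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo | nir_var_ssbo,
               nir_address_format_vk_index_offset);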
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "common/gen_l3_config.h"
32 #include "anv_private.h"
33 #include "compiler/brw_nir.h"
34 #include "anv_nir.h"
35 #include "spirv/nir_spirv.h"
36 #include "vk_util.h"
37
38 /* Needed for SWIZZLE macros */
39 #include "program/prog_instruction.h"
40
41 // Shader functions
42
43 VkResult anv_CreateShaderModule(
44 VkDevice _device,
45 const VkShaderModuleCreateInfo* pCreateInfo,
46 const VkAllocationCallbacks* pAllocator,
47 VkShaderModule* pShaderModule)
48 {
49 ANV_FROM_HANDLE(anv_device, device, _device);
50 struct anv_shader_module *module;
51
52 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
53 assert(pCreateInfo->flags == 0);
54
55 module = vk_alloc2(&device->alloc, pAllocator,
56 sizeof(*module) + pCreateInfo->codeSize, 8,
57 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
58 if (module == NULL)
59 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
60
61 module->size = pCreateInfo->codeSize;
62 memcpy(module->data, pCreateInfo->pCode, module->size);
63
64 _mesa_sha1_compute(module->data, module->size, module->sha1);
65
66 *pShaderModule = anv_shader_module_to_handle(module);
67
68 return VK_SUCCESS;
69 }
70
71 void anv_DestroyShaderModule(
72 VkDevice _device,
73 VkShaderModule _module,
74 const VkAllocationCallbacks* pAllocator)
75 {
76 ANV_FROM_HANDLE(anv_device, device, _device);
77 ANV_FROM_HANDLE(anv_shader_module, module, _module);
78
79 if (!module)
80 return;
81
82 vk_free2(&device->alloc, pAllocator, module);
83 }
84
85 #define SPIR_V_MAGIC_NUMBER 0x07230203
86
87 static const uint64_t stage_to_debug[] = {
88 [MESA_SHADER_VERTEX] = DEBUG_VS,
89 [MESA_SHADER_TESS_CTRL] = DEBUG_TCS,
90 [MESA_SHADER_TESS_EVAL] = DEBUG_TES,
91 [MESA_SHADER_GEOMETRY] = DEBUG_GS,
92 [MESA_SHADER_FRAGMENT] = DEBUG_WM,
93 [MESA_SHADER_COMPUTE] = DEBUG_CS,
94 };
95
96 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
97 * we can't do that yet because we don't have the ability to copy nir.
98 */
99 static nir_shader *
100 anv_shader_compile_to_nir(struct anv_device *device,
101 void *mem_ctx,
102 const struct anv_shader_module *module,
103 const char *entrypoint_name,
104 gl_shader_stage stage,
105 const VkSpecializationInfo *spec_info)
106 {
107 const struct brw_compiler *compiler =
108 device->instance->physicalDevice.compiler;
109 const nir_shader_compiler_options *nir_options =
110 compiler->glsl_compiler_options[stage].NirOptions;
111
112 uint32_t *spirv = (uint32_t *) module->data;
113 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
114 assert(module->size % 4 == 0);
115
116 uint32_t num_spec_entries = 0;
117 struct nir_spirv_specialization *spec_entries = NULL;
118 if (spec_info && spec_info->mapEntryCount > 0) {
119 num_spec_entries = spec_info->mapEntryCount;
120 spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
121 for (uint32_t i = 0; i < num_spec_entries; i++) {
122 VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
123 const void *data = spec_info->pData + entry.offset;
124 assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
125
126 spec_entries[i].id = spec_info->pMapEntries[i].constantID;
127 if (spec_info->dataSize == 8)
128 spec_entries[i].data64 = *(const uint64_t *)data;
129 else
130 spec_entries[i].data32 = *(const uint32_t *)data;
131 }
132 }
133
134 struct spirv_to_nir_options spirv_options = {
135 .lower_workgroup_access_to_offsets = true,
136 .caps = {
137 .device_group = true,
138 .draw_parameters = true,
139 .float64 = device->instance->physicalDevice.info.gen >= 8,
140 .image_write_without_format = true,
141 .int16 = device->instance->physicalDevice.info.gen >= 8,
142 .int64 = device->instance->physicalDevice.info.gen >= 8,
143 .min_lod = true,
144 .multiview = true,
145 .post_depth_coverage = device->instance->physicalDevice.info.gen >= 9,
146 .shader_viewport_index_layer = true,
147 .stencil_export = device->instance->physicalDevice.info.gen >= 9,
148 .storage_8bit = device->instance->physicalDevice.info.gen >= 8,
149 .storage_16bit = device->instance->physicalDevice.info.gen >= 8,
150 .subgroup_arithmetic = true,
151 .subgroup_basic = true,
152 .subgroup_ballot = true,
153 .subgroup_quad = true,
154 .subgroup_shuffle = true,
155 .subgroup_vote = true,
156 .tessellation = true,
157 .variable_pointers = true,
158 },
159 .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
160 .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
161 .push_const_ptr_type = glsl_uint_type(),
162 .shared_ptr_type = glsl_uint_type(),
163 };
164
165 nir_function *entry_point =
166 spirv_to_nir(spirv, module->size / 4,
167 spec_entries, num_spec_entries,
168 stage, entrypoint_name, &spirv_options, nir_options);
169 nir_shader *nir = entry_point->shader;
170 assert(nir->info.stage == stage);
171 nir_validate_shader(nir, "after spirv_to_nir");
172 ralloc_steal(mem_ctx, nir);
173
174 free(spec_entries);
175
176 if (unlikely(INTEL_DEBUG & stage_to_debug[stage])) {
177 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
178 gl_shader_stage_name(stage));
179 nir_print_shader(nir, stderr);
180 }
181
182 /* We have to lower away local constant initializers right before we
183 * inline functions. That way they get properly initialized at the top
184 * of the function and not at the top of its caller.
185 */
186 NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp);
187 NIR_PASS_V(nir, nir_lower_returns);
188 NIR_PASS_V(nir, nir_inline_functions);
189 NIR_PASS_V(nir, nir_opt_deref);
190
191 /* Pick off the single entrypoint that we want */
192 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
193 if (func != entry_point)
194 exec_node_remove(&func->node);
195 }
196 assert(exec_list_length(&nir->functions) == 1);
197
198 /* Now that we've deleted all but the main function, we can go ahead and
199 * lower the rest of the constant initializers. We do this here so that
200 * nir_remove_dead_variables and split_per_member_structs below see the
201 * corresponding stores.
202 */
203 NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
204
205 /* Split member structs. We do this before lower_io_to_temporaries so that
206 * it doesn't lower system values to temporaries by accident.
207 */
208 NIR_PASS_V(nir, nir_split_var_copies);
209 NIR_PASS_V(nir, nir_split_per_member_structs);
210
211 NIR_PASS_V(nir, nir_remove_dead_variables,
212 nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
213
214 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo | nir_var_ssbo,
215 nir_address_format_vk_index_offset);
216
217 NIR_PASS_V(nir, nir_propagate_invariant);
218 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
219 entry_point->impl, true, false);
220
221 /* Vulkan uses the separate-shader linking model */
222 nir->info.separate_shader = true;
223
224 nir = brw_preprocess_nir(compiler, nir);
225
226 return nir;
227 }
228
229 void anv_DestroyPipeline(
230 VkDevice _device,
231 VkPipeline _pipeline,
232 const VkAllocationCallbacks* pAllocator)
233 {
234 ANV_FROM_HANDLE(anv_device, device, _device);
235 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
236
237 if (!pipeline)
238 return;
239
240 anv_reloc_list_finish(&pipeline->batch_relocs,
241 pAllocator ? pAllocator : &device->alloc);
242 if (pipeline->blend_state.map)
243 anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
244
245 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
246 if (pipeline->shaders[s])
247 anv_shader_bin_unref(device, pipeline->shaders[s]);
248 }
249
250 vk_free2(&device->alloc, pAllocator, pipeline);
251 }
252
253 static const uint32_t vk_to_gen_primitive_type[] = {
254 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
255 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
256 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
257 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
258 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
259 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
260 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
261 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
262 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
263 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
264 };
265
266 static void
267 populate_sampler_prog_key(const struct gen_device_info *devinfo,
268 struct brw_sampler_prog_key_data *key)
269 {
270 /* Almost all multisampled textures are compressed. The only time when we
271 * don't compress a multisampled texture is for 16x MSAA with a surface
272 * width greater than 8k which is a bit of an edge case. Since the sampler
273 * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
274 * to tell the compiler to always assume compression.
275 */
276 key->compressed_multisample_layout_mask = ~0;
277
278 /* SkyLake added support for 16x MSAA. With this came a new message for
279 * reading from a 16x MSAA surface with compression. The new message was
280 * needed because now the MCS data is 64 bits instead of 32 or lower as is
281 * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which
282 * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x
283 * so we can just use it unconditionally. This may not be quite as
284 * efficient but it saves us from recompiling.
285 */
286 if (devinfo->gen >= 9)
287 key->msaa_16 = ~0;
288
289 /* XXX: Handle texture swizzle on HSW- */
290 for (int i = 0; i < MAX_SAMPLERS; i++) {
291 /* Assume color sampler, no swizzling. (Works for BDW+) */
292 key->swizzles[i] = SWIZZLE_XYZW;
293 }
294 }
295
296 static void
297 populate_vs_prog_key(const struct gen_device_info *devinfo,
298 struct brw_vs_prog_key *key)
299 {
300 memset(key, 0, sizeof(*key));
301
302 populate_sampler_prog_key(devinfo, &key->tex);
303
304 /* XXX: Handle vertex input work-arounds */
305
306 /* XXX: Handle sampler_prog_key */
307 }
308
309 static void
310 populate_tcs_prog_key(const struct gen_device_info *devinfo,
311 unsigned input_vertices,
312 struct brw_tcs_prog_key *key)
313 {
314 memset(key, 0, sizeof(*key));
315
316 populate_sampler_prog_key(devinfo, &key->tex);
317
318 key->input_vertices = input_vertices;
319 }
320
321 static void
322 populate_tes_prog_key(const struct gen_device_info *devinfo,
323 struct brw_tes_prog_key *key)
324 {
325 memset(key, 0, sizeof(*key));
326
327 populate_sampler_prog_key(devinfo, &key->tex);
328 }
329
330 static void
331 populate_gs_prog_key(const struct gen_device_info *devinfo,
332 struct brw_gs_prog_key *key)
333 {
334 memset(key, 0, sizeof(*key));
335
336 populate_sampler_prog_key(devinfo, &key->tex);
337 }
338
339 static void
340 populate_wm_prog_key(const struct gen_device_info *devinfo,
341 const struct anv_subpass *subpass,
342 const VkPipelineMultisampleStateCreateInfo *ms_info,
343 struct brw_wm_prog_key *key)
344 {
345 memset(key, 0, sizeof(*key));
346
347 populate_sampler_prog_key(devinfo, &key->tex);
348
 349    /* We set this to 0 here and set it to the actual value before we call
350 * brw_compile_fs.
351 */
352 key->input_slots_valid = 0;
353
354 /* Vulkan doesn't specify a default */
355 key->high_quality_derivatives = false;
356
357 /* XXX Vulkan doesn't appear to specify */
358 key->clamp_fragment_color = false;
359
360 assert(subpass->color_count <= MAX_RTS);
361 for (uint32_t i = 0; i < subpass->color_count; i++) {
362 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
363 key->color_outputs_valid |= (1 << i);
364 }
365
366 key->nr_color_regions = util_bitcount(key->color_outputs_valid);
367
368 key->replicate_alpha = key->nr_color_regions > 1 &&
369 ms_info && ms_info->alphaToCoverageEnable;
370
371 if (ms_info) {
372 /* We should probably pull this out of the shader, but it's fairly
373 * harmless to compute it and then let dead-code take care of it.
374 */
375 if (ms_info->rasterizationSamples > 1) {
376 key->persample_interp =
377 (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
378 key->multisample_fbo = true;
379 }
380
381 key->frag_coord_adds_sample_pos = ms_info->sampleShadingEnable;
382 }
383 }
384
385 static void
386 populate_cs_prog_key(const struct gen_device_info *devinfo,
387 struct brw_cs_prog_key *key)
388 {
389 memset(key, 0, sizeof(*key));
390
391 populate_sampler_prog_key(devinfo, &key->tex);
392 }
393
394 struct anv_pipeline_stage {
395 gl_shader_stage stage;
396
397 const struct anv_shader_module *module;
398 const char *entrypoint;
399 const VkSpecializationInfo *spec_info;
400
401 unsigned char shader_sha1[20];
402
403 union brw_any_prog_key key;
404
405 struct {
406 gl_shader_stage stage;
407 unsigned char sha1[20];
408 } cache_key;
409
410 nir_shader *nir;
411
412 struct anv_pipeline_binding surface_to_descriptor[256];
413 struct anv_pipeline_binding sampler_to_descriptor[256];
414 struct anv_pipeline_bind_map bind_map;
415
416 union brw_any_prog_data prog_data;
417 };
418
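     /* Hash the shader module SHA-1, entrypoint name, stage, and any
      * specialization constants into a per-stage SHA-1.  This is the key
      * used to look the stage's NIR up in the pipeline cache.
      */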
419 static void
420 anv_pipeline_hash_shader(const struct anv_shader_module *module,
421 const char *entrypoint,
422 gl_shader_stage stage,
423 const VkSpecializationInfo *spec_info,
424 unsigned char *sha1_out)
425 {
426 struct mesa_sha1 ctx;
427 _mesa_sha1_init(&ctx);
428
429 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
430 _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
431 _mesa_sha1_update(&ctx, &stage, sizeof(stage));
432 if (spec_info) {
433 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
434 spec_info->mapEntryCount *
435 sizeof(*spec_info->pMapEntries));
436 _mesa_sha1_update(&ctx, spec_info->pData,
437 spec_info->dataSize);
438 }
439
440 _mesa_sha1_final(&ctx, sha1_out);
441 }
442
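     /* Compute the graphics pipeline cache key: subpass view mask, pipeline
      * layout SHA-1, robust buffer access, and each active stage's shader
      * SHA-1 and program key.
      */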
443 static void
444 anv_pipeline_hash_graphics(struct anv_pipeline *pipeline,
445 struct anv_pipeline_layout *layout,
446 struct anv_pipeline_stage *stages,
447 unsigned char *sha1_out)
448 {
449 struct mesa_sha1 ctx;
450 _mesa_sha1_init(&ctx);
451
452 _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
453 sizeof(pipeline->subpass->view_mask));
454
455 if (layout)
456 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
457
458 const bool rba = pipeline->device->robust_buffer_access;
459 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
460
461 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
462 if (stages[s].entrypoint) {
463 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
464 sizeof(stages[s].shader_sha1));
465 _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
466 }
467 }
468
469 _mesa_sha1_final(&ctx, sha1_out);
470 }
471
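     /* Compute the compute pipeline cache key: pipeline layout SHA-1, robust
      * buffer access, and the compute stage's shader SHA-1 and program key.
      */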
472 static void
473 anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
474 struct anv_pipeline_layout *layout,
475 struct anv_pipeline_stage *stage,
476 unsigned char *sha1_out)
477 {
478 struct mesa_sha1 ctx;
479 _mesa_sha1_init(&ctx);
480
481 if (layout)
482 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
483
484 const bool rba = pipeline->device->robust_buffer_access;
485 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
486
487 _mesa_sha1_update(&ctx, stage->shader_sha1,
488 sizeof(stage->shader_sha1));
489 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
490
491 _mesa_sha1_final(&ctx, sha1_out);
492 }
493
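     /* Look the stage's NIR up in the pipeline cache by its shader SHA-1.
      * On a miss, compile the SPIR-V to NIR and upload the result to the
      * cache for future pipelines.
      */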
494 static nir_shader *
495 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
496 struct anv_pipeline_cache *cache,
497 void *mem_ctx,
498 struct anv_pipeline_stage *stage)
499 {
500 const struct brw_compiler *compiler =
501 pipeline->device->instance->physicalDevice.compiler;
502 const nir_shader_compiler_options *nir_options =
503 compiler->glsl_compiler_options[stage->stage].NirOptions;
504 nir_shader *nir;
505
506 nir = anv_device_search_for_nir(pipeline->device, cache,
507 nir_options,
508 stage->shader_sha1,
509 mem_ctx);
510 if (nir) {
511 assert(nir->info.stage == stage->stage);
512 return nir;
513 }
514
515 nir = anv_shader_compile_to_nir(pipeline->device,
516 mem_ctx,
517 stage->module,
518 stage->entrypoint,
519 stage->stage,
520 stage->spec_info);
521 if (nir) {
522 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
523 return nir;
524 }
525
526 return NULL;
527 }
528
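     /* Run the anv-specific lowering passes: fragment coordinate and input
      * attachment lowering for fragment shaders, YCbCr and push constant
      * lowering, multiview, push constant parameter setup, image load/store
      * lowering, and application of the pipeline layout to the bind map.
      */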
529 static void
530 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
531 void *mem_ctx,
532 struct anv_pipeline_stage *stage,
533 struct anv_pipeline_layout *layout)
534 {
535 const struct brw_compiler *compiler =
536 pipeline->device->instance->physicalDevice.compiler;
537
538 struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
539 nir_shader *nir = stage->nir;
540
541 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
542 NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable);
543 NIR_PASS_V(nir, anv_nir_lower_input_attachments);
544 }
545
546 NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
547
548 NIR_PASS_V(nir, anv_nir_lower_push_constants);
549
550 if (nir->info.stage != MESA_SHADER_COMPUTE)
551 NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask);
552
553 if (nir->info.stage == MESA_SHADER_COMPUTE)
554 prog_data->total_shared = nir->num_shared;
555
556 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
557
558 if (nir->num_uniforms > 0) {
559 assert(prog_data->nr_params == 0);
560
561 /* If the shader uses any push constants at all, we'll just give
562 * them the maximum possible number
563 */
564 assert(nir->num_uniforms <= MAX_PUSH_CONSTANTS_SIZE);
565 nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE;
566 prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
567 prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);
568
 569          /* We now set the param values to be offsets into an
 570           * anv_push_constants structure.  Since the compiler doesn't
571 * actually dereference any of the gl_constant_value pointers in the
572 * params array, it doesn't really matter what we put here.
573 */
574 struct anv_push_constants *null_data = NULL;
575 /* Fill out the push constants section of the param array */
576 for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) {
577 prog_data->param[i] = ANV_PARAM_PUSH(
578 (uintptr_t)&null_data->client_data[i * sizeof(float)]);
579 }
580 }
581
582 if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
583 pipeline->needs_data_cache = true;
584
585 NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
586
587 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
588 if (layout) {
589 anv_nir_apply_pipeline_layout(&pipeline->device->instance->physicalDevice,
590 pipeline->device->robust_buffer_access,
591 layout, nir, prog_data,
592 &stage->bind_map);
593 NIR_PASS_V(nir, nir_opt_constant_folding);
594 }
595
596 if (nir->info.stage != MESA_SHADER_COMPUTE)
597 brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
598
599 assert(nir->num_uniforms == prog_data->nr_params * 4);
600
601 stage->nir = nir;
602 }
603
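     /* Point every binding table section (textures, UBOs, SSBOs, images) at
      * the same starting offset ("bias") within the stage's binding table.
      */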
604 static void
605 anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
606 {
607 prog_data->binding_table.size_bytes = 0;
608 prog_data->binding_table.texture_start = bias;
609 prog_data->binding_table.gather_texture_start = bias;
610 prog_data->binding_table.ubo_start = bias;
611 prog_data->binding_table.ssbo_start = bias;
612 prog_data->binding_table.image_start = bias;
613 }
614
615 static void
616 anv_pipeline_link_vs(const struct brw_compiler *compiler,
617 struct anv_pipeline_stage *vs_stage,
618 struct anv_pipeline_stage *next_stage)
619 {
620 anv_fill_binding_table(&vs_stage->prog_data.vs.base.base, 0);
621
622 if (next_stage)
623 brw_nir_link_shaders(compiler, &vs_stage->nir, &next_stage->nir);
624 }
625
626 static const unsigned *
627 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
628 void *mem_ctx,
629 struct anv_pipeline_stage *vs_stage)
630 {
631 brw_compute_vue_map(compiler->devinfo,
632 &vs_stage->prog_data.vs.base.vue_map,
633 vs_stage->nir->info.outputs_written,
634 vs_stage->nir->info.separate_shader);
635
636 return brw_compile_vs(compiler, NULL, mem_ctx, &vs_stage->key.vs,
637 &vs_stage->prog_data.vs, vs_stage->nir, -1, NULL);
638 }
639
640 static void
641 merge_tess_info(struct shader_info *tes_info,
642 const struct shader_info *tcs_info)
643 {
644 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
645 *
646 * "PointMode. Controls generation of points rather than triangles
647 * or lines. This functionality defaults to disabled, and is
 648     *     enabled if either shader stage includes the execution mode."
649 *
650 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
651 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
652 * and OutputVertices, it says:
653 *
654 * "One mode must be set in at least one of the tessellation
655 * shader stages."
656 *
657 * So, the fields can be set in either the TCS or TES, but they must
658 * agree if set in both. Our backend looks at TES, so bitwise-or in
659 * the values from the TCS.
660 */
661 assert(tcs_info->tess.tcs_vertices_out == 0 ||
662 tes_info->tess.tcs_vertices_out == 0 ||
663 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
664 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
665
666 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
667 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
668 tcs_info->tess.spacing == tes_info->tess.spacing);
669 tes_info->tess.spacing |= tcs_info->tess.spacing;
670
671 assert(tcs_info->tess.primitive_mode == 0 ||
672 tes_info->tess.primitive_mode == 0 ||
673 tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
674 tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
675 tes_info->tess.ccw |= tcs_info->tess.ccw;
676 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
677 }
678
679 static void
680 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
681 struct anv_pipeline_stage *tcs_stage,
682 struct anv_pipeline_stage *tes_stage)
683 {
684 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
685
686 anv_fill_binding_table(&tcs_stage->prog_data.tcs.base.base, 0);
687
688 brw_nir_link_shaders(compiler, &tcs_stage->nir, &tes_stage->nir);
689
690 nir_lower_patch_vertices(tes_stage->nir,
691 tcs_stage->nir->info.tess.tcs_vertices_out,
692 NULL);
693
694 /* Copy TCS info into the TES info */
695 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
696
697 anv_fill_binding_table(&tcs_stage->prog_data.tcs.base.base, 0);
698 anv_fill_binding_table(&tes_stage->prog_data.tes.base.base, 0);
699
700 /* Whacking the key after cache lookup is a bit sketchy, but all of
701 * this comes from the SPIR-V, which is part of the hash used for the
702 * pipeline cache. So it should be safe.
703 */
704 tcs_stage->key.tcs.tes_primitive_mode =
705 tes_stage->nir->info.tess.primitive_mode;
706 tcs_stage->key.tcs.quads_workaround =
707 compiler->devinfo->gen < 9 &&
708 tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
709 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
710 }
711
712 static const unsigned *
713 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
714 void *mem_ctx,
715 struct anv_pipeline_stage *tcs_stage,
716 struct anv_pipeline_stage *prev_stage)
717 {
718 tcs_stage->key.tcs.outputs_written =
719 tcs_stage->nir->info.outputs_written;
720 tcs_stage->key.tcs.patch_outputs_written =
721 tcs_stage->nir->info.patch_outputs_written;
722
723 return brw_compile_tcs(compiler, NULL, mem_ctx, &tcs_stage->key.tcs,
724 &tcs_stage->prog_data.tcs, tcs_stage->nir,
725 -1, NULL);
726 }
727
728 static void
729 anv_pipeline_link_tes(const struct brw_compiler *compiler,
730 struct anv_pipeline_stage *tes_stage,
731 struct anv_pipeline_stage *next_stage)
732 {
733 anv_fill_binding_table(&tes_stage->prog_data.tes.base.base, 0);
734
735 if (next_stage)
736 brw_nir_link_shaders(compiler, &tes_stage->nir, &next_stage->nir);
737 }
738
739 static const unsigned *
740 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
741 void *mem_ctx,
742 struct anv_pipeline_stage *tes_stage,
743 struct anv_pipeline_stage *tcs_stage)
744 {
745 tes_stage->key.tes.inputs_read =
746 tcs_stage->nir->info.outputs_written;
747 tes_stage->key.tes.patch_inputs_read =
748 tcs_stage->nir->info.patch_outputs_written;
749
750 return brw_compile_tes(compiler, NULL, mem_ctx, &tes_stage->key.tes,
751 &tcs_stage->prog_data.tcs.base.vue_map,
752 &tes_stage->prog_data.tes, tes_stage->nir,
753 NULL, -1, NULL);
754 }
755
756 static void
757 anv_pipeline_link_gs(const struct brw_compiler *compiler,
758 struct anv_pipeline_stage *gs_stage,
759 struct anv_pipeline_stage *next_stage)
760 {
761 anv_fill_binding_table(&gs_stage->prog_data.gs.base.base, 0);
762
763 if (next_stage)
764 brw_nir_link_shaders(compiler, &gs_stage->nir, &next_stage->nir);
765 }
766
767 static const unsigned *
768 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
769 void *mem_ctx,
770 struct anv_pipeline_stage *gs_stage,
771 struct anv_pipeline_stage *prev_stage)
772 {
773 brw_compute_vue_map(compiler->devinfo,
774 &gs_stage->prog_data.gs.base.vue_map,
775 gs_stage->nir->info.outputs_written,
776 gs_stage->nir->info.separate_shader);
777
778 return brw_compile_gs(compiler, NULL, mem_ctx, &gs_stage->key.gs,
779 &gs_stage->prog_data.gs, gs_stage->nir,
780 NULL, -1, NULL);
781 }
782
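     /* Compact the fragment shader's color outputs: used render targets are
      * remapped to contiguous binding table entries, unused outputs become
      * local temporaries, the resulting render-target bindings are recorded
      * in the stage's bind map, and the wm key is updated to the compacted
      * render-target count.
      */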
783 static void
784 anv_pipeline_link_fs(const struct brw_compiler *compiler,
785 struct anv_pipeline_stage *stage)
786 {
787 unsigned num_rts = 0;
788 const int max_rt = FRAG_RESULT_DATA7 - FRAG_RESULT_DATA0 + 1;
789 struct anv_pipeline_binding rt_bindings[max_rt];
790 nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
791 int rt_to_bindings[max_rt];
792 memset(rt_to_bindings, -1, sizeof(rt_to_bindings));
793 bool rt_used[max_rt];
794 memset(rt_used, 0, sizeof(rt_used));
795
796 /* Flag used render targets */
797 nir_foreach_variable_safe(var, &stage->nir->outputs) {
798 if (var->data.location < FRAG_RESULT_DATA0)
799 continue;
800
801 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
802 /* Unused or out-of-bounds */
803 if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & (1 << rt)))
804 continue;
805
806 const unsigned array_len =
807 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
808 assert(rt + array_len <= max_rt);
809
810 for (unsigned i = 0; i < array_len; i++)
811 rt_used[rt + i] = true;
812 }
813
 814    /* Set the new, compacted location */
815 for (unsigned i = 0; i < max_rt; i++) {
816 if (!rt_used[i])
817 continue;
818
819 rt_to_bindings[i] = num_rts;
820 rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
821 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
822 .binding = 0,
823 .index = i,
824 };
825 num_rts++;
826 }
827
828 bool deleted_output = false;
829 nir_foreach_variable_safe(var, &stage->nir->outputs) {
830 if (var->data.location < FRAG_RESULT_DATA0)
831 continue;
832
833 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
834 if (rt >= MAX_RTS ||
835 !(stage->key.wm.color_outputs_valid & (1 << rt))) {
836 /* Unused or out-of-bounds, throw it away */
837 deleted_output = true;
838 var->data.mode = nir_var_function_temp;
839 exec_node_remove(&var->node);
840 exec_list_push_tail(&impl->locals, &var->node);
841 continue;
842 }
843
844 /* Give it the new location */
845 assert(rt_to_bindings[rt] != -1);
846 var->data.location = rt_to_bindings[rt] + FRAG_RESULT_DATA0;
847 }
848
849 if (deleted_output)
850 nir_fixup_deref_modes(stage->nir);
851
852 if (num_rts == 0) {
853 /* If we have no render targets, we need a null render target */
854 rt_bindings[0] = (struct anv_pipeline_binding) {
855 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
856 .binding = 0,
857 .index = UINT32_MAX,
858 };
859 num_rts = 1;
860 }
861
862 /* Now that we've determined the actual number of render targets, adjust
863 * the key accordingly.
864 */
865 stage->key.wm.nr_color_regions = num_rts;
866 stage->key.wm.color_outputs_valid = (1 << num_rts) - 1;
867
868 assert(num_rts <= max_rt);
869 assert(stage->bind_map.surface_count == 0);
870 typed_memcpy(stage->bind_map.surface_to_descriptor,
871 rt_bindings, num_rts);
872 stage->bind_map.surface_count += num_rts;
873
874 anv_fill_binding_table(&stage->prog_data.wm.base, 0);
875 }
876
877 static const unsigned *
878 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
879 void *mem_ctx,
880 struct anv_pipeline_stage *fs_stage,
881 struct anv_pipeline_stage *prev_stage)
882 {
883 /* TODO: we could set this to 0 based on the information in nir_shader, but
884 * we need this before we call spirv_to_nir.
885 */
886 assert(prev_stage);
887 fs_stage->key.wm.input_slots_valid =
888 prev_stage->prog_data.vue.vue_map.slots_valid;
889
890 const unsigned *code =
891 brw_compile_fs(compiler, NULL, mem_ctx, &fs_stage->key.wm,
892 &fs_stage->prog_data.wm, fs_stage->nir,
893 NULL, -1, -1, -1, true, false, NULL, NULL);
894
895 if (fs_stage->key.wm.nr_color_regions == 0 &&
896 !fs_stage->prog_data.wm.has_side_effects &&
897 !fs_stage->prog_data.wm.uses_kill &&
898 fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
899 !fs_stage->prog_data.wm.computed_stencil) {
900 /* This fragment shader has no outputs and no side effects. Go ahead
901 * and return the code pointer so we don't accidentally think the
 902       * compile failed, but zero out prog_data, which will set program_size
 903       * to zero and disable the stage.
904 */
905 memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
906 }
907
908 return code;
909 }
910
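     /* Compile all graphics stages for the pipeline.  Per-stage keys and
      * SHA-1s are computed first so the pipeline cache can be consulted; on
      * a miss, the stages are linked back-to-front, compiled front-to-back,
      * and the resulting kernels are uploaded to the cache.
      */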
911 static VkResult
912 anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
913 struct anv_pipeline_cache *cache,
914 const VkGraphicsPipelineCreateInfo *info)
915 {
916 const struct brw_compiler *compiler =
917 pipeline->device->instance->physicalDevice.compiler;
918 struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
919
920 pipeline->active_stages = 0;
921
922 VkResult result;
923 for (uint32_t i = 0; i < info->stageCount; i++) {
924 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
925 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
926
927 pipeline->active_stages |= sinfo->stage;
928
929 stages[stage].stage = stage;
930 stages[stage].module = anv_shader_module_from_handle(sinfo->module);
931 stages[stage].entrypoint = sinfo->pName;
932 stages[stage].spec_info = sinfo->pSpecializationInfo;
933 anv_pipeline_hash_shader(stages[stage].module,
934 stages[stage].entrypoint,
935 stage,
936 stages[stage].spec_info,
937 stages[stage].shader_sha1);
938
939 const struct gen_device_info *devinfo = &pipeline->device->info;
940 switch (stage) {
941 case MESA_SHADER_VERTEX:
942 populate_vs_prog_key(devinfo, &stages[stage].key.vs);
943 break;
944 case MESA_SHADER_TESS_CTRL:
945 populate_tcs_prog_key(devinfo,
946 info->pTessellationState->patchControlPoints,
947 &stages[stage].key.tcs);
948 break;
949 case MESA_SHADER_TESS_EVAL:
950 populate_tes_prog_key(devinfo, &stages[stage].key.tes);
951 break;
952 case MESA_SHADER_GEOMETRY:
953 populate_gs_prog_key(devinfo, &stages[stage].key.gs);
954 break;
955 case MESA_SHADER_FRAGMENT:
956 populate_wm_prog_key(devinfo, pipeline->subpass,
957 info->pMultisampleState,
958 &stages[stage].key.wm);
959 break;
960 default:
961 unreachable("Invalid graphics shader stage");
962 }
963 }
964
965 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
966 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
967
968 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
969
970 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
971
972 unsigned char sha1[20];
973 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
974
975 unsigned found = 0;
976 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
977 if (!stages[s].entrypoint)
978 continue;
979
980 stages[s].cache_key.stage = s;
981 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
982
983 struct anv_shader_bin *bin =
984 anv_device_search_for_kernel(pipeline->device, cache,
985 &stages[s].cache_key,
986 sizeof(stages[s].cache_key));
987 if (bin) {
988 found++;
989 pipeline->shaders[s] = bin;
990 }
991 }
992
993 if (found == __builtin_popcount(pipeline->active_stages)) {
994 /* We found all our shaders in the cache. We're done. */
995 goto done;
996 } else if (found > 0) {
997 /* We found some but not all of our shaders. This shouldn't happen
998 * most of the time but it can if we have a partially populated
999 * pipeline cache.
1000 */
1001 assert(found < __builtin_popcount(pipeline->active_stages));
1002
1003 vk_debug_report(&pipeline->device->instance->debug_report_callbacks,
1004 VK_DEBUG_REPORT_WARNING_BIT_EXT |
1005 VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
1006 VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,
1007 (uint64_t)(uintptr_t)cache,
1008 0, 0, "anv",
1009 "Found a partial pipeline in the cache. This is "
1010 "most likely caused by an incomplete pipeline cache "
1011 "import or export");
1012
1013 /* We're going to have to recompile anyway, so just throw away our
1014 * references to the shaders in the cache. We'll get them out of the
1015 * cache again as part of the compilation process.
1016 */
1017 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1018 if (pipeline->shaders[s]) {
1019 anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
1020 pipeline->shaders[s] = NULL;
1021 }
1022 }
1023 }
1024
1025 void *pipeline_ctx = ralloc_context(NULL);
1026
1027 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1028 if (!stages[s].entrypoint)
1029 continue;
1030
1031 assert(stages[s].stage == s);
1032 assert(pipeline->shaders[s] == NULL);
1033
1034 stages[s].bind_map = (struct anv_pipeline_bind_map) {
1035 .surface_to_descriptor = stages[s].surface_to_descriptor,
1036 .sampler_to_descriptor = stages[s].sampler_to_descriptor
1037 };
1038
1039 stages[s].nir = anv_pipeline_stage_get_nir(pipeline, cache,
1040 pipeline_ctx,
1041 &stages[s]);
1042 if (stages[s].nir == NULL) {
1043 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1044 goto fail;
1045 }
1046 }
1047
1048 /* Walk backwards to link */
1049 struct anv_pipeline_stage *next_stage = NULL;
1050 for (int s = MESA_SHADER_STAGES - 1; s >= 0; s--) {
1051 if (!stages[s].entrypoint)
1052 continue;
1053
1054 switch (s) {
1055 case MESA_SHADER_VERTEX:
1056 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1057 break;
1058 case MESA_SHADER_TESS_CTRL:
1059 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1060 break;
1061 case MESA_SHADER_TESS_EVAL:
1062 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1063 break;
1064 case MESA_SHADER_GEOMETRY:
1065 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1066 break;
1067 case MESA_SHADER_FRAGMENT:
1068 anv_pipeline_link_fs(compiler, &stages[s]);
1069 break;
1070 default:
1071 unreachable("Invalid graphics shader stage");
1072 }
1073
1074 next_stage = &stages[s];
1075 }
1076
1077 struct anv_pipeline_stage *prev_stage = NULL;
1078 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1079 if (!stages[s].entrypoint)
1080 continue;
1081
1082 void *stage_ctx = ralloc_context(NULL);
1083
1084 anv_pipeline_lower_nir(pipeline, stage_ctx, &stages[s], layout);
1085
1086 const unsigned *code;
1087 switch (s) {
1088 case MESA_SHADER_VERTEX:
1089 code = anv_pipeline_compile_vs(compiler, stage_ctx, &stages[s]);
1090 break;
1091 case MESA_SHADER_TESS_CTRL:
1092 code = anv_pipeline_compile_tcs(compiler, stage_ctx,
1093 &stages[s], prev_stage);
1094 break;
1095 case MESA_SHADER_TESS_EVAL:
1096 code = anv_pipeline_compile_tes(compiler, stage_ctx,
1097 &stages[s], prev_stage);
1098 break;
1099 case MESA_SHADER_GEOMETRY:
1100 code = anv_pipeline_compile_gs(compiler, stage_ctx,
1101 &stages[s], prev_stage);
1102 break;
1103 case MESA_SHADER_FRAGMENT:
1104 code = anv_pipeline_compile_fs(compiler, stage_ctx,
1105 &stages[s], prev_stage);
1106 break;
1107 default:
1108 unreachable("Invalid graphics shader stage");
1109 }
1110 if (code == NULL) {
1111 ralloc_free(stage_ctx);
1112 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1113 goto fail;
1114 }
1115
1116 struct anv_shader_bin *bin =
1117 anv_device_upload_kernel(pipeline->device, cache,
1118 &stages[s].cache_key,
1119 sizeof(stages[s].cache_key),
1120 code, stages[s].prog_data.base.program_size,
1121 stages[s].nir->constant_data,
1122 stages[s].nir->constant_data_size,
1123 &stages[s].prog_data.base,
1124 brw_prog_data_size(s),
1125 &stages[s].bind_map);
1126 if (!bin) {
1127 ralloc_free(stage_ctx);
1128 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1129 goto fail;
1130 }
1131
1132 pipeline->shaders[s] = bin;
1133 ralloc_free(stage_ctx);
1134
1135 prev_stage = &stages[s];
1136 }
1137
1138 ralloc_free(pipeline_ctx);
1139
1140 done:
1141
1142 if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1143 pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1144 /* This can happen if we decided to implicitly disable the fragment
1145 * shader. See anv_pipeline_compile_fs().
1146 */
1147 anv_shader_bin_unref(pipeline->device,
1148 pipeline->shaders[MESA_SHADER_FRAGMENT]);
1149 pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1150 pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1151 }
1152
1153 return VK_SUCCESS;
1154
1155 fail:
1156 ralloc_free(pipeline_ctx);
1157
1158 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1159 if (pipeline->shaders[s])
1160 anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
1161 }
1162
1163 return result;
1164 }
1165
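      /* Compile the compute shader, first consulting the pipeline cache and
       * falling back to a SPIR-V -> NIR -> backend compile on a miss.
       */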
1166 VkResult
1167 anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
1168 struct anv_pipeline_cache *cache,
1169 const VkComputePipelineCreateInfo *info,
1170 const struct anv_shader_module *module,
1171 const char *entrypoint,
1172 const VkSpecializationInfo *spec_info)
1173 {
1174 const struct brw_compiler *compiler =
1175 pipeline->device->instance->physicalDevice.compiler;
1176
1177 struct anv_pipeline_stage stage = {
1178 .stage = MESA_SHADER_COMPUTE,
1179 .module = module,
1180 .entrypoint = entrypoint,
1181 .spec_info = spec_info,
1182 .cache_key = {
1183 .stage = MESA_SHADER_COMPUTE,
1184 }
1185 };
1186 anv_pipeline_hash_shader(stage.module,
1187 stage.entrypoint,
1188 MESA_SHADER_COMPUTE,
1189 stage.spec_info,
1190 stage.shader_sha1);
1191
1192 struct anv_shader_bin *bin = NULL;
1193
1194 populate_cs_prog_key(&pipeline->device->info, &stage.key.cs);
1195
1196 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1197
1198 anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1199 bin = anv_device_search_for_kernel(pipeline->device, cache, &stage.cache_key,
1200 sizeof(stage.cache_key));
1201
1202 if (bin == NULL) {
1203 stage.bind_map = (struct anv_pipeline_bind_map) {
1204 .surface_to_descriptor = stage.surface_to_descriptor,
1205 .sampler_to_descriptor = stage.sampler_to_descriptor
1206 };
1207
1208 void *mem_ctx = ralloc_context(NULL);
1209
1210 stage.nir = anv_pipeline_stage_get_nir(pipeline, cache, mem_ctx, &stage);
1211 if (stage.nir == NULL) {
1212 ralloc_free(mem_ctx);
1213 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1214 }
1215
1216 anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
1217
1218 NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,
1219 &stage.prog_data.cs);
1220
1221 anv_fill_binding_table(&stage.prog_data.cs.base, 1);
1222
1223 const unsigned *shader_code =
1224 brw_compile_cs(compiler, NULL, mem_ctx, &stage.key.cs,
1225 &stage.prog_data.cs, stage.nir, -1, NULL);
1226 if (shader_code == NULL) {
1227 ralloc_free(mem_ctx);
1228 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1229 }
1230
1231 const unsigned code_size = stage.prog_data.base.program_size;
1232 bin = anv_device_upload_kernel(pipeline->device, cache,
1233 &stage.cache_key, sizeof(stage.cache_key),
1234 shader_code, code_size,
1235 stage.nir->constant_data,
1236 stage.nir->constant_data_size,
1237 &stage.prog_data.base,
1238 sizeof(stage.prog_data.cs),
1239 &stage.bind_map);
1240 if (!bin) {
1241 ralloc_free(mem_ctx);
1242 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1243 }
1244
1245 ralloc_free(mem_ctx);
1246 }
1247
1248 pipeline->active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
1249 pipeline->shaders[MESA_SHADER_COMPUTE] = bin;
1250
1251 return VK_SUCCESS;
1252 }
1253
1254 /**
1255 * Copy pipeline state not marked as dynamic.
1256 * Dynamic state is pipeline state which hasn't been provided at pipeline
1257 * creation time, but is dynamically provided afterwards using various
1258 * vkCmdSet* functions.
1259 *
1260 * The set of state considered "non_dynamic" is determined by the pieces of
1261 * state that have their corresponding VkDynamicState enums omitted from
1262 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1263 *
1264 * @param[out] pipeline Destination non_dynamic state.
1265 * @param[in] pCreateInfo Source of non_dynamic state to be copied.
1266 */
1267 static void
1268 copy_non_dynamic_state(struct anv_pipeline *pipeline,
1269 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1270 {
1271 anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
1272 struct anv_subpass *subpass = pipeline->subpass;
1273
1274 pipeline->dynamic_state = default_dynamic_state;
1275
1276 if (pCreateInfo->pDynamicState) {
1277 /* Remove all of the states that are marked as dynamic */
1278 uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
1279 for (uint32_t s = 0; s < count; s++)
1280 states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]);
1281 }
1282
1283 struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
1284
1285 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1286 *
1287 * pViewportState is [...] NULL if the pipeline
1288 * has rasterization disabled.
1289 */
1290 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1291 assert(pCreateInfo->pViewportState);
1292
1293 dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
1294 if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
1295 typed_memcpy(dynamic->viewport.viewports,
1296 pCreateInfo->pViewportState->pViewports,
1297 pCreateInfo->pViewportState->viewportCount);
1298 }
1299
1300 dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
1301 if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
1302 typed_memcpy(dynamic->scissor.scissors,
1303 pCreateInfo->pViewportState->pScissors,
1304 pCreateInfo->pViewportState->scissorCount);
1305 }
1306 }
1307
1308 if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
1309 assert(pCreateInfo->pRasterizationState);
1310 dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
1311 }
1312
1313 if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
1314 assert(pCreateInfo->pRasterizationState);
1315 dynamic->depth_bias.bias =
1316 pCreateInfo->pRasterizationState->depthBiasConstantFactor;
1317 dynamic->depth_bias.clamp =
1318 pCreateInfo->pRasterizationState->depthBiasClamp;
1319 dynamic->depth_bias.slope =
1320 pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
1321 }
1322
1323 /* Section 9.2 of the Vulkan 1.0.15 spec says:
1324 *
1325 * pColorBlendState is [...] NULL if the pipeline has rasterization
1326 * disabled or if the subpass of the render pass the pipeline is
1327 * created against does not use any color attachments.
1328 */
1329 bool uses_color_att = false;
1330 for (unsigned i = 0; i < subpass->color_count; ++i) {
1331 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
1332 uses_color_att = true;
1333 break;
1334 }
1335 }
1336
1337 if (uses_color_att &&
1338 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
1339 assert(pCreateInfo->pColorBlendState);
1340
1341 if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS))
1342 typed_memcpy(dynamic->blend_constants,
1343 pCreateInfo->pColorBlendState->blendConstants, 4);
1344 }
1345
1346 /* If there is no depthstencil attachment, then don't read
1347 * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
1348 * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
1349 * no need to override the depthstencil defaults in
1350 * anv_pipeline::dynamic_state when there is no depthstencil attachment.
1351 *
1352 * Section 9.2 of the Vulkan 1.0.15 spec says:
1353 *
1354 * pDepthStencilState is [...] NULL if the pipeline has rasterization
1355 * disabled or if the subpass of the render pass the pipeline is created
1356 * against does not use a depth/stencil attachment.
1357 */
1358 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
1359 subpass->depth_stencil_attachment) {
1360 assert(pCreateInfo->pDepthStencilState);
1361
1362 if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
1363 dynamic->depth_bounds.min =
1364 pCreateInfo->pDepthStencilState->minDepthBounds;
1365 dynamic->depth_bounds.max =
1366 pCreateInfo->pDepthStencilState->maxDepthBounds;
1367 }
1368
1369 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
1370 dynamic->stencil_compare_mask.front =
1371 pCreateInfo->pDepthStencilState->front.compareMask;
1372 dynamic->stencil_compare_mask.back =
1373 pCreateInfo->pDepthStencilState->back.compareMask;
1374 }
1375
1376 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
1377 dynamic->stencil_write_mask.front =
1378 pCreateInfo->pDepthStencilState->front.writeMask;
1379 dynamic->stencil_write_mask.back =
1380 pCreateInfo->pDepthStencilState->back.writeMask;
1381 }
1382
1383 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
1384 dynamic->stencil_reference.front =
1385 pCreateInfo->pDepthStencilState->front.reference;
1386 dynamic->stencil_reference.back =
1387 pCreateInfo->pDepthStencilState->back.reference;
1388 }
1389 }
1390
1391 pipeline->dynamic_state_mask = states;
1392 }
1393
1394 static void
1395 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
1396 {
1397 #ifdef DEBUG
1398 struct anv_render_pass *renderpass = NULL;
1399 struct anv_subpass *subpass = NULL;
1400
1401 /* Assert that all required members of VkGraphicsPipelineCreateInfo are
1402 * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
1403 */
1404 assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1405
1406 renderpass = anv_render_pass_from_handle(info->renderPass);
1407 assert(renderpass);
1408
1409 assert(info->subpass < renderpass->subpass_count);
1410 subpass = &renderpass->subpasses[info->subpass];
1411
1412 assert(info->stageCount >= 1);
1413 assert(info->pVertexInputState);
1414 assert(info->pInputAssemblyState);
1415 assert(info->pRasterizationState);
1416 if (!info->pRasterizationState->rasterizerDiscardEnable) {
1417 assert(info->pViewportState);
1418 assert(info->pMultisampleState);
1419
1420 if (subpass && subpass->depth_stencil_attachment)
1421 assert(info->pDepthStencilState);
1422
1423 if (subpass && subpass->color_count > 0) {
1424 bool all_color_unused = true;
1425 for (int i = 0; i < subpass->color_count; i++) {
1426 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
1427 all_color_unused = false;
1428 }
1429 /* pColorBlendState is ignored if the pipeline has rasterization
1430 * disabled or if the subpass of the render pass the pipeline is
1431 * created against does not use any color attachments.
1432 */
1433 assert(info->pColorBlendState || all_color_unused);
1434 }
1435 }
1436
1437 for (uint32_t i = 0; i < info->stageCount; ++i) {
1438 switch (info->pStages[i].stage) {
1439 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1440 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1441 assert(info->pTessellationState);
1442 break;
1443 default:
1444 break;
1445 }
1446 }
1447 #endif
1448 }
1449
1450 /**
1451 * Calculate the desired L3 partitioning based on the current state of the
1452 * pipeline. For now this simply returns the conservative defaults calculated
 1453  * by gen_get_default_l3_weights(), but we could probably do better by gathering
1454 * more statistics from the pipeline state (e.g. guess of expected URB usage
1455 * and bound surfaces), or by using feed-back from performance counters.
1456 */
1457 void
1458 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
1459 {
1460 const struct gen_device_info *devinfo = &pipeline->device->info;
1461
1462 const struct gen_l3_weights w =
1463 gen_get_default_l3_weights(devinfo, pipeline->needs_data_cache, needs_slm);
1464
1465 pipeline->urb.l3_config = gen_get_l3_config(devinfo, w);
1466 pipeline->urb.total_size =
1467 gen_get_l3_config_urb_size(devinfo, pipeline->urb.l3_config);
1468 }
1469
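      /* Top-level graphics pipeline initialization: validate the create info,
       * copy non-dynamic state, compile the shader stages, configure L3, and
       * record vertex-buffer, instancing, and topology state.
       */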
1470 VkResult
1471 anv_pipeline_init(struct anv_pipeline *pipeline,
1472 struct anv_device *device,
1473 struct anv_pipeline_cache *cache,
1474 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1475 const VkAllocationCallbacks *alloc)
1476 {
1477 VkResult result;
1478
1479 anv_pipeline_validate_create_info(pCreateInfo);
1480
1481 if (alloc == NULL)
1482 alloc = &device->alloc;
1483
1484 pipeline->device = device;
1485
1486 ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
1487 assert(pCreateInfo->subpass < render_pass->subpass_count);
1488 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
1489
1490 result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
1491 if (result != VK_SUCCESS)
1492 return result;
1493
1494 pipeline->batch.alloc = alloc;
1495 pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
1496 pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
1497 pipeline->batch.relocs = &pipeline->batch_relocs;
1498 pipeline->batch.status = VK_SUCCESS;
1499
1500 copy_non_dynamic_state(pipeline, pCreateInfo);
1501 pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState &&
1502 pCreateInfo->pRasterizationState->depthClampEnable;
1503
1504 pipeline->sample_shading_enable = pCreateInfo->pMultisampleState &&
1505 pCreateInfo->pMultisampleState->sampleShadingEnable;
1506
1507 pipeline->needs_data_cache = false;
1508
1509 /* When we free the pipeline, we detect stages based on the NULL status
1510 * of various prog_data pointers. Make them NULL by default.
1511 */
1512 memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
1513
1514 result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
1515 if (result != VK_SUCCESS) {
1516 anv_reloc_list_finish(&pipeline->batch_relocs, alloc);
1517 return result;
1518 }
1519
1520 assert(pipeline->shaders[MESA_SHADER_VERTEX]);
1521
1522 anv_pipeline_setup_l3_config(pipeline, false);
1523
1524 const VkPipelineVertexInputStateCreateInfo *vi_info =
1525 pCreateInfo->pVertexInputState;
1526
1527 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
1528
1529 pipeline->vb_used = 0;
1530 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1531 const VkVertexInputAttributeDescription *desc =
1532 &vi_info->pVertexAttributeDescriptions[i];
1533
1534 if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
1535 pipeline->vb_used |= 1 << desc->binding;
1536 }
1537
1538 for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
1539 const VkVertexInputBindingDescription *desc =
1540 &vi_info->pVertexBindingDescriptions[i];
1541
1542 pipeline->vb[desc->binding].stride = desc->stride;
1543
1544 /* Step rate is programmed per vertex element (attribute), not
1545 * binding. Set up a map of which bindings step per instance, for
1546 * reference by vertex element setup. */
1547 switch (desc->inputRate) {
1548 default:
1549 case VK_VERTEX_INPUT_RATE_VERTEX:
1550 pipeline->vb[desc->binding].instanced = false;
1551 break;
1552 case VK_VERTEX_INPUT_RATE_INSTANCE:
1553 pipeline->vb[desc->binding].instanced = true;
1554 break;
1555 }
1556
1557 pipeline->vb[desc->binding].instance_divisor = 1;
1558 }
1559
1560 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
1561 vk_find_struct_const(vi_info->pNext,
1562 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
1563 if (vi_div_state) {
1564 for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
1565 const VkVertexInputBindingDivisorDescriptionEXT *desc =
1566 &vi_div_state->pVertexBindingDivisors[i];
1567
1568 pipeline->vb[desc->binding].instance_divisor = desc->divisor;
1569 }
1570 }
1571
1572 /* Our implementation of VK_KHR_multiview uses instancing to draw the
1573 * different views. If the client asks for instancing, we need to multiply
 1574  * the instance divisor by the number of views to ensure that we repeat the
1575 * client's per-instance data once for each view.
1576 */
1577 if (pipeline->subpass->view_mask) {
1578 const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
1579 for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
1580 if (pipeline->vb[vb].instanced)
1581 pipeline->vb[vb].instance_divisor *= view_count;
1582 }
1583 }
1584
1585 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1586 pCreateInfo->pInputAssemblyState;
1587 const VkPipelineTessellationStateCreateInfo *tess_info =
1588 pCreateInfo->pTessellationState;
1589 pipeline->primitive_restart = ia_info->primitiveRestartEnable;
1590
1591 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
1592 pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
1593 else
1594 pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
1595
1596 return VK_SUCCESS;
1597 }