anv/pipeline: get map for double input attributes
mesa.git: src/intel/vulkan/anv_pipeline.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "util/mesa-sha1.h"
#include "common/gen_l3_config.h"
#include "anv_private.h"
#include "brw_nir.h"
#include "anv_nir.h"
#include "spirv/nir_spirv.h"

/* Needed for SWIZZLE macros */
#include "program/prog_instruction.h"

// Shader functions

VkResult anv_CreateShaderModule(
    VkDevice                                    _device,
    const VkShaderModuleCreateInfo*             pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkShaderModule*                             pShaderModule)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_shader_module *module;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   module = vk_alloc2(&device->alloc, pAllocator,
                      sizeof(*module) + pCreateInfo->codeSize, 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (module == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   module->size = pCreateInfo->codeSize;
   memcpy(module->data, pCreateInfo->pCode, module->size);

   _mesa_sha1_compute(module->data, module->size, module->sha1);

   *pShaderModule = anv_shader_module_to_handle(module);

   return VK_SUCCESS;
}

void anv_DestroyShaderModule(
    VkDevice                                    _device,
    VkShaderModule                              _module,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_shader_module, module, _module);

   if (!module)
      return;

   vk_free2(&device->alloc, pAllocator, module);
}

#define SPIR_V_MAGIC_NUMBER 0x07230203

/* Eventually, this will become part of anv_CreateShader. Unfortunately,
 * we can't do that yet because we don't have the ability to copy nir.
 */
static nir_shader *
anv_shader_compile_to_nir(struct anv_device *device,
                          struct anv_shader_module *module,
                          const char *entrypoint_name,
                          gl_shader_stage stage,
                          const VkSpecializationInfo *spec_info)
{
   if (strcmp(entrypoint_name, "main") != 0) {
      anv_finishme("Multiple shaders per module not really supported");
   }

   const struct brw_compiler *compiler =
      device->instance->physicalDevice.compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[stage].NirOptions;

   uint32_t *spirv = (uint32_t *) module->data;
   assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
   assert(module->size % 4 == 0);

   uint32_t num_spec_entries = 0;
   struct nir_spirv_specialization *spec_entries = NULL;
   if (spec_info && spec_info->mapEntryCount > 0) {
      num_spec_entries = spec_info->mapEntryCount;
      spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
      for (uint32_t i = 0; i < num_spec_entries; i++) {
         VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
         const void *data = spec_info->pData + entry.offset;
         assert(data + entry.size <= spec_info->pData + spec_info->dataSize);

         spec_entries[i].id = spec_info->pMapEntries[i].constantID;
         /* The width of this constant comes from the entry's own size;
          * dataSize is the size of the entire pData blob, not of one entry.
          */
         if (entry.size == 8)
            spec_entries[i].data64 = *(const uint64_t *)data;
         else
            spec_entries[i].data32 = *(const uint32_t *)data;
      }
   }
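   /* Illustrative example (not taken from any particular application): a
    * VkSpecializationInfo specializing one 32-bit and one 64-bit constant
    * would typically look like
    *
    *    pMapEntries = { { .constantID = 0, .offset = 0, .size = 4 },
    *                    { .constantID = 1, .offset = 8, .size = 8 } }
    *    dataSize    = 16
    *
    * so the 4-byte entry lands in data32 and the 8-byte entry in data64.
    */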

   nir_function *entry_point =
      spirv_to_nir(spirv, module->size / 4,
                   spec_entries, num_spec_entries,
                   stage, entrypoint_name, NULL, nir_options);
   nir_shader *nir = entry_point->shader;
   assert(nir->stage == stage);
   nir_validate_shader(nir);

   free(spec_entries);

   if (stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, nir_lower_wpos_center);

   /* We have to lower away local constant initializers right before we
    * inline functions. That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);

   /* Pick off the single entrypoint that we want */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (func != entry_point)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
   entry_point->name = ralloc_strdup(entry_point, "main");

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers.
    */
   NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
   NIR_PASS_V(nir, nir_propagate_invariant);
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              entry_point->impl, true, false);
   NIR_PASS_V(nir, nir_lower_system_values);

   /* Vulkan uses the separate-shader linking model */
   nir->info->separate_shader = true;

   nir = brw_preprocess_nir(compiler, nir);

   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

   if (stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, anv_nir_lower_input_attachments);

   nir_shader_gather_info(nir, entry_point->impl);

   return nir;
}

void anv_DestroyPipeline(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   if (!pipeline)
      return;

   anv_reloc_list_finish(&pipeline->batch_relocs,
                         pAllocator ? pAllocator : &device->alloc);
   if (pipeline->blend_state.map)
      anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);

   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (pipeline->shaders[s])
         anv_shader_bin_unref(device, pipeline->shaders[s]);
   }

   vk_free2(&device->alloc, pAllocator, pipeline);
}

static const uint32_t vk_to_gen_primitive_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST]                    = _3DPRIM_PATCHLIST_1 */
};

static void
populate_sampler_prog_key(const struct gen_device_info *devinfo,
                          struct brw_sampler_prog_key_data *key)
{
   /* XXX: Handle texture swizzle on HSW- */
   for (int i = 0; i < MAX_SAMPLERS; i++) {
      /* Assume color sampler, no swizzling. (Works for BDW+) */
      key->swizzles[i] = SWIZZLE_XYZW;
   }
}
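/* Note on populate_sampler_prog_key(): SWIZZLE_XYZW is the identity swizzle,
 * so sampled values are passed through with their channels unmodified. The
 * XXX above refers to Haswell and older parts, where format swizzles are not
 * handled by the surface state and would have to be applied in the shader
 * via this key.
 */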

static void
populate_vs_prog_key(const struct gen_device_info *devinfo,
                     struct brw_vs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_sampler_prog_key(devinfo, &key->tex);

   /* XXX: Handle vertex input work-arounds */

   /* XXX: Handle sampler_prog_key */
}

static void
populate_gs_prog_key(const struct gen_device_info *devinfo,
                     struct brw_gs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_sampler_prog_key(devinfo, &key->tex);
}

static void
populate_wm_prog_key(const struct gen_device_info *devinfo,
                     const VkGraphicsPipelineCreateInfo *info,
                     struct brw_wm_prog_key *key)
{
   ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass);

   memset(key, 0, sizeof(*key));

   populate_sampler_prog_key(devinfo, &key->tex);

   /* TODO: Fill out key->input_slots_valid */

   /* Vulkan doesn't specify a default */
   key->high_quality_derivatives = false;

   /* XXX Vulkan doesn't appear to specify */
   key->clamp_fragment_color = false;

   key->nr_color_regions =
      render_pass->subpasses[info->subpass].color_count;

   key->replicate_alpha = key->nr_color_regions > 1 &&
                          info->pMultisampleState &&
                          info->pMultisampleState->alphaToCoverageEnable;

   if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) {
      /* We should probably pull this out of the shader, but it's fairly
       * harmless to compute it and then let dead-code take care of it.
       */
      key->persample_interp =
         (info->pMultisampleState->minSampleShading *
          info->pMultisampleState->rasterizationSamples) > 1;
      key->multisample_fbo = true;
   }
}
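/* Worked example for the persample_interp computation above (illustrative
 * numbers): with minSampleShading = 0.5 and rasterizationSamples = 4,
 * 0.5 * 4 = 2 > 1, so inputs are interpolated per sample; with
 * minSampleShading = 0.25 and 4 samples, 0.25 * 4 = 1 is not > 1, so
 * per-pixel interpolation is kept.
 */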

static void
populate_cs_prog_key(const struct gen_device_info *devinfo,
                     struct brw_cs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_sampler_prog_key(devinfo, &key->tex);
}

static nir_shader *
anv_pipeline_compile(struct anv_pipeline *pipeline,
                     struct anv_shader_module *module,
                     const char *entrypoint,
                     gl_shader_stage stage,
                     const VkSpecializationInfo *spec_info,
                     struct brw_stage_prog_data *prog_data,
                     struct anv_pipeline_bind_map *map)
{
   nir_shader *nir = anv_shader_compile_to_nir(pipeline->device,
                                               module, entrypoint, stage,
                                               spec_info);
   if (nir == NULL)
      return NULL;

   NIR_PASS_V(nir, anv_nir_lower_push_constants);

   /* Figure out the number of parameters */
   prog_data->nr_params = 0;

   if (nir->num_uniforms > 0) {
      /* If the shader uses any push constants at all, we'll just give
       * them the maximum possible number
       */
      assert(nir->num_uniforms <= MAX_PUSH_CONSTANTS_SIZE);
      prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
   }

   if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
      prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2;

   if (nir->info->num_images > 0) {
      prog_data->nr_params += nir->info->num_images * BRW_IMAGE_PARAM_SIZE;
      pipeline->needs_data_cache = true;
   }

   if (stage == MESA_SHADER_COMPUTE)
      ((struct brw_cs_prog_data *)prog_data)->thread_local_id_index =
         prog_data->nr_params++; /* The CS Thread ID uniform */

   if (nir->info->num_ssbos > 0)
      pipeline->needs_data_cache = true;

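   /* Rough accounting example for nr_params (illustrative): a stage that
    * uses push constants gets MAX_PUSH_CONSTANTS_SIZE / 4 slots (one per
    * 32-bit component), plus 2 * MAX_DYNAMIC_BUFFERS slots when the layout
    * has dynamic buffer offsets, plus BRW_IMAGE_PARAM_SIZE slots per storage
    * image. The array allocated below has to cover all of them.
    */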
   if (prog_data->nr_params > 0) {
      /* XXX: I think we're leaking this */
      prog_data->param = (const union gl_constant_value **)
         malloc(prog_data->nr_params * sizeof(union gl_constant_value *));

      /* We now set the param values to be offsets into a
       * anv_push_constant_data structure. Since the compiler doesn't
       * actually dereference any of the gl_constant_value pointers in the
       * params array, it doesn't really matter what we put here.
       */
      struct anv_push_constants *null_data = NULL;
      if (nir->num_uniforms > 0) {
         /* Fill out the push constants section of the param array */
         for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++)
            prog_data->param[i] = (const union gl_constant_value *)
               &null_data->client_data[i * sizeof(float)];
      }
   }

   /* Set up dynamic offsets */
   anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data);

   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
   if (pipeline->layout)
      anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map);

   /* nir_lower_io will only handle the push constants; we need to set this
    * to the full number of possible uniforms.
    */
   nir->num_uniforms = prog_data->nr_params * 4;

   return nir;
}

static void
anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
{
   prog_data->binding_table.size_bytes = 0;
   prog_data->binding_table.texture_start = bias;
   prog_data->binding_table.gather_texture_start = bias;
   prog_data->binding_table.ubo_start = bias;
   prog_data->binding_table.ssbo_start = bias;
   prog_data->binding_table.image_start = bias;
}
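/* For example (illustrative): a fragment shader with two color attachments
 * calls this with bias = 2, so binding table entries 0-1 hold the render
 * target surfaces and the texture/UBO/SSBO/image surfaces all start at
 * entry 2. The compute path below passes bias = 1, reserving entry 0.
 */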

static struct anv_shader_bin *
anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
                           struct anv_pipeline_cache *cache,
                           const void *key_data, uint32_t key_size,
                           const void *kernel_data, uint32_t kernel_size,
                           const struct brw_stage_prog_data *prog_data,
                           uint32_t prog_data_size,
                           const struct anv_pipeline_bind_map *bind_map)
{
   if (cache) {
      return anv_pipeline_cache_upload_kernel(cache, key_data, key_size,
                                              kernel_data, kernel_size,
                                              prog_data, prog_data_size,
                                              bind_map);
   } else {
      return anv_shader_bin_create(pipeline->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }
}


static void
anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                gl_shader_stage stage,
                                struct anv_shader_bin *shader)
{
   pipeline->shaders[stage] = shader;
   pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
}

static VkResult
anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
                        struct anv_pipeline_cache *cache,
                        const VkGraphicsPipelineCreateInfo *info,
                        struct anv_shader_module *module,
                        const char *entrypoint,
                        const VkSpecializationInfo *spec_info)
{
   const struct brw_compiler *compiler =
      pipeline->device->instance->physicalDevice.compiler;
   struct anv_pipeline_bind_map map;
   struct brw_vs_prog_key key;
   struct anv_shader_bin *bin = NULL;
   unsigned char sha1[20];

   populate_vs_prog_key(&pipeline->device->info, &key);

   if (cache) {
      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                      pipeline->layout, spec_info);
      bin = anv_pipeline_cache_search(cache, sha1, 20);
   }
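   /* The SHA1 above serves as the pipeline-cache key; it covers the prog
    * key, the shader module, the entrypoint name, the pipeline layout and
    * the specialization constants, so a change to any of these results in a
    * separate cache entry. The GS, FS and CS paths below follow the same
    * hash/lookup/compile pattern.
    */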

   if (bin == NULL) {
      struct brw_vs_prog_data prog_data = { 0, };
      struct anv_pipeline_binding surface_to_descriptor[256];
      struct anv_pipeline_binding sampler_to_descriptor[256];

      map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = surface_to_descriptor,
         .sampler_to_descriptor = sampler_to_descriptor
      };

      nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                             MESA_SHADER_VERTEX, spec_info,
                                             &prog_data.base.base, &map);
      if (nir == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      anv_fill_binding_table(&prog_data.base.base, 0);

      void *mem_ctx = ralloc_context(NULL);

      ralloc_steal(mem_ctx, nir);

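      /* Record which inputs (and, separately, which double-precision inputs)
       * the VS reads. 64-bit attributes can occupy two vertex-element slots,
       * so the backend needs double_inputs_read in addition to inputs_read
       * when it builds the vertex element layout.
       */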
      prog_data.inputs_read = nir->info->inputs_read;
      prog_data.double_inputs_read = nir->info->double_inputs_read;

      brw_compute_vue_map(&pipeline->device->info,
                          &prog_data.base.vue_map,
                          nir->info->outputs_written,
                          nir->info->separate_shader);

      unsigned code_size;
      const unsigned *shader_code =
         brw_compile_vs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
                        NULL, false, -1, &code_size, NULL);
      if (shader_code == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
                                       shader_code, code_size,
                                       &prog_data.base.base, sizeof(prog_data),
                                       &map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      ralloc_free(mem_ctx);
   }

   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, bin);

   return VK_SUCCESS;
}

static VkResult
anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
                        struct anv_pipeline_cache *cache,
                        const VkGraphicsPipelineCreateInfo *info,
                        struct anv_shader_module *module,
                        const char *entrypoint,
                        const VkSpecializationInfo *spec_info)
{
   const struct brw_compiler *compiler =
      pipeline->device->instance->physicalDevice.compiler;
   struct anv_pipeline_bind_map map;
   struct brw_gs_prog_key key;
   struct anv_shader_bin *bin = NULL;
   unsigned char sha1[20];

   populate_gs_prog_key(&pipeline->device->info, &key);

   if (cache) {
      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                      pipeline->layout, spec_info);
      bin = anv_pipeline_cache_search(cache, sha1, 20);
   }

   if (bin == NULL) {
      struct brw_gs_prog_data prog_data = { 0, };
      struct anv_pipeline_binding surface_to_descriptor[256];
      struct anv_pipeline_binding sampler_to_descriptor[256];

      map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = surface_to_descriptor,
         .sampler_to_descriptor = sampler_to_descriptor
      };

      nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                             MESA_SHADER_GEOMETRY, spec_info,
                                             &prog_data.base.base, &map);
      if (nir == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      anv_fill_binding_table(&prog_data.base.base, 0);

      void *mem_ctx = ralloc_context(NULL);

      ralloc_steal(mem_ctx, nir);

      brw_compute_vue_map(&pipeline->device->info,
                          &prog_data.base.vue_map,
                          nir->info->outputs_written,
                          nir->info->separate_shader);

      unsigned code_size;
      const unsigned *shader_code =
         brw_compile_gs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
                        NULL, -1, &code_size, NULL);
      if (shader_code == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      /* TODO: SIMD8 GS */
      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
                                       shader_code, code_size,
                                       &prog_data.base.base, sizeof(prog_data),
                                       &map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      ralloc_free(mem_ctx);
   }

   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, bin);

   return VK_SUCCESS;
}

static VkResult
anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
                        struct anv_pipeline_cache *cache,
                        const VkGraphicsPipelineCreateInfo *info,
                        struct anv_shader_module *module,
                        const char *entrypoint,
                        const VkSpecializationInfo *spec_info)
{
   const struct brw_compiler *compiler =
      pipeline->device->instance->physicalDevice.compiler;
   struct anv_pipeline_bind_map map;
   struct brw_wm_prog_key key;
   struct anv_shader_bin *bin = NULL;
   unsigned char sha1[20];

   populate_wm_prog_key(&pipeline->device->info, info, &key);

   if (cache) {
      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                      pipeline->layout, spec_info);
      bin = anv_pipeline_cache_search(cache, sha1, 20);
   }

   if (bin == NULL) {
      struct brw_wm_prog_data prog_data = { 0, };
      struct anv_pipeline_binding surface_to_descriptor[256];
      struct anv_pipeline_binding sampler_to_descriptor[256];

      map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = surface_to_descriptor + 8,
         .sampler_to_descriptor = sampler_to_descriptor
      };
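      /* The surface map starts 8 entries into the array: the render-target
       * bindings discovered below are later prepended by rewinding
       * surface_to_descriptor by num_rts, so up to 8 color-attachment slots
       * are reserved ahead of whatever anv_pipeline_compile fills in.
       */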

      nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                             MESA_SHADER_FRAGMENT, spec_info,
                                             &prog_data.base, &map);
      if (nir == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      unsigned num_rts = 0;
      struct anv_pipeline_binding rt_bindings[8];
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_foreach_variable_safe(var, &nir->outputs) {
         if (var->data.location < FRAG_RESULT_DATA0)
            continue;

         unsigned rt = var->data.location - FRAG_RESULT_DATA0;
         if (rt >= key.nr_color_regions) {
            /* Out-of-bounds, throw it away */
            var->data.mode = nir_var_local;
            exec_node_remove(&var->node);
            exec_list_push_tail(&impl->locals, &var->node);
            continue;
         }

         /* Give it a new, compacted, location */
         var->data.location = FRAG_RESULT_DATA0 + num_rts;

         unsigned array_len =
            glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
         assert(num_rts + array_len <= 8);

         for (unsigned i = 0; i < array_len; i++) {
            rt_bindings[num_rts + i] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .binding = 0,
               .index = rt + i,
            };
         }

         num_rts += array_len;
      }

      if (num_rts == 0) {
         /* If we have no render targets, we need a null render target */
         rt_bindings[0] = (struct anv_pipeline_binding) {
            .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
            .binding = 0,
            .index = UINT8_MAX,
         };
         num_rts = 1;
      }

      assert(num_rts <= 8);
      map.surface_to_descriptor -= num_rts;
      map.surface_count += num_rts;
      assert(map.surface_count <= 256);
      memcpy(map.surface_to_descriptor, rt_bindings,
             num_rts * sizeof(*rt_bindings));

      anv_fill_binding_table(&prog_data.base, num_rts);

      void *mem_ctx = ralloc_context(NULL);

      ralloc_steal(mem_ctx, nir);

      unsigned code_size;
      const unsigned *shader_code =
         brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
                        NULL, -1, -1, true, false, NULL, &code_size, NULL);
      if (shader_code == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
                                       shader_code, code_size,
                                       &prog_data.base, sizeof(prog_data),
                                       &map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      ralloc_free(mem_ctx);
   }

   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, bin);

   return VK_SUCCESS;
}

VkResult
anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
                        struct anv_pipeline_cache *cache,
                        const VkComputePipelineCreateInfo *info,
                        struct anv_shader_module *module,
                        const char *entrypoint,
                        const VkSpecializationInfo *spec_info)
{
   const struct brw_compiler *compiler =
      pipeline->device->instance->physicalDevice.compiler;
   struct anv_pipeline_bind_map map;
   struct brw_cs_prog_key key;
   struct anv_shader_bin *bin = NULL;
   unsigned char sha1[20];

   populate_cs_prog_key(&pipeline->device->info, &key);

   if (cache) {
      anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint,
                      pipeline->layout, spec_info);
      bin = anv_pipeline_cache_search(cache, sha1, 20);
   }

   if (bin == NULL) {
      struct brw_cs_prog_data prog_data = { 0, };
      struct anv_pipeline_binding surface_to_descriptor[256];
      struct anv_pipeline_binding sampler_to_descriptor[256];

      map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = surface_to_descriptor,
         .sampler_to_descriptor = sampler_to_descriptor
      };

      nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                             MESA_SHADER_COMPUTE, spec_info,
                                             &prog_data.base, &map);
      if (nir == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      anv_fill_binding_table(&prog_data.base, 1);

      void *mem_ctx = ralloc_context(NULL);

      ralloc_steal(mem_ctx, nir);

      unsigned code_size;
      const unsigned *shader_code =
         brw_compile_cs(compiler, NULL, mem_ctx, &key, &prog_data, nir,
                        -1, &code_size, NULL);
      if (shader_code == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
                                       shader_code, code_size,
                                       &prog_data.base, sizeof(prog_data),
                                       &map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      ralloc_free(mem_ctx);
   }

   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, bin);

   return VK_SUCCESS;
}

/**
 * Copy pipeline state not marked as dynamic.
 * Dynamic state is pipeline state which hasn't been provided at pipeline
 * creation time, but is dynamically provided afterwards using various
 * vkCmdSet* functions.
 *
 * The set of state considered "non_dynamic" is determined by the pieces of
 * state that have their corresponding VkDynamicState enums omitted from
 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
 *
 * @param[out] pipeline    Destination non_dynamic state.
 * @param[in]  pCreateInfo Source of non_dynamic state to be copied.
 */
static void
copy_non_dynamic_state(struct anv_pipeline *pipeline,
                       const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
   ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
   struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];

   pipeline->dynamic_state = default_dynamic_state;

   if (pCreateInfo->pDynamicState) {
      /* Remove all of the states that are marked as dynamic */
      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
      for (uint32_t s = 0; s < count; s++)
         states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]);
   }

   struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;

   /* Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pViewportState is [...] NULL if the pipeline
    *    has rasterization disabled.
    */
   if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
      assert(pCreateInfo->pViewportState);

      dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
      if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
         typed_memcpy(dynamic->viewport.viewports,
                      pCreateInfo->pViewportState->pViewports,
                      pCreateInfo->pViewportState->viewportCount);
      }

      dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
      if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
         typed_memcpy(dynamic->scissor.scissors,
                      pCreateInfo->pViewportState->pScissors,
                      pCreateInfo->pViewportState->scissorCount);
      }
   }

   if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
   }

   if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->depth_bias.bias =
         pCreateInfo->pRasterizationState->depthBiasConstantFactor;
      dynamic->depth_bias.clamp =
         pCreateInfo->pRasterizationState->depthBiasClamp;
      dynamic->depth_bias.slope =
         pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
   }

   /* Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pColorBlendState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is
    *    created against does not use any color attachments.
    */
   bool uses_color_att = false;
   for (unsigned i = 0; i < subpass->color_count; ++i) {
      if (subpass->color_attachments[i] != VK_ATTACHMENT_UNUSED) {
         uses_color_att = true;
         break;
      }
   }

   if (uses_color_att &&
       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
      assert(pCreateInfo->pColorBlendState);

      if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS))
         typed_memcpy(dynamic->blend_constants,
                      pCreateInfo->pColorBlendState->blendConstants, 4);
   }

   /* If there is no depthstencil attachment, then don't read
    * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
    * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
    * no need to override the depthstencil defaults in
    * anv_pipeline::dynamic_state when there is no depthstencil attachment.
    *
    * Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pDepthStencilState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is created
    *    against does not use a depth/stencil attachment.
    */
   if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
       subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      assert(pCreateInfo->pDepthStencilState);

      if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
         dynamic->depth_bounds.min =
            pCreateInfo->pDepthStencilState->minDepthBounds;
         dynamic->depth_bounds.max =
            pCreateInfo->pDepthStencilState->maxDepthBounds;
      }

      if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
         dynamic->stencil_compare_mask.front =
            pCreateInfo->pDepthStencilState->front.compareMask;
         dynamic->stencil_compare_mask.back =
            pCreateInfo->pDepthStencilState->back.compareMask;
      }

      if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
         dynamic->stencil_write_mask.front =
            pCreateInfo->pDepthStencilState->front.writeMask;
         dynamic->stencil_write_mask.back =
            pCreateInfo->pDepthStencilState->back.writeMask;
      }

      if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
         dynamic->stencil_reference.front =
            pCreateInfo->pDepthStencilState->front.reference;
         dynamic->stencil_reference.back =
            pCreateInfo->pDepthStencilState->back.reference;
      }
   }

   pipeline->dynamic_state_mask = states;
}

static void
anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
{
   struct anv_render_pass *renderpass = NULL;
   struct anv_subpass *subpass = NULL;

   /* Assert that all required members of VkGraphicsPipelineCreateInfo are
    * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
    */
   assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   renderpass = anv_render_pass_from_handle(info->renderPass);
   assert(renderpass);

   assert(info->subpass < renderpass->subpass_count);
   subpass = &renderpass->subpasses[info->subpass];

   assert(info->stageCount >= 1);
   assert(info->pVertexInputState);
   assert(info->pInputAssemblyState);
   assert(info->pRasterizationState);
   if (!info->pRasterizationState->rasterizerDiscardEnable) {
      assert(info->pViewportState);
      assert(info->pMultisampleState);

      if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
         assert(info->pDepthStencilState);

      if (subpass && subpass->color_count > 0)
         assert(info->pColorBlendState);
   }

   for (uint32_t i = 0; i < info->stageCount; ++i) {
      switch (info->pStages[i].stage) {
      case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
      case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
         assert(info->pTessellationState);
         break;
      default:
         break;
      }
   }
}

/**
 * Calculate the desired L3 partitioning based on the current state of the
 * pipeline. For now this simply returns the conservative defaults calculated
 * by get_default_l3_weights(), but we could probably do better by gathering
 * more statistics from the pipeline state (e.g. guess of expected URB usage
 * and bound surfaces), or by using feed-back from performance counters.
 */
void
anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
{
   const struct gen_device_info *devinfo = &pipeline->device->info;

   const struct gen_l3_weights w =
      gen_get_default_l3_weights(devinfo, pipeline->needs_data_cache, needs_slm);

   pipeline->urb.l3_config = gen_get_l3_config(devinfo, w);
   pipeline->urb.total_size =
      gen_get_l3_config_urb_size(devinfo, pipeline->urb.l3_config);
}

VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
                  struct anv_pipeline_cache *cache,
                  const VkGraphicsPipelineCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *alloc)
{
   VkResult result;

   anv_validate {
      anv_pipeline_validate_create_info(pCreateInfo);
   }

   if (alloc == NULL)
      alloc = &device->alloc;

   pipeline->device = device;
   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

   result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
   if (result != VK_SUCCESS)
      return result;

   pipeline->batch.alloc = alloc;
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;

   copy_non_dynamic_state(pipeline, pCreateInfo);
   pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState &&
      pCreateInfo->pRasterizationState->depthClampEnable;

   pipeline->needs_data_cache = false;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers. Make them NULL by default.
    */
   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));

   pipeline->active_stages = 0;

   const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
   struct anv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
      pStages[stage] = &pCreateInfo->pStages[i];
      modules[stage] = anv_shader_module_from_handle(pStages[stage]->module);
   }

   if (modules[MESA_SHADER_VERTEX]) {
      result = anv_pipeline_compile_vs(pipeline, cache, pCreateInfo,
                                       modules[MESA_SHADER_VERTEX],
                                       pStages[MESA_SHADER_VERTEX]->pName,
                                       pStages[MESA_SHADER_VERTEX]->pSpecializationInfo);
      if (result != VK_SUCCESS)
         goto compile_fail;
   }

   if (modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL])
      anv_finishme("no tessellation support");

   if (modules[MESA_SHADER_GEOMETRY]) {
      result = anv_pipeline_compile_gs(pipeline, cache, pCreateInfo,
                                       modules[MESA_SHADER_GEOMETRY],
                                       pStages[MESA_SHADER_GEOMETRY]->pName,
                                       pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo);
      if (result != VK_SUCCESS)
         goto compile_fail;
   }

   if (modules[MESA_SHADER_FRAGMENT]) {
      result = anv_pipeline_compile_fs(pipeline, cache, pCreateInfo,
                                       modules[MESA_SHADER_FRAGMENT],
                                       pStages[MESA_SHADER_FRAGMENT]->pName,
                                       pStages[MESA_SHADER_FRAGMENT]->pSpecializationInfo);
      if (result != VK_SUCCESS)
         goto compile_fail;
   }

   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);

   anv_pipeline_setup_l3_config(pipeline, false);

   const VkPipelineVertexInputStateCreateInfo *vi_info =
      pCreateInfo->pVertexInputState;

   const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;

   pipeline->vb_used = 0;
   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &vi_info->pVertexAttributeDescriptions[i];

      /* inputs_read is a 64-bit mask, so build the bit with a 64-bit shift
       * to avoid overflowing a 32-bit constant for high attribute locations.
       */
      if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
         pipeline->vb_used |= 1 << desc->binding;
   }
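   /* Illustrative example: if the application declares an attribute at
    * location 2 sourced from binding 0 but the VS never reads location 2,
    * binding 0 is not marked in vb_used; only bindings feeding attributes
    * the shader actually reads are recorded here.
    */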

   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *desc =
         &vi_info->pVertexBindingDescriptions[i];

      pipeline->binding_stride[desc->binding] = desc->stride;

      /* Step rate is programmed per vertex element (attribute), not
       * binding. Set up a map of which bindings step per instance, for
       * reference by vertex element setup. */
      switch (desc->inputRate) {
      default:
      case VK_VERTEX_INPUT_RATE_VERTEX:
         pipeline->instancing_enable[desc->binding] = false;
         break;
      case VK_VERTEX_INPUT_RATE_INSTANCE:
         pipeline->instancing_enable[desc->binding] = true;
         break;
      }
   }

   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   pipeline->primitive_restart = ia_info->primitiveRestartEnable;
   pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];

   return VK_SUCCESS;

compile_fail:
   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (pipeline->shaders[s])
         anv_shader_bin_unref(device, pipeline->shaders[s]);
   }

   anv_reloc_list_finish(&pipeline->batch_relocs, alloc);

   return result;
}