mesa.git: src/vulkan/anv_pipeline.c (commit: "nir/spirv: Add support for multiple entrypoints per shader")
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"
#include "brw_nir.h"
#include "anv_nir.h"
#include "glsl/nir/spirv/nir_spirv.h"

/* Needed for SWIZZLE macros */
#include "program/prog_instruction.h"

// Shader functions

VkResult anv_CreateShaderModule(
    VkDevice _device,
    const VkShaderModuleCreateInfo *pCreateInfo,
    const VkAllocationCallbacks *pAllocator,
    VkShaderModule *pShaderModule)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_shader_module *module;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   module = anv_alloc2(&device->alloc, pAllocator,
                       sizeof(*module) + pCreateInfo->codeSize, 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (module == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   module->nir = NULL;
   module->size = pCreateInfo->codeSize;
   memcpy(module->data, pCreateInfo->pCode, module->size);

   *pShaderModule = anv_shader_module_to_handle(module);

   return VK_SUCCESS;
}

void anv_DestroyShaderModule(
    VkDevice _device,
    VkShaderModule _module,
    const VkAllocationCallbacks *pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_shader_module, module, _module);

   anv_free2(&device->alloc, pAllocator, module);
}

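/* The first 32-bit word of every SPIR-V binary is this magic number;
 * anv_shader_compile_to_nir() below sanity-checks it before handing the
 * module off to spirv_to_nir().
 */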
#define SPIR_V_MAGIC_NUMBER 0x07230203

/* Eventually, this will become part of anv_CreateShader. Unfortunately,
 * we can't do that yet because we don't have the ability to copy nir.
 */
static nir_shader *
anv_shader_compile_to_nir(struct anv_device *device,
                          struct anv_shader_module *module,
                          const char *entrypoint_name,
                          gl_shader_stage stage)
{
   if (strcmp(entrypoint_name, "main") != 0) {
      anv_finishme("Multiple shaders per module not really supported");
   }

   const struct brw_compiler *compiler =
      device->instance->physicalDevice.compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[stage].NirOptions;

   nir_shader *nir;
   nir_function *entry_point;
   if (module->nir) {
      /* Some things such as our meta clear/blit code will give us a NIR
       * shader directly.  In that case, we ignore the SPIR-V entirely and
       * use the NIR shader as-is.
       */
      nir = module->nir;
      nir->options = nir_options;
      nir_validate_shader(nir);

      assert(exec_list_length(&nir->functions) == 1);
      struct exec_node *node = exec_list_get_head(&nir->functions);
      entry_point = exec_node_data(nir_function, node, node);
   } else {
      uint32_t *spirv = (uint32_t *) module->data;
      assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
      assert(module->size % 4 == 0);

      entry_point = spirv_to_nir(spirv, module->size / 4, entrypoint_name,
                                 nir_options);
      nir = entry_point->shader;
      assert(nir->stage == stage);
      nir_validate_shader(nir);

      nir_lower_returns(nir);
      nir_validate_shader(nir);

      nir_inline_functions(nir);
      nir_validate_shader(nir);

      nir_lower_system_values(nir);
      nir_validate_shader(nir);
   }

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   /* Pick off the single entrypoint that we want */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (func != entry_point)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]);

   nir_shader_gather_info(nir, entry_point->impl);

   return nir;
}

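/* The pipeline cache is not implemented yet: creation hands back a dummy
 * handle, GetPipelineCacheData reports zero bytes of data, and merging is a
 * no-op.
 */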
VkResult anv_CreatePipelineCache(
    VkDevice device,
    const VkPipelineCacheCreateInfo *pCreateInfo,
    const VkAllocationCallbacks *pAllocator,
    VkPipelineCache *pPipelineCache)
{
   *pPipelineCache = (VkPipelineCache)1;

   stub_return(VK_SUCCESS);
}

void anv_DestroyPipelineCache(
    VkDevice _device,
    VkPipelineCache _cache,
    const VkAllocationCallbacks *pAllocator)
{
}

VkResult anv_GetPipelineCacheData(
    VkDevice device,
    VkPipelineCache pipelineCache,
    size_t *pDataSize,
    void *pData)
{
   *pDataSize = 0;
   stub_return(VK_SUCCESS);
}

VkResult anv_MergePipelineCaches(
    VkDevice device,
    VkPipelineCache destCache,
    uint32_t srcCacheCount,
    const VkPipelineCache *pSrcCaches)
{
   stub_return(VK_SUCCESS);
}

void anv_DestroyPipeline(
    VkDevice _device,
    VkPipeline _pipeline,
    const VkAllocationCallbacks *pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   anv_reloc_list_finish(&pipeline->batch_relocs,
                         pAllocator ? pAllocator : &device->alloc);
   anv_state_stream_finish(&pipeline->program_stream);
   if (pipeline->blend_state.map)
      anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
   anv_free2(&device->alloc, pAllocator, pipeline);
}

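/* Translation from VkPrimitiveTopology to the hardware 3DPRIM_* topology
 * values programmed for the pipeline.  Patch lists are not wired up yet
 * (see the commented-out entry).
 */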
static const uint32_t vk_to_gen_primitive_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST]                    = _3DPRIM_PATCHLIST_1 */
};

static void
populate_sampler_prog_key(const struct brw_device_info *devinfo,
                          struct brw_sampler_prog_key_data *key)
{
   /* XXX: Handle texture swizzle on HSW- */
   for (int i = 0; i < MAX_SAMPLERS; i++) {
      /* Assume color sampler, no swizzling. (Works for BDW+) */
      key->swizzles[i] = SWIZZLE_XYZW;
   }
}

static void
populate_vs_prog_key(const struct brw_device_info *devinfo,
                     struct brw_vs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_sampler_prog_key(devinfo, &key->tex);

   /* XXX: Handle vertex input work-arounds */

   /* XXX: Handle sampler_prog_key */
}

static void
populate_gs_prog_key(const struct brw_device_info *devinfo,
                     struct brw_gs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_sampler_prog_key(devinfo, &key->tex);
}

static void
populate_wm_prog_key(const struct brw_device_info *devinfo,
                     const VkGraphicsPipelineCreateInfo *info,
                     struct brw_wm_prog_key *key)
{
   ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass);

   memset(key, 0, sizeof(*key));

   populate_sampler_prog_key(devinfo, &key->tex);

   /* TODO: Fill out key->input_slots_valid */

   /* Vulkan doesn't specify a default */
   key->high_quality_derivatives = false;

   /* XXX Vulkan doesn't appear to specify */
   key->clamp_fragment_color = false;

   /* Vulkan always specifies upper-left coordinates */
   key->drawable_height = 0;
   key->render_to_fbo = false;

   key->nr_color_regions = render_pass->subpasses[info->subpass].color_count;

   key->replicate_alpha = key->nr_color_regions > 1 &&
                          info->pMultisampleState &&
                          info->pMultisampleState->alphaToCoverageEnable;

   if (info->pMultisampleState &&
       info->pMultisampleState->rasterizationSamples > 1) {
      /* We should probably pull this out of the shader, but it's fairly
       * harmless to compute it and then let dead-code take care of it.
       */
      key->persample_shading = info->pMultisampleState->sampleShadingEnable;
      if (key->persample_shading)
         key->persample_2x = info->pMultisampleState->rasterizationSamples == 2;

      key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable;
      key->compute_sample_id = info->pMultisampleState->sampleShadingEnable;
   }
}

static void
populate_cs_prog_key(const struct brw_device_info *devinfo,
                     struct brw_cs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_sampler_prog_key(devinfo, &key->tex);
}

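/* Stage-independent front half of the compile path: turn the module into
 * NIR, lay out the uniform "param" slots for push constants, dynamic buffer
 * offsets, and image params, apply the descriptor set layout, and assign
 * the binding-table offsets the backend compiler will use.
 */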
static nir_shader *
anv_pipeline_compile(struct anv_pipeline *pipeline,
                     struct anv_shader_module *module,
                     const char *entrypoint,
                     gl_shader_stage stage,
                     struct brw_stage_prog_data *prog_data)
{
   const struct brw_compiler *compiler =
      pipeline->device->instance->physicalDevice.compiler;

   nir_shader *nir = anv_shader_compile_to_nir(pipeline->device,
                                               module, entrypoint, stage);
   if (nir == NULL)
      return NULL;

   anv_nir_lower_push_constants(nir, compiler->scalar_stage[stage]);

   /* Figure out the number of parameters */
   prog_data->nr_params = 0;

   if (nir->num_uniforms > 0) {
      /* If the shader uses any push constants at all, we'll just give
       * them the maximum possible number
       */
      prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
   }

   if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
      prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2;

   if (pipeline->layout && pipeline->layout->stage[stage].image_count > 0)
      prog_data->nr_params += pipeline->layout->stage[stage].image_count *
                              BRW_IMAGE_PARAM_SIZE;

   if (prog_data->nr_params > 0) {
      /* XXX: I think we're leaking this */
      prog_data->param = (const union gl_constant_value **)
         malloc(prog_data->nr_params * sizeof(union gl_constant_value *));

      /* We now set the param values to be offsets into an
       * anv_push_constants structure.  Since the compiler doesn't
       * actually dereference any of the gl_constant_value pointers in the
       * params array, it doesn't really matter what we put here.
       */
      struct anv_push_constants *null_data = NULL;
      if (nir->num_uniforms > 0) {
         /* Fill out the push constants section of the param array */
         for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++)
            prog_data->param[i] = (const union gl_constant_value *)
               &null_data->client_data[i * sizeof(float)];
      }
   }

   /* Set up dynamic offsets */
   anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data);

   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
   if (pipeline->layout)
      anv_nir_apply_pipeline_layout(nir, prog_data, pipeline->layout);

   /* All binding table offsets provided by apply_pipeline_layout() are
    * relative to the start of the binding table (biased by MAX_RTS for the
    * fragment stage, where render targets come first).
    */
   unsigned bias;
   switch (stage) {
   case MESA_SHADER_FRAGMENT:
      bias = MAX_RTS;
      break;
   case MESA_SHADER_COMPUTE:
      bias = 1;
      break;
   default:
      bias = 0;
      break;
   }
   prog_data->binding_table.size_bytes = 0;
   prog_data->binding_table.texture_start = bias;
   prog_data->binding_table.ubo_start = bias;
   prog_data->binding_table.ssbo_start = bias;
   prog_data->binding_table.image_start = bias;

   /* Finish the optimization and compilation process */
   nir = brw_nir_lower_io(nir, &pipeline->device->info,
                          compiler->scalar_stage[stage]);

   /* nir_lower_io will only handle the push constants; we need to set this
    * to the full number of possible uniforms.
    */
   nir->num_uniforms = prog_data->nr_params * 4;

   return nir;
}

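/* Copy a compiled kernel into the pipeline's program stream and return its
 * offset; that offset is what later gets programmed as the kernel start
 * pointer.  On platforms without LLC the freshly written code has to be
 * clflushed so the GPU sees it.
 */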
static uint32_t
anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
                           const void *data, size_t size)
{
   struct anv_state state =
      anv_state_stream_alloc(&pipeline->program_stream, size, 64);

   assert(size < pipeline->program_stream.block_pool->block_size);

   memcpy(state.map, data, size);

   if (!pipeline->device->info.has_llc)
      anv_state_clflush(state);

   return state.offset;
}

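/* Record a freshly compiled stage: stash its prog_data, mark the stage
 * active, and reserve scratch space for it.  Scratch is sized for the worst
 * case: the per-thread scratch size times the maximum number of threads the
 * hardware can run for that stage.
 */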
static void
anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
                                gl_shader_stage stage,
                                struct brw_stage_prog_data *prog_data)
{
   struct brw_device_info *devinfo = &pipeline->device->info;
   uint32_t max_threads[] = {
      [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
      [MESA_SHADER_TESS_CTRL] = 0,
      [MESA_SHADER_TESS_EVAL] = 0,
      [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
      [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
      [MESA_SHADER_COMPUTE]   = devinfo->max_cs_threads,
   };

   pipeline->prog_data[stage] = prog_data;
   pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
   pipeline->scratch_start[stage] = pipeline->total_scratch;
   pipeline->total_scratch =
      align_u32(pipeline->total_scratch, 1024) +
      prog_data->total_scratch * max_threads[stage];
}

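/* Compile the vertex shader.  Depending on which dispatch mode the backend
 * picked, the resulting kernel lands in either the SIMD8 or the vec4 slot,
 * with the other slot set to NO_KERNEL.
 */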
static VkResult
anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
                        const VkGraphicsPipelineCreateInfo *info,
                        struct anv_shader_module *module,
                        const char *entrypoint)
{
   const struct brw_compiler *compiler =
      pipeline->device->instance->physicalDevice.compiler;
   struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
   struct brw_vs_prog_key key;

   populate_vs_prog_key(&pipeline->device->info, &key);

   /* TODO: Look up shader in cache */

   memset(prog_data, 0, sizeof(*prog_data));

   nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                          MESA_SHADER_VERTEX,
                                          &prog_data->base.base);
   if (nir == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   void *mem_ctx = ralloc_context(NULL);

   if (module->nir == NULL)
      ralloc_steal(mem_ctx, nir);

   prog_data->inputs_read = nir->info.inputs_read;
   pipeline->writes_point_size =
      (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) != 0;

   brw_compute_vue_map(&pipeline->device->info,
                       &prog_data->base.vue_map,
                       nir->info.outputs_written,
                       nir->info.separate_shader);

   unsigned code_size;
   const unsigned *shader_code =
      brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir,
                     NULL, false, -1, &code_size, NULL);
   if (shader_code == NULL) {
      ralloc_free(mem_ctx);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   const uint32_t offset =
      anv_pipeline_upload_kernel(pipeline, shader_code, code_size);
   if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
      pipeline->vs_simd8 = offset;
      pipeline->vs_vec4 = NO_KERNEL;
   } else {
      pipeline->vs_simd8 = NO_KERNEL;
      pipeline->vs_vec4 = offset;
   }

   ralloc_free(mem_ctx);

   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
                                   &prog_data->base.base);

   return VK_SUCCESS;
}

static VkResult
anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
                        const VkGraphicsPipelineCreateInfo *info,
                        struct anv_shader_module *module,
                        const char *entrypoint)
{
   const struct brw_compiler *compiler =
      pipeline->device->instance->physicalDevice.compiler;
   struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data;
   struct brw_gs_prog_key key;

   populate_gs_prog_key(&pipeline->device->info, &key);

   /* TODO: Look up shader in cache */

   memset(prog_data, 0, sizeof(*prog_data));

   nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                          MESA_SHADER_GEOMETRY,
                                          &prog_data->base.base);
   if (nir == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   void *mem_ctx = ralloc_context(NULL);

   if (module->nir == NULL)
      ralloc_steal(mem_ctx, nir);

   brw_compute_vue_map(&pipeline->device->info,
                       &prog_data->base.vue_map,
                       nir->info.outputs_written,
                       nir->info.separate_shader);

   unsigned code_size;
   const unsigned *shader_code =
      brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir,
                     NULL, -1, &code_size, NULL);
   if (shader_code == NULL) {
      ralloc_free(mem_ctx);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* TODO: SIMD8 GS */
   pipeline->gs_kernel =
      anv_pipeline_upload_kernel(pipeline, shader_code, code_size);
   pipeline->gs_vertex_count = nir->info.gs.vertices_in;

   ralloc_free(mem_ctx);

   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
                                   &prog_data->base.base);

   return VK_SUCCESS;
}

static VkResult
anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
                        const VkGraphicsPipelineCreateInfo *info,
                        struct anv_shader_module *module,
                        const char *entrypoint)
{
   const struct brw_compiler *compiler =
      pipeline->device->instance->physicalDevice.compiler;
   struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
   struct brw_wm_prog_key key;

   populate_wm_prog_key(&pipeline->device->info, info, &key);

   if (pipeline->use_repclear)
      key.nr_color_regions = 1;

   /* TODO: Look up shader in cache */

   memset(prog_data, 0, sizeof(*prog_data));

   prog_data->binding_table.render_target_start = 0;

   nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                          MESA_SHADER_FRAGMENT,
                                          &prog_data->base);
   if (nir == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   void *mem_ctx = ralloc_context(NULL);

   if (module->nir == NULL)
      ralloc_steal(mem_ctx, nir);

   unsigned code_size;
   const unsigned *shader_code =
      brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir,
                     NULL, -1, -1, pipeline->use_repclear, &code_size, NULL);
   if (shader_code == NULL) {
      ralloc_free(mem_ctx);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   uint32_t offset = anv_pipeline_upload_kernel(pipeline,
                                                shader_code, code_size);
   if (prog_data->no_8)
      pipeline->ps_simd8 = NO_KERNEL;
   else
      pipeline->ps_simd8 = offset;

   if (prog_data->no_8 || prog_data->prog_offset_16) {
      pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
   } else {
      pipeline->ps_simd16 = NO_KERNEL;
   }

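   /* Decide which kernel start pointers to program: KSP0 carries the SIMD8
    * kernel when one exists (SIMD16 otherwise), and KSP2 carries the SIMD16
    * kernel only when both dispatch widths are present.
    */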
   pipeline->ps_ksp2 = 0;
   pipeline->ps_grf_start2 = 0;
   if (pipeline->ps_simd8 != NO_KERNEL) {
      pipeline->ps_ksp0 = pipeline->ps_simd8;
      pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg;
      if (pipeline->ps_simd16 != NO_KERNEL) {
         pipeline->ps_ksp2 = pipeline->ps_simd16;
         pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16;
      }
   } else if (pipeline->ps_simd16 != NO_KERNEL) {
      pipeline->ps_ksp0 = pipeline->ps_simd16;
      pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16;
   }

   ralloc_free(mem_ctx);

   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
                                   &prog_data->base);

   return VK_SUCCESS;
}

VkResult
anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
                        const VkComputePipelineCreateInfo *info,
                        struct anv_shader_module *module,
                        const char *entrypoint)
{
   const struct brw_compiler *compiler =
      pipeline->device->instance->physicalDevice.compiler;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
   struct brw_cs_prog_key key;

   populate_cs_prog_key(&pipeline->device->info, &key);

   /* TODO: Look up shader in cache */

   memset(prog_data, 0, sizeof(*prog_data));

   prog_data->binding_table.work_groups_start = 0;

   nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
                                          MESA_SHADER_COMPUTE,
                                          &prog_data->base);
   if (nir == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   void *mem_ctx = ralloc_context(NULL);

   if (module->nir == NULL)
      ralloc_steal(mem_ctx, nir);

   unsigned code_size;
   const unsigned *shader_code =
      brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir,
                     -1, &code_size, NULL);
   if (shader_code == NULL) {
      ralloc_free(mem_ctx);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   pipeline->cs_simd = anv_pipeline_upload_kernel(pipeline,
                                                  shader_code, code_size);
   ralloc_free(mem_ctx);

   anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
                                   &prog_data->base);

   return VK_SUCCESS;
}

static const int gen8_push_size = 32 * 1024;

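/* Split the URB between push constants, the VS, and the GS.  Each stage is
 * first given the minimum amount of space it needs; whatever is left over
 * is then handed out in proportion to how much extra space each stage could
 * actually use ("wants"), respecting the hardware's entry-count
 * granularities and limits.
 */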
static void
gen7_compute_urb_partition(struct anv_pipeline *pipeline)
{
   const struct brw_device_info *devinfo = &pipeline->device->info;
   bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT;
   unsigned vs_size = vs_present ?
      pipeline->vs_prog_data.base.urb_entry_size : 1;
   unsigned vs_entry_size_bytes = vs_size * 64;
   bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT;
   unsigned gs_size = gs_present ?
      pipeline->gs_prog_data.base.urb_entry_size : 1;
   unsigned gs_entry_size_bytes = gs_size * 64;

   /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
    *
    *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
    *     Allocation Size is less than 9 512-bit URB entries.
    *
    * Similar text exists for GS.
    */
   unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
   unsigned gs_granularity = (gs_size < 9) ? 8 : 1;

   /* URB allocations must be done in 8k chunks. */
   unsigned chunk_size_bytes = 8192;

   /* Determine the size of the URB in chunks. */
   unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;

   /* Reserve space for push constants */
   unsigned push_constant_bytes = gen8_push_size;
   unsigned push_constant_chunks =
      push_constant_bytes / chunk_size_bytes;

   /* Initially, assign each stage the minimum amount of URB space it needs,
    * and make a note of how much additional space it "wants" (the amount of
    * additional space it could actually make use of).
    */

   /* VS has a lower limit on the number of URB entries */
   unsigned vs_chunks =
      ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
            chunk_size_bytes) / chunk_size_bytes;
   unsigned vs_wants =
      ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
            chunk_size_bytes) / chunk_size_bytes - vs_chunks;

   unsigned gs_chunks = 0;
   unsigned gs_wants = 0;
   if (gs_present) {
      /* There are two constraints on the minimum amount of URB space we can
       * allocate:
       *
       * (1) We need room for at least 2 URB entries, since we always operate
       * the GS in DUAL_OBJECT mode.
       *
       * (2) We can't allocate less than nr_gs_entries_granularity.
       */
      gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
                        chunk_size_bytes) / chunk_size_bytes;
      gs_wants =
         ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
               chunk_size_bytes) / chunk_size_bytes - gs_chunks;
   }

   /* There should always be enough URB space to satisfy the minimum
    * requirements of each stage.
    */
   unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
   assert(total_needs <= urb_chunks);

   /* Mete out remaining space (if any) in proportion to "wants". */
   unsigned total_wants = vs_wants + gs_wants;
   unsigned remaining_space = urb_chunks - total_needs;
   if (remaining_space > total_wants)
      remaining_space = total_wants;
   if (remaining_space > 0) {
      unsigned vs_additional = (unsigned)
         round(vs_wants * (((double) remaining_space) / total_wants));
      vs_chunks += vs_additional;
      remaining_space -= vs_additional;
      gs_chunks += remaining_space;
   }

   /* Sanity check that we haven't over-allocated. */
   assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);

   /* Finally, compute the number of entries that can fit in the space
    * allocated to each stage.
    */
   unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
   unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;

   /* Since we rounded up when computing *_wants, this may be slightly more
    * than the maximum allowed amount, so correct for that.
    */
   nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
   nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);

   /* Ensure that we program a multiple of the granularity. */
   nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
   nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);

   /* Finally, sanity check to make sure we have at least the minimum number
    * of entries needed for each stage.
    */
   assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
   if (gs_present)
      assert(nr_gs_entries >= 2);

   /* Lay out the URB in the following order:
    * - push constants
    * - VS
    * - GS
    */
   pipeline->urb.vs_start = push_constant_chunks;
   pipeline->urb.vs_size = vs_size;
   pipeline->urb.nr_vs_entries = nr_vs_entries;

   pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
   pipeline->urb.gs_size = gs_size;
   pipeline->urb.nr_gs_entries = nr_gs_entries;
}

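/* Bake into the pipeline the static values of every piece of state that the
 * application did not list as dynamic, starting from the defaults.  State
 * that *is* dynamic is left for the command buffer to supply at record
 * time.
 */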
static void
anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline,
                                const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
   ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
   struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];

   pipeline->dynamic_state = default_dynamic_state;

   if (pCreateInfo->pDynamicState) {
      /* Remove all of the states that are marked as dynamic */
      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
      for (uint32_t s = 0; s < count; s++)
         states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]);
   }

   struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;

   dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
   if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
      typed_memcpy(dynamic->viewport.viewports,
                   pCreateInfo->pViewportState->pViewports,
                   pCreateInfo->pViewportState->viewportCount);
   }

   dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
   if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
      typed_memcpy(dynamic->scissor.scissors,
                   pCreateInfo->pViewportState->pScissors,
                   pCreateInfo->pViewportState->scissorCount);
   }

   if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
   }

   if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->depth_bias.bias =
         pCreateInfo->pRasterizationState->depthBiasConstantFactor;
      dynamic->depth_bias.clamp =
         pCreateInfo->pRasterizationState->depthBiasClamp;
      dynamic->depth_bias.slope =
         pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
   }

   if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
      assert(pCreateInfo->pColorBlendState);
      typed_memcpy(dynamic->blend_constants,
                   pCreateInfo->pColorBlendState->blendConstants, 4);
   }

   /* If there is no depthstencil attachment, then don't read
    * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
    * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
    * no need to override the depthstencil defaults in
    * anv_pipeline::dynamic_state when there is no depthstencil attachment.
    *
    * From the Vulkan spec (20 Oct 2015, git-aa308cb):
    *
    *    pDepthStencilState [...] may only be NULL if renderPass and subpass
    *    specify a subpass that has no depth/stencil attachment.
    */
   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
         assert(pCreateInfo->pDepthStencilState);
         dynamic->depth_bounds.min =
            pCreateInfo->pDepthStencilState->minDepthBounds;
         dynamic->depth_bounds.max =
            pCreateInfo->pDepthStencilState->maxDepthBounds;
      }

      if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
         assert(pCreateInfo->pDepthStencilState);
         dynamic->stencil_compare_mask.front =
            pCreateInfo->pDepthStencilState->front.compareMask;
         dynamic->stencil_compare_mask.back =
            pCreateInfo->pDepthStencilState->back.compareMask;
      }

      if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
         assert(pCreateInfo->pDepthStencilState);
         dynamic->stencil_write_mask.front =
            pCreateInfo->pDepthStencilState->front.writeMask;
         dynamic->stencil_write_mask.back =
            pCreateInfo->pDepthStencilState->back.writeMask;
      }

      if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
         assert(pCreateInfo->pDepthStencilState);
         dynamic->stencil_reference.front =
            pCreateInfo->pDepthStencilState->front.reference;
         dynamic->stencil_reference.back =
            pCreateInfo->pDepthStencilState->back.reference;
      }
   }

   pipeline->dynamic_state_mask = states;
}

static void
anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
{
   struct anv_render_pass *renderpass = NULL;
   struct anv_subpass *subpass = NULL;

   /* Assert that all required members of VkGraphicsPipelineCreateInfo are
    * present, as explained by the Vulkan spec (20 Oct 2015, git-aa308cb),
    * Section 4.2 Graphics Pipeline.
    */
   assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   renderpass = anv_render_pass_from_handle(info->renderPass);
   assert(renderpass);

   if (renderpass != &anv_meta_dummy_renderpass) {
      assert(info->subpass < renderpass->subpass_count);
      subpass = &renderpass->subpasses[info->subpass];
   }

   assert(info->stageCount >= 1);
   assert(info->pVertexInputState);
   assert(info->pInputAssemblyState);
   assert(info->pViewportState);
   assert(info->pRasterizationState);

   if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
      assert(info->pDepthStencilState);

   if (subpass && subpass->color_count > 0)
      assert(info->pColorBlendState);

   for (uint32_t i = 0; i < info->stageCount; ++i) {
      switch (info->pStages[i].stage) {
      case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
      case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
         assert(info->pTessellationState);
         break;
      default:
         break;
      }
   }
}

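/* Gen-agnostic pipeline setup: validate the create info, set up the
 * pipeline's batch and program stream, resolve dynamic state, compile each
 * shader stage, partition the URB, and record the vertex-input and
 * input-assembly state the rest of the driver needs.
 */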
VkResult
anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
                  const VkGraphicsPipelineCreateInfo *pCreateInfo,
                  const struct anv_graphics_pipeline_create_info *extra,
                  const VkAllocationCallbacks *alloc)
{
   anv_validate {
      anv_pipeline_validate_create_info(pCreateInfo);
   }

   if (alloc == NULL)
      alloc = &device->alloc;

   pipeline->device = device;
   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

   anv_reloc_list_init(&pipeline->batch_relocs, alloc);
   /* TODO: Handle allocation fail */

   pipeline->batch.alloc = alloc;
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;

   anv_state_stream_init(&pipeline->program_stream,
                         &device->instruction_block_pool);

   anv_pipeline_init_dynamic_state(pipeline, pCreateInfo);

   if (pCreateInfo->pTessellationState)
      anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO");
   if (pCreateInfo->pMultisampleState &&
       pCreateInfo->pMultisampleState->rasterizationSamples > 1)
      anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO");

   pipeline->use_repclear = extra && extra->use_repclear;
   pipeline->writes_point_size = false;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));

   pipeline->vs_simd8 = NO_KERNEL;
   pipeline->vs_vec4 = NO_KERNEL;
   pipeline->gs_kernel = NO_KERNEL;

   pipeline->active_stages = 0;
   pipeline->total_scratch = 0;

   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      ANV_FROM_HANDLE(anv_shader_module, module,
                      pCreateInfo->pStages[i].module);
      const char *entrypoint = pCreateInfo->pStages[i].pName;

      switch (pCreateInfo->pStages[i].stage) {
      case VK_SHADER_STAGE_VERTEX_BIT:
         anv_pipeline_compile_vs(pipeline, pCreateInfo, module, entrypoint);
         break;
      case VK_SHADER_STAGE_GEOMETRY_BIT:
         anv_pipeline_compile_gs(pipeline, pCreateInfo, module, entrypoint);
         break;
      case VK_SHADER_STAGE_FRAGMENT_BIT:
         anv_pipeline_compile_fs(pipeline, pCreateInfo, module, entrypoint);
         break;
      default:
         anv_finishme("Unsupported shader stage");
      }
   }

   if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
      /* The vertex stage is only optional if disable_vs is set */
      assert(extra && extra->disable_vs);
      memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
   }

   gen7_compute_urb_partition(pipeline);

   const VkPipelineVertexInputStateCreateInfo *vi_info =
      pCreateInfo->pVertexInputState;

   uint64_t inputs_read;
   if (extra && extra->disable_vs) {
      /* If the VS is disabled, just assume the user knows what they're
       * doing and apply the layout blindly.  This can only come from
       * meta, so this *should* be safe.
       */
      inputs_read = ~0ull;
   } else {
      inputs_read = pipeline->vs_prog_data.inputs_read;
   }

   pipeline->vb_used = 0;
   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &vi_info->pVertexAttributeDescriptions[i];

      if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
         pipeline->vb_used |= 1 << desc->binding;
   }

   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *desc =
         &vi_info->pVertexBindingDescriptions[i];

      pipeline->binding_stride[desc->binding] = desc->stride;

      /* Step rate is programmed per vertex element (attribute), not
       * binding.  Set up a map of which bindings step per instance, for
       * reference by vertex element setup.
       */
      switch (desc->inputRate) {
      default:
      case VK_VERTEX_INPUT_RATE_VERTEX:
         pipeline->instancing_enable[desc->binding] = false;
         break;
      case VK_VERTEX_INPUT_RATE_INSTANCE:
         pipeline->instancing_enable[desc->binding] = true;
         break;
      }
   }

   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   pipeline->primitive_restart = ia_info->primitiveRestartEnable;
   pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];

   if (extra && extra->use_rectlist)
      pipeline->topology = _3DPRIM_RECTLIST;

   return VK_SUCCESS;
}

VkResult
anv_graphics_pipeline_create(
   VkDevice _device,
   const VkGraphicsPipelineCreateInfo *pCreateInfo,
   const struct anv_graphics_pipeline_create_info *extra,
   const VkAllocationCallbacks *pAllocator,
   VkPipeline *pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   switch (device->info.gen) {
   case 7:
      if (device->info.is_haswell)
         return gen75_graphics_pipeline_create(_device, pCreateInfo, extra,
                                               pAllocator, pPipeline);
      else
         return gen7_graphics_pipeline_create(_device, pCreateInfo, extra,
                                              pAllocator, pPipeline);
   case 8:
      return gen8_graphics_pipeline_create(_device, pCreateInfo, extra,
                                           pAllocator, pPipeline);
   case 9:
      return gen9_graphics_pipeline_create(_device, pCreateInfo, extra,
                                           pAllocator, pPipeline);
   default:
      unreachable("unsupported gen\n");
   }
}

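/* Note that on failure this implementation tears down every pipeline it has
 * already created in the batch and returns the error for the one that
 * failed, rather than continuing with the remaining entries.
 */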
VkResult anv_CreateGraphicsPipelines(
    VkDevice _device,
    VkPipelineCache pipelineCache,
    uint32_t count,
    const VkGraphicsPipelineCreateInfo *pCreateInfos,
    const VkAllocationCallbacks *pAllocator,
    VkPipeline *pPipelines)
{
   VkResult result = VK_SUCCESS;

   unsigned i = 0;
   for (; i < count; i++) {
      result = anv_graphics_pipeline_create(_device, &pCreateInfos[i],
                                            NULL, pAllocator, &pPipelines[i]);
      if (result != VK_SUCCESS) {
         for (unsigned j = 0; j < i; j++) {
            anv_DestroyPipeline(_device, pPipelines[j], pAllocator);
         }

         return result;
      }
   }

   return VK_SUCCESS;
}

static VkResult anv_compute_pipeline_create(
    VkDevice _device,
    const VkComputePipelineCreateInfo *pCreateInfo,
    const VkAllocationCallbacks *pAllocator,
    VkPipeline *pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   switch (device->info.gen) {
   case 7:
      if (device->info.is_haswell)
         return gen75_compute_pipeline_create(_device, pCreateInfo,
                                              pAllocator, pPipeline);
      else
         return gen7_compute_pipeline_create(_device, pCreateInfo,
                                             pAllocator, pPipeline);
   case 8:
      return gen8_compute_pipeline_create(_device, pCreateInfo,
                                          pAllocator, pPipeline);
   case 9:
      return gen9_compute_pipeline_create(_device, pCreateInfo,
                                          pAllocator, pPipeline);
   default:
      unreachable("unsupported gen\n");
   }
}

VkResult anv_CreateComputePipelines(
    VkDevice _device,
    VkPipelineCache pipelineCache,
    uint32_t count,
    const VkComputePipelineCreateInfo *pCreateInfos,
    const VkAllocationCallbacks *pAllocator,
    VkPipeline *pPipelines)
{
   VkResult result = VK_SUCCESS;

   unsigned i = 0;
   for (; i < count; i++) {
      result = anv_compute_pipeline_create(_device, &pCreateInfos[i],
                                           pAllocator, &pPipelines[i]);
      if (result != VK_SUCCESS) {
         for (unsigned j = 0; j < i; j++) {
            anv_DestroyPipeline(_device, pPipelines[j], pAllocator);
         }

         return result;
      }
   }

   return VK_SUCCESS;
}