[mesa.git] / src / vulkan / anv_pipeline.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31 #include "brw_nir.h"
32 #include "anv_nir.h"
33 #include "glsl/nir/nir_spirv.h"
34
35 /* Needed for SWIZZLE macros */
36 #include "program/prog_instruction.h"
37
38 // Shader functions
39
40 VkResult anv_CreateShaderModule(
41 VkDevice _device,
42 const VkShaderModuleCreateInfo* pCreateInfo,
43 VkShaderModule* pShaderModule)
44 {
45 ANV_FROM_HANDLE(anv_device, device, _device);
46 struct anv_shader_module *module;
47
48 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
49 assert(pCreateInfo->flags == 0);
50
51 module = anv_device_alloc(device, sizeof(*module) + pCreateInfo->codeSize, 8,
52 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
53 if (module == NULL)
54 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
55
56 module->nir = NULL;
57 module->size = pCreateInfo->codeSize;
58 memcpy(module->data, pCreateInfo->pCode, module->size);
59
60 *pShaderModule = anv_shader_module_to_handle(module);
61
62 return VK_SUCCESS;
63 }
64
65 void anv_DestroyShaderModule(
66 VkDevice _device,
67 VkShaderModule _module)
68 {
69 ANV_FROM_HANDLE(anv_device, device, _device);
70 ANV_FROM_HANDLE(anv_shader_module, module, _module);
71
72 anv_device_free(device, module);
73 }
74
75 VkResult anv_CreateShader(
76 VkDevice _device,
77 const VkShaderCreateInfo* pCreateInfo,
78 VkShader* pShader)
79 {
80 ANV_FROM_HANDLE(anv_device, device, _device);
81 ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->module);
82 struct anv_shader *shader;
83
84 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_CREATE_INFO);
85 assert(pCreateInfo->flags == 0);
86
87 const char *name = pCreateInfo->pName ? pCreateInfo->pName : "main";
88 size_t name_len = strlen(name);
89
90 shader = anv_device_alloc(device, sizeof(*shader) + name_len + 1, 8,
91 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
92 if (shader == NULL)
93 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
94
95 shader->module = module;
96 memcpy(shader->entrypoint, name, name_len + 1);
97
98 *pShader = anv_shader_to_handle(shader);
99
100 return VK_SUCCESS;
101 }
102
103 void anv_DestroyShader(
104 VkDevice _device,
105 VkShader _shader)
106 {
107 ANV_FROM_HANDLE(anv_device, device, _device);
108 ANV_FROM_HANDLE(anv_shader, shader, _shader);
109
110 anv_device_free(device, shader);
111 }
112
113 #define SPIR_V_MAGIC_NUMBER 0x07230203
114
115 static const gl_shader_stage vk_shader_stage_to_mesa_stage[] = {
116 [VK_SHADER_STAGE_VERTEX] = MESA_SHADER_VERTEX,
117 [VK_SHADER_STAGE_TESS_CONTROL] = -1,
118 [VK_SHADER_STAGE_TESS_EVALUATION] = -1,
119 [VK_SHADER_STAGE_GEOMETRY] = MESA_SHADER_GEOMETRY,
120 [VK_SHADER_STAGE_FRAGMENT] = MESA_SHADER_FRAGMENT,
121 [VK_SHADER_STAGE_COMPUTE] = MESA_SHADER_COMPUTE,
122 };
123
124 bool
125 anv_is_scalar_shader_stage(const struct brw_compiler *compiler,
126 VkShaderStage stage)
127 {
128 switch (stage) {
129 case VK_SHADER_STAGE_VERTEX:
130 return compiler->scalar_vs;
131 case VK_SHADER_STAGE_GEOMETRY:
132 return false;
133 case VK_SHADER_STAGE_FRAGMENT:
134 case VK_SHADER_STAGE_COMPUTE:
135 return true;
136 default:
137 unreachable("Unsupported shader stage");
138 }
139 }
140
141 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
142 * we can't do that yet because we don't have the ability to copy nir.
143 */
144 static nir_shader *
145 anv_shader_compile_to_nir(struct anv_device *device,
146 struct anv_shader *shader, VkShaderStage vk_stage)
147 {
148 if (strcmp(shader->entrypoint, "main") != 0) {
149 anv_finishme("Multiple shaders per module not really supported");
150 }
151
152 gl_shader_stage stage = vk_shader_stage_to_mesa_stage[vk_stage];
153 const struct brw_compiler *compiler =
154 device->instance->physicalDevice.compiler;
155 const nir_shader_compiler_options *nir_options =
156 compiler->glsl_compiler_options[stage].NirOptions;
157
158 nir_shader *nir;
159 if (shader->module->nir) {
160 /* Some things, such as our meta clear/blit code, will give us a NIR
161 * shader directly. In that case, we ignore the SPIR-V entirely and
162 * just use the NIR shader. */
163 nir = shader->module->nir;
164 nir->options = nir_options;
165 } else {
166 uint32_t *spirv = (uint32_t *) shader->module->data;
167 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
168 assert(shader->module->size % 4 == 0);
169
170 nir = spirv_to_nir(spirv, shader->module->size / 4, stage, nir_options);
171 }
172 nir_validate_shader(nir);
173
174 /* Vulkan uses the separate-shader linking model */
175 nir->info.separate_shader = true;
176
177 /* Make sure the provided shader has exactly one entrypoint and that the
178 * name matches the name that came in from the VkShader.
179 */
180 nir_function_impl *entrypoint = NULL;
181 nir_foreach_overload(nir, overload) {
182 if (strcmp(shader->entrypoint, overload->function->name) == 0 &&
183 overload->impl) {
184 assert(entrypoint == NULL);
185 entrypoint = overload->impl;
186 }
187 }
188 assert(entrypoint != NULL);
189
190 brw_preprocess_nir(nir, &device->info,
191 anv_is_scalar_shader_stage(compiler, vk_stage));
192
193 nir_shader_gather_info(nir, entrypoint);
194
195 return nir;
196 }
197
198 VkResult anv_CreatePipelineCache(
199 VkDevice device,
200 const VkPipelineCacheCreateInfo* pCreateInfo,
201 VkPipelineCache* pPipelineCache)
202 {
203 pPipelineCache->handle = 1;
204
205 stub_return(VK_SUCCESS);
206 }
207
208 void anv_DestroyPipelineCache(
209 VkDevice _device,
210 VkPipelineCache _cache)
211 {
212 }
213
214 size_t anv_GetPipelineCacheSize(
215 VkDevice device,
216 VkPipelineCache pipelineCache)
217 {
218 stub_return(0);
219 }
220
221 VkResult anv_GetPipelineCacheData(
222 VkDevice device,
223 VkPipelineCache pipelineCache,
224 void* pData)
225 {
226 stub_return(VK_UNSUPPORTED);
227 }
228
229 VkResult anv_MergePipelineCaches(
230 VkDevice device,
231 VkPipelineCache destCache,
232 uint32_t srcCacheCount,
233 const VkPipelineCache* pSrcCaches)
234 {
235 stub_return(VK_UNSUPPORTED);
236 }
237
238 void anv_DestroyPipeline(
239 VkDevice _device,
240 VkPipeline _pipeline)
241 {
242 ANV_FROM_HANDLE(anv_device, device, _device);
243 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
244
245 anv_reloc_list_finish(&pipeline->batch_relocs, pipeline->device);
246 anv_state_stream_finish(&pipeline->program_stream);
247 if (pipeline->blend_state.map)
248 anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
249 anv_device_free(pipeline->device, pipeline);
250 }
251
252 static const uint32_t vk_to_gen_primitive_type[] = {
253 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
254 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
255 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
256 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
257 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
258 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
259 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ,
260 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ,
261 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ,
262 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ,
263 /* [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 */
264 };
265
266 static void
267 populate_sampler_prog_key(const struct brw_device_info *devinfo,
268 struct brw_sampler_prog_key_data *key)
269 {
270 /* XXX: Handle texture swizzle on HSW- */
271 for (int i = 0; i < MAX_SAMPLERS; i++) {
272 /* Assume color sampler, no swizzling. (Works for BDW+) */
273 key->swizzles[i] = SWIZZLE_XYZW;
274 }
275 }
276
277 static void
278 populate_vs_prog_key(const struct brw_device_info *devinfo,
279 struct brw_vs_prog_key *key)
280 {
281 memset(key, 0, sizeof(*key));
282
283 populate_sampler_prog_key(devinfo, &key->tex);
284
285 /* XXX: Handle vertex input work-arounds */
286
287 /* XXX: Handle sampler_prog_key */
288 }
289
290 static void
291 populate_gs_prog_key(const struct brw_device_info *devinfo,
292 struct brw_gs_prog_key *key)
293 {
294 memset(key, 0, sizeof(*key));
295
296 populate_sampler_prog_key(devinfo, &key->tex);
297 }
298
299 static void
300 populate_wm_prog_key(const struct brw_device_info *devinfo,
301 const VkGraphicsPipelineCreateInfo *info,
302 struct brw_wm_prog_key *key)
303 {
304 ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass);
305
306 memset(key, 0, sizeof(*key));
307
308 populate_sampler_prog_key(devinfo, &key->tex);
309
310 /* TODO: Fill out key->input_slots_valid */
311
312 /* Vulkan doesn't specify a default */
313 key->high_quality_derivatives = false;
314
315 /* XXX Vulkan doesn't appear to specify */
316 key->clamp_fragment_color = false;
317
318 /* Vulkan always specifies upper-left coordinates */
319 key->drawable_height = 0;
320 key->render_to_fbo = false;
321
322 key->nr_color_regions = render_pass->subpasses[info->subpass].color_count;
323
324 key->replicate_alpha = key->nr_color_regions > 1 &&
325 info->pColorBlendState->alphaToCoverageEnable;
326
327 if (info->pMultisampleState && info->pMultisampleState->rasterSamples > 1) {
328 /* We should probably pull this out of the shader, but it's fairly
329 * harmless to compute it and then let dead-code take care of it.
330 */
331 key->persample_shading = info->pMultisampleState->sampleShadingEnable;
332 if (key->persample_shading)
333 key->persample_2x = info->pMultisampleState->rasterSamples == 2;
334
335 key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable;
336 key->compute_sample_id = info->pMultisampleState->sampleShadingEnable;
337 }
338 }
339
340 static void
341 populate_cs_prog_key(const struct brw_device_info *devinfo,
342 struct brw_cs_prog_key *key)
343 {
344 memset(key, 0, sizeof(*key));
345
346 populate_sampler_prog_key(devinfo, &key->tex);
347 }
348
349 static nir_shader *
350 anv_pipeline_compile(struct anv_pipeline *pipeline,
351 struct anv_shader *shader,
352 VkShaderStage stage,
353 struct brw_stage_prog_data *prog_data)
354 {
355 const struct brw_compiler *compiler =
356 pipeline->device->instance->physicalDevice.compiler;
357
358 nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, shader, stage);
359 if (nir == NULL)
360 return NULL;
361
362 anv_nir_lower_push_constants(nir, anv_is_scalar_shader_stage(compiler, stage));
363
364 /* Figure out the number of parameters */
365 prog_data->nr_params = 0;
366
367 if (nir->num_uniforms > 0) {
368 /* If the shader uses any push constants at all, we'll just give
369 * them the maximum possible number
370 */
371 prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
372 }
373
374 if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets)
375 prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2;
376
377 if (prog_data->nr_params > 0) {
378 prog_data->param = (const gl_constant_value **)
379 anv_device_alloc(pipeline->device,
380 prog_data->nr_params * sizeof(gl_constant_value *),
381 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER);
382
383 /* We now set the param values to be offsets into an
384 * anv_push_constants structure. Since the compiler doesn't
385 * actually dereference any of the gl_constant_value pointers in the
386 * params array, it doesn't really matter what we put here.
387 */
388 struct anv_push_constants *null_data = NULL;
389 if (nir->num_uniforms > 0) {
390 /* Fill out the push constants section of the param array */
391 for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++)
392 prog_data->param[i] = (const gl_constant_value *)
393 &null_data->client_data[i * sizeof(float)];
394 }
395 }
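   /* Made-up illustration of the sizing above: if MAX_PUSH_CONSTANTS_SIZE
    * were 128 bytes, any use of push constants reserves 128 / 4 = 32
    * float-sized params, and a layout with dynamic offsets adds
    * MAX_DYNAMIC_BUFFERS * 2 more. The pointers written into param[] are
    * only byte offsets relative to a NULL anv_push_constants and are never
    * dereferenced by the compiler.
    */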
396
397 /* Set up dynamic offsets */
398 anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data);
399
400 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
401 anv_nir_apply_pipeline_layout(nir, pipeline->layout);
402
403 /* All binding table offsets provided by apply_pipeline_layout() are
404 * relative to the start of the binding table (plus MAX_RTS for the FS).
405 */
406 unsigned bias = stage == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0;
407 prog_data->binding_table.size_bytes = 0;
408 prog_data->binding_table.texture_start = bias;
409 prog_data->binding_table.ubo_start = bias;
410 prog_data->binding_table.ssbo_start = bias;
411 prog_data->binding_table.image_start = bias;
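   /* Illustration of the bias above (hypothetical numbers): if MAX_RTS is 8,
    * a fragment shader whose pipeline layout assigns a texture to flat index
    * 0 and a UBO to flat index 1 ends up using binding table entries 8 and
    * 9, with entries 0..7 reserved for render targets. For any other stage
    * the bias is 0 and the same resources land at entries 0 and 1.
    */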
412
413 /* Finish the optimization and compilation process */
414 brw_postprocess_nir(nir, &pipeline->device->info,
415 anv_is_scalar_shader_stage(compiler, stage));
416
417 /* nir_lower_io will only handle the push constants; we need to set this
418 * to the full number of possible uniforms.
419 */
420 nir->num_uniforms = prog_data->nr_params;
421
422 return nir;
423 }
424
425 static uint32_t
426 anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
427 const void *data, size_t size)
428 {
429 struct anv_state state =
430 anv_state_stream_alloc(&pipeline->program_stream, size, 64);
431
432 assert(size < pipeline->program_stream.block_pool->block_size);
433
434 memcpy(state.map, data, size);
435
436 return state.offset;
437 }
438 static void
439 anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
440 VkShaderStage stage,
441 struct brw_stage_prog_data *prog_data)
442 {
443 struct brw_device_info *devinfo = &pipeline->device->info;
444 uint32_t max_threads[] = {
445 [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads,
446 [VK_SHADER_STAGE_TESS_CONTROL] = 0,
447 [VK_SHADER_STAGE_TESS_EVALUATION] = 0,
448 [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads,
449 [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads,
450 [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads,
451 };
452
453 pipeline->prog_data[stage] = prog_data;
454 pipeline->active_stages |= 1 << stage;
455 pipeline->scratch_start[stage] = pipeline->total_scratch;
456 pipeline->total_scratch =
457 align_u32(pipeline->total_scratch, 1024) +
458 prog_data->total_scratch * max_threads[stage];
459 }
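/* Made-up example of the scratch bookkeeping above: if the VS reports 1 KB of
 * per-thread scratch and the device exposes, say, 280 VS threads, the
 * pipeline's running total grows by 280 KB after 1 KB alignment, and each
 * later stage records its own scratch_start, so the stages end up with
 * disjoint slices of the scratch buffer sized by pipeline->total_scratch.
 */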
460
461 static VkResult
462 anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
463 const VkGraphicsPipelineCreateInfo *info,
464 struct anv_shader *shader)
465 {
466 const struct brw_compiler *compiler =
467 pipeline->device->instance->physicalDevice.compiler;
468 struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
469 struct brw_vs_prog_key key;
470
471 populate_vs_prog_key(&pipeline->device->info, &key);
472
473 /* TODO: Look up shader in cache */
474
475 memset(prog_data, 0, sizeof(*prog_data));
476
477 nir_shader *nir = anv_pipeline_compile(pipeline, shader,
478 VK_SHADER_STAGE_VERTEX,
479 &prog_data->base.base);
480 if (nir == NULL)
481 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
482
483 void *mem_ctx = ralloc_context(NULL);
484
485 if (shader->module->nir == NULL)
486 ralloc_steal(mem_ctx, nir);
487
488 prog_data->inputs_read = nir->info.inputs_read;
489 pipeline->writes_point_size = (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) != 0;
490
491 brw_compute_vue_map(&pipeline->device->info,
492 &prog_data->base.vue_map,
493 nir->info.outputs_written,
494 nir->info.separate_shader);
495
496 unsigned code_size;
497 const unsigned *shader_code =
498 brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir,
499 NULL, false, -1, &code_size, NULL);
500 if (shader_code == NULL) {
501 ralloc_free(mem_ctx);
502 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
503 }
504
505 const uint32_t offset =
506 anv_pipeline_upload_kernel(pipeline, shader_code, code_size);
507 if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
508 pipeline->vs_simd8 = offset;
509 pipeline->vs_vec4 = NO_KERNEL;
510 } else {
511 pipeline->vs_simd8 = NO_KERNEL;
512 pipeline->vs_vec4 = offset;
513 }
514
515 ralloc_free(mem_ctx);
516
517 anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
518 &prog_data->base.base);
519
520 return VK_SUCCESS;
521 }
522
523 static VkResult
524 anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
525 const VkGraphicsPipelineCreateInfo *info,
526 struct anv_shader *shader)
527 {
528 const struct brw_compiler *compiler =
529 pipeline->device->instance->physicalDevice.compiler;
530 struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data;
531 struct brw_gs_prog_key key;
532
533 populate_gs_prog_key(&pipeline->device->info, &key);
534
535 /* TODO: Look up shader in cache */
536
537 memset(prog_data, 0, sizeof(*prog_data));
538
539 nir_shader *nir = anv_pipeline_compile(pipeline, shader,
540 VK_SHADER_STAGE_GEOMETRY,
541 &prog_data->base.base);
542 if (nir == NULL)
543 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
544
545 void *mem_ctx = ralloc_context(NULL);
546
547 if (shader->module->nir == NULL)
548 ralloc_steal(mem_ctx, nir);
549
550 brw_compute_vue_map(&pipeline->device->info,
551 &prog_data->base.vue_map,
552 nir->info.outputs_written,
553 nir->info.separate_shader);
554
555 unsigned code_size;
556 const unsigned *shader_code =
557 brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir,
558 NULL, -1, &code_size, NULL);
559 if (shader_code == NULL) {
560 ralloc_free(mem_ctx);
561 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
562 }
563
564 /* TODO: SIMD8 GS */
565 pipeline->gs_vec4 =
566 anv_pipeline_upload_kernel(pipeline, shader_code, code_size);
567 pipeline->gs_vertex_count = nir->info.gs.vertices_in;
568
569 ralloc_free(mem_ctx);
570
571 anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
572 &prog_data->base.base);
573
574 return VK_SUCCESS;
575 }
576
577 static VkResult
578 anv_pipeline_compile_fs(struct anv_pipeline *pipeline,
579 const VkGraphicsPipelineCreateInfo *info,
580 struct anv_shader *shader)
581 {
582 const struct brw_compiler *compiler =
583 pipeline->device->instance->physicalDevice.compiler;
584 struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
585 struct brw_wm_prog_key key;
586
587 populate_wm_prog_key(&pipeline->device->info, info, &key);
588
589 if (pipeline->use_repclear)
590 key.nr_color_regions = 1;
591
592 /* TODO: Look up shader in cache */
593
594 memset(prog_data, 0, sizeof(*prog_data));
595
596 prog_data->binding_table.render_target_start = 0;
597
598 nir_shader *nir = anv_pipeline_compile(pipeline, shader,
599 VK_SHADER_STAGE_FRAGMENT,
600 &prog_data->base);
601 if (nir == NULL)
602 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
603
604 void *mem_ctx = ralloc_context(NULL);
605
606 if (shader->module->nir == NULL)
607 ralloc_steal(mem_ctx, nir);
608
609 unsigned code_size;
610 const unsigned *shader_code =
611 brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir,
612 NULL, -1, -1, pipeline->use_repclear, &code_size, NULL);
613 if (shader_code == NULL) {
614 ralloc_free(mem_ctx);
615 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
616 }
617
618 uint32_t offset = anv_pipeline_upload_kernel(pipeline,
619 shader_code, code_size);
620 if (prog_data->no_8)
621 pipeline->ps_simd8 = NO_KERNEL;
622 else
623 pipeline->ps_simd8 = offset;
624
625 if (prog_data->no_8 || prog_data->prog_offset_16) {
626 pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
627 } else {
628 pipeline->ps_simd16 = NO_KERNEL;
629 }
630
631 pipeline->ps_ksp2 = 0;
632 pipeline->ps_grf_start2 = 0;
633 if (pipeline->ps_simd8 != NO_KERNEL) {
634 pipeline->ps_ksp0 = pipeline->ps_simd8;
635 pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg;
636 if (pipeline->ps_simd16 != NO_KERNEL) {
637 pipeline->ps_ksp2 = pipeline->ps_simd16;
638 pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16;
639 }
640 } else if (pipeline->ps_simd16 != NO_KERNEL) {
641 pipeline->ps_ksp0 = pipeline->ps_simd16;
642 pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16;
643 }
644
645 ralloc_free(mem_ctx);
646
647 anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
648 &prog_data->base);
649
650 return VK_SUCCESS;
651 }
652
653 VkResult
654 anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
655 const VkComputePipelineCreateInfo *info,
656 struct anv_shader *shader)
657 {
658 const struct brw_compiler *compiler =
659 pipeline->device->instance->physicalDevice.compiler;
660 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
661 struct brw_cs_prog_key key;
662
663 populate_cs_prog_key(&pipeline->device->info, &key);
664
665 /* TODO: Look up shader in cache */
666
667 memset(prog_data, 0, sizeof(*prog_data));
668
669 nir_shader *nir = anv_pipeline_compile(pipeline, shader,
670 VK_SHADER_STAGE_COMPUTE,
671 &prog_data->base);
672 if (nir == NULL)
673 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
674
675 void *mem_ctx = ralloc_context(NULL);
676
677 if (shader->module->nir == NULL)
678 ralloc_steal(mem_ctx, nir);
679
680 unsigned code_size;
681 const unsigned *shader_code =
682 brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir,
683 -1, &code_size, NULL);
684 if (shader_code == NULL) {
685 ralloc_free(mem_ctx);
686 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
687 }
688
689 pipeline->cs_simd = anv_pipeline_upload_kernel(pipeline,
690 shader_code, code_size);
691 ralloc_free(mem_ctx);
692
693 anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
694 &prog_data->base);
695
696 return VK_SUCCESS;
697 }
698
699 static const int gen8_push_size = 32 * 1024;
700
701 static void
702 gen7_compute_urb_partition(struct anv_pipeline *pipeline)
703 {
704 const struct brw_device_info *devinfo = &pipeline->device->info;
705 bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT;
706 unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
707 unsigned vs_entry_size_bytes = vs_size * 64;
708 bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT;
709 unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
710 unsigned gs_entry_size_bytes = gs_size * 64;
711
712 /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
713 *
714 * VS Number of URB Entries must be divisible by 8 if the VS URB Entry
715 * Allocation Size is less than 9 512-bit URB entries.
716 *
717 * Similar text exists for GS.
718 */
719 unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
720 unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
721
722 /* URB allocations must be done in 8k chunks. */
723 unsigned chunk_size_bytes = 8192;
724
725 /* Determine the size of the URB in chunks. */
726 unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
727
728 /* Reserve space for push constants */
729 unsigned push_constant_bytes = gen8_push_size;
730 unsigned push_constant_chunks =
731 push_constant_bytes / chunk_size_bytes;
732
733 /* Initially, assign each stage the minimum amount of URB space it needs,
734 * and make a note of how much additional space it "wants" (the amount of
735 * additional space it could actually make use of).
736 */
737
738 /* VS has a lower limit on the number of URB entries */
739 unsigned vs_chunks =
740 ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
741 chunk_size_bytes) / chunk_size_bytes;
742 unsigned vs_wants =
743 ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
744 chunk_size_bytes) / chunk_size_bytes - vs_chunks;
745
746 unsigned gs_chunks = 0;
747 unsigned gs_wants = 0;
748 if (gs_present) {
749 /* There are two constraints on the minimum amount of URB space we can
750 * allocate:
751 *
752 * (1) We need room for at least 2 URB entries, since we always operate
753 * the GS in DUAL_OBJECT mode.
754 *
755 * (2) We can't allocate fewer than gs_granularity entries.
756 */
757 gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
758 chunk_size_bytes) / chunk_size_bytes;
759 gs_wants =
760 ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
761 chunk_size_bytes) / chunk_size_bytes - gs_chunks;
762 }
763
764 /* There should always be enough URB space to satisfy the minimum
765 * requirements of each stage.
766 */
767 unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
768 assert(total_needs <= urb_chunks);
769
770 /* Mete out remaining space (if any) in proportion to "wants". */
771 unsigned total_wants = vs_wants + gs_wants;
772 unsigned remaining_space = urb_chunks - total_needs;
773 if (remaining_space > total_wants)
774 remaining_space = total_wants;
775 if (remaining_space > 0) {
776 unsigned vs_additional = (unsigned)
777 round(vs_wants * (((double) remaining_space) / total_wants));
778 vs_chunks += vs_additional;
779 remaining_space -= vs_additional;
780 gs_chunks += remaining_space;
781 }
782
783 /* Sanity check that we haven't over-allocated. */
784 assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
785
786 /* Finally, compute the number of entries that can fit in the space
787 * allocated to each stage.
788 */
789 unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
790 unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
791
792 /* Since we rounded up when computing *_wants, this may be slightly more
793 * than the maximum allowed amount, so correct for that.
794 */
795 nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
796 nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
797
798 /* Ensure that we program a multiple of the granularity. */
799 nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
800 nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
801
802 /* Finally, sanity check to make sure we have at least the minimum number
803 * of entries needed for each stage.
804 */
805 assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
806 if (gs_present)
807 assert(nr_gs_entries >= 2);
808
809 /* Lay out the URB in the following order:
810 * - push constants
811 * - VS
812 * - GS
813 */
814 pipeline->urb.vs_start = push_constant_chunks;
815 pipeline->urb.vs_size = vs_size;
816 pipeline->urb.nr_vs_entries = nr_vs_entries;
817
818 pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
819 pipeline->urb.gs_size = gs_size;
820 pipeline->urb.nr_gs_entries = nr_gs_entries;
821 }
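/* A made-up example of the partitioning above: with the 8 KB chunk size, a
 * hypothetical 192 KB URB gives 24 chunks, of which 4 are reserved for the
 * 32 KB of push constants. A VS entry size of 2 (128 bytes) with a minimum
 * of 64 VS entries then needs ALIGN(64 * 128, 8192) / 8192 = 1 chunk up
 * front; the 19 remaining chunks are handed out in proportion to the
 * "wants", and the final entry counts are clamped to the device maximum and
 * rounded down to the divisibility granularity. All numbers here are
 * illustrative only; the real ones come from devinfo->urb and prog_data.
 */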
822
823 static void
824 anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline,
825 const VkGraphicsPipelineCreateInfo *pCreateInfo)
826 {
827 anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
828 ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
829 struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
830
831 pipeline->dynamic_state = default_dynamic_state;
832
833 if (pCreateInfo->pDynamicState) {
834 /* Remove all of the states that are marked as dynamic */
835 uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
836 for (uint32_t s = 0; s < count; s++)
837 states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]);
838 }
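   /* For instance, if the application lists VK_DYNAMIC_STATE_VIEWPORT here,
    * its bit is cleared from "states" and the viewport copy below is
    * skipped; the pipeline then expects the viewport to come from
    * vkCmdSetViewport() at record time rather than from pCreateInfo.
    */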
839
840 struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
841
842 dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
843 if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
844 typed_memcpy(dynamic->viewport.viewports,
845 pCreateInfo->pViewportState->pViewports,
846 pCreateInfo->pViewportState->viewportCount);
847 }
848
849 dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
850 if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
851 typed_memcpy(dynamic->scissor.scissors,
852 pCreateInfo->pViewportState->pScissors,
853 pCreateInfo->pViewportState->scissorCount);
854 }
855
856 if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
857 assert(pCreateInfo->pRasterState);
858 dynamic->line_width = pCreateInfo->pRasterState->lineWidth;
859 }
860
861 if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
862 assert(pCreateInfo->pRasterState);
863 dynamic->depth_bias.bias = pCreateInfo->pRasterState->depthBias;
864 dynamic->depth_bias.clamp = pCreateInfo->pRasterState->depthBiasClamp;
865 dynamic->depth_bias.slope_scaled =
866 pCreateInfo->pRasterState->slopeScaledDepthBias;
867 }
868
869 if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
870 assert(pCreateInfo->pColorBlendState);
871 typed_memcpy(dynamic->blend_constants,
872 pCreateInfo->pColorBlendState->blendConst, 4);
873 }
874
875 /* If there is no depth/stencil attachment, then don't read
876 * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
877 * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
878 * no need to override the depth/stencil defaults in
879 * anv_pipeline::dynamic_state when there is no depth/stencil attachment.
880 *
881 * From the Vulkan spec (20 Oct 2015, git-aa308cb):
882 *
883 * pDepthStencilState [...] may only be NULL if renderPass and subpass
884 * specify a subpass that has no depth/stencil attachment.
885 */
886 if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
887 if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
888 assert(pCreateInfo->pDepthStencilState);
889 dynamic->depth_bounds.min =
890 pCreateInfo->pDepthStencilState->minDepthBounds;
891 dynamic->depth_bounds.max =
892 pCreateInfo->pDepthStencilState->maxDepthBounds;
893 }
894
895 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
896 assert(pCreateInfo->pDepthStencilState);
897 dynamic->stencil_compare_mask.front =
898 pCreateInfo->pDepthStencilState->front.stencilCompareMask;
899 dynamic->stencil_compare_mask.back =
900 pCreateInfo->pDepthStencilState->back.stencilCompareMask;
901 }
902
903 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
904 assert(pCreateInfo->pDepthStencilState);
905 dynamic->stencil_write_mask.front =
906 pCreateInfo->pDepthStencilState->front.stencilWriteMask;
907 dynamic->stencil_write_mask.back =
908 pCreateInfo->pDepthStencilState->back.stencilWriteMask;
909 }
910
911 if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
912 assert(pCreateInfo->pDepthStencilState);
913 dynamic->stencil_reference.front =
914 pCreateInfo->pDepthStencilState->front.stencilReference;
915 dynamic->stencil_reference.back =
916 pCreateInfo->pDepthStencilState->back.stencilReference;
917 }
918 }
919
920 pipeline->dynamic_state_mask = states;
921 }
922
923 static void
924 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
925 {
926 struct anv_render_pass *renderpass = NULL;
927 struct anv_subpass *subpass = NULL;
928
929 /* Assert that all required members of VkGraphicsPipelineCreateInfo are
930 * present, as explained by the Vulkan (20 Oct 2015, git-aa308cb), Section
931 * 4.2 Graphics Pipeline.
932 */
933 assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
934
935 renderpass = anv_render_pass_from_handle(info->renderPass);
936 assert(renderpass);
937
938 if (renderpass != &anv_meta_dummy_renderpass) {
939 assert(info->subpass < renderpass->subpass_count);
940 subpass = &renderpass->subpasses[info->subpass];
941 }
942
943 assert(info->stageCount >= 1);
944 assert(info->pVertexInputState);
945 assert(info->pInputAssemblyState);
946 assert(info->pViewportState);
947 assert(info->pRasterState);
948 assert(info->pMultisampleState);
949
950 if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
951 assert(info->pDepthStencilState);
952
953 if (subpass && subpass->color_count > 0)
954 assert(info->pColorBlendState);
955
956 for (uint32_t i = 0; i < info->stageCount; ++i) {
957 switch (info->pStages[i].stage) {
958 case VK_SHADER_STAGE_TESS_CONTROL:
959 case VK_SHADER_STAGE_TESS_EVALUATION:
960 assert(info->pTessellationState);
961 break;
962 default:
963 break;
964 }
965 }
966 }
967
968 VkResult
969 anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
970 const VkGraphicsPipelineCreateInfo *pCreateInfo,
971 const struct anv_graphics_pipeline_create_info *extra)
972 {
973 VkResult result;
974
975 anv_validate {
976 anv_pipeline_validate_create_info(pCreateInfo);
977 }
978
979 pipeline->device = device;
980 pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);
981
982 result = anv_reloc_list_init(&pipeline->batch_relocs, device);
983 if (result != VK_SUCCESS) {
984 anv_device_free(device, pipeline);
985 return result;
986 }
987 pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
988 pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
989 pipeline->batch.relocs = &pipeline->batch_relocs;
990
991 anv_state_stream_init(&pipeline->program_stream,
992 &device->instruction_block_pool);
993
994 anv_pipeline_init_dynamic_state(pipeline, pCreateInfo);
995
996 if (pCreateInfo->pTessellationState)
997 anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO");
998 if (pCreateInfo->pMultisampleState &&
999 pCreateInfo->pMultisampleState->rasterSamples > 1)
1000 anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO");
1001
1002 pipeline->use_repclear = extra && extra->use_repclear;
1003 pipeline->writes_point_size = false;
1004
1005 /* When we free the pipeline, we detect stages based on the NULL status
1006 * of various prog_data pointers. Make them NULL by default.
1007 */
1008 memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
1009 memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
1010
1011 pipeline->vs_simd8 = NO_KERNEL;
1012 pipeline->vs_vec4 = NO_KERNEL;
1013 pipeline->gs_vec4 = NO_KERNEL;
1014
1015 pipeline->active_stages = 0;
1016 pipeline->total_scratch = 0;
1017
1018 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
1019 ANV_FROM_HANDLE(anv_shader, shader, pCreateInfo->pStages[i].shader);
1020
1021 switch (pCreateInfo->pStages[i].stage) {
1022 case VK_SHADER_STAGE_VERTEX:
1023 anv_pipeline_compile_vs(pipeline, pCreateInfo, shader);
1024 break;
1025 case VK_SHADER_STAGE_GEOMETRY:
1026 anv_pipeline_compile_gs(pipeline, pCreateInfo, shader);
1027 break;
1028 case VK_SHADER_STAGE_FRAGMENT:
1029 anv_pipeline_compile_fs(pipeline, pCreateInfo, shader);
1030 break;
1031 default:
1032 anv_finishme("Unsupported shader stage");
1033 }
1034 }
1035
1036 if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
1037 /* Vertex is only optional if disable_vs is set */
1038 assert(extra && extra->disable_vs);
1039 memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
1040 }
1041
1042 gen7_compute_urb_partition(pipeline);
1043
1044 const VkPipelineVertexInputStateCreateInfo *vi_info =
1045 pCreateInfo->pVertexInputState;
1046 pipeline->vb_used = 0;
1047 for (uint32_t i = 0; i < vi_info->bindingCount; i++) {
1048 const VkVertexInputBindingDescription *desc =
1049 &vi_info->pVertexBindingDescriptions[i];
1050
1051 pipeline->vb_used |= 1 << desc->binding;
1052 pipeline->binding_stride[desc->binding] = desc->strideInBytes;
1053
1054 /* Step rate is programmed per vertex element (attribute), not
1055 * binding. Set up a map of which bindings step per instance, for
1056 * reference by vertex element setup. */
1057 switch (desc->stepRate) {
1058 default:
1059 case VK_VERTEX_INPUT_STEP_RATE_VERTEX:
1060 pipeline->instancing_enable[desc->binding] = false;
1061 break;
1062 case VK_VERTEX_INPUT_STEP_RATE_INSTANCE:
1063 pipeline->instancing_enable[desc->binding] = true;
1064 break;
1065 }
1066 }
1067
1068 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1069 pCreateInfo->pInputAssemblyState;
1070 pipeline->primitive_restart = ia_info->primitiveRestartEnable;
1071 pipeline->topology = vk_to_gen_primitive_type[ia_info->topology];
1072
1073 if (extra && extra->use_rectlist)
1074 pipeline->topology = _3DPRIM_RECTLIST;
1075
1076 return VK_SUCCESS;
1077 }
1078
1079 VkResult
1080 anv_graphics_pipeline_create(
1081 VkDevice _device,
1082 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1083 const struct anv_graphics_pipeline_create_info *extra,
1084 VkPipeline *pPipeline)
1085 {
1086 ANV_FROM_HANDLE(anv_device, device, _device);
1087
1088 switch (device->info.gen) {
1089 case 7:
1090 if (device->info.is_haswell)
1091 return gen75_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline);
1092 else
1093 return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline);
1094 case 8:
1095 return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline);
1096 default:
1097 unreachable("unsupported gen\n");
1098 }
1099 }
1100
1101 VkResult anv_CreateGraphicsPipelines(
1102 VkDevice _device,
1103 VkPipelineCache pipelineCache,
1104 uint32_t count,
1105 const VkGraphicsPipelineCreateInfo* pCreateInfos,
1106 VkPipeline* pPipelines)
1107 {
1108 VkResult result = VK_SUCCESS;
1109
1110 unsigned i = 0;
1111 for (; i < count; i++) {
1112 result = anv_graphics_pipeline_create(_device, &pCreateInfos[i],
1113 NULL, &pPipelines[i]);
1114 if (result != VK_SUCCESS) {
1115 for (unsigned j = 0; j < i; j++) {
1116 anv_DestroyPipeline(_device, pPipelines[j]);
1117 }
1118
1119 return result;
1120 }
1121 }
1122
1123 return VK_SUCCESS;
1124 }
1125
1126 static VkResult anv_compute_pipeline_create(
1127 VkDevice _device,
1128 const VkComputePipelineCreateInfo* pCreateInfo,
1129 VkPipeline* pPipeline)
1130 {
1131 ANV_FROM_HANDLE(anv_device, device, _device);
1132
1133 switch (device->info.gen) {
1134 case 7:
1135 if (device->info.is_haswell)
1136 return gen75_compute_pipeline_create(_device, pCreateInfo, pPipeline);
1137 else
1138 return gen7_compute_pipeline_create(_device, pCreateInfo, pPipeline);
1139 case 8:
1140 return gen8_compute_pipeline_create(_device, pCreateInfo, pPipeline);
1141 default:
1142 unreachable("unsupported gen\n");
1143 }
1144 }
1145
1146 VkResult anv_CreateComputePipelines(
1147 VkDevice _device,
1148 VkPipelineCache pipelineCache,
1149 uint32_t count,
1150 const VkComputePipelineCreateInfo* pCreateInfos,
1151 VkPipeline* pPipelines)
1152 {
1153 VkResult result = VK_SUCCESS;
1154
1155 unsigned i = 0;
1156 for (; i < count; i++) {
1157 result = anv_compute_pipeline_create(_device, &pCreateInfos[i],
1158 &pPipelines[i]);
1159 if (result != VK_SUCCESS) {
1160 for (unsigned j = 0; j < i; j++) {
1161 anv_DestroyPipeline(_device, pPipelines[j]);
1162 }
1163
1164 return result;
1165 }
1166 }
1167
1168 return VK_SUCCESS;
1169 }