turnip: set shader key msaa field
[mesa.git] / src / freedreno / vulkan / tu_shader.c
1 /*
2 * Copyright © 2019 Google LLC
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "tu_private.h"
25
26 #include "spirv/nir_spirv.h"
27 #include "util/mesa-sha1.h"
28 #include "nir/nir_xfb_info.h"
29
30 #include "ir3/ir3_nir.h"
31
32 static nir_shader *
33 tu_spirv_to_nir(struct ir3_compiler *compiler,
34 const uint32_t *words,
35 size_t word_count,
36 gl_shader_stage stage,
37 const char *entry_point_name,
38 const VkSpecializationInfo *spec_info)
39 {
40 /* TODO these are made-up */
41 const struct spirv_to_nir_options spirv_options = {
42 .frag_coord_is_sysval = true,
43 .lower_ubo_ssbo_access_to_offsets = true,
44 .caps = {
45 .transform_feedback = compiler->gpu_id >= 600,
46 },
47 };
48 const nir_shader_compiler_options *nir_options =
49 ir3_get_compiler_options(compiler);
50
51 /* convert VkSpecializationInfo */
52 struct nir_spirv_specialization *spec = NULL;
53 uint32_t num_spec = 0;
54 if (spec_info && spec_info->mapEntryCount) {
55 spec = malloc(sizeof(*spec) * spec_info->mapEntryCount);
56 if (!spec)
57 return NULL;
58
59 for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
60 const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
61 const void *data = spec_info->pData + entry->offset;
62 assert(data + entry->size <= spec_info->pData + spec_info->dataSize);
63 spec[i].id = entry->constantID;
64 if (entry->size == 8)
65 spec[i].data64 = *(const uint64_t *) data;
66 else
67 spec[i].data32 = *(const uint32_t *) data;
68 spec[i].defined_on_module = false;
69 }
70
71 num_spec = spec_info->mapEntryCount;
72 }
73
74 nir_shader *nir =
75 spirv_to_nir(words, word_count, spec, num_spec, stage, entry_point_name,
76 &spirv_options, nir_options);
77
78 free(spec);
79
80 assert(nir->info.stage == stage);
81 nir_validate_shader(nir, "after spirv_to_nir");
82
83 return nir;
84 }
85
86 static void
87 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
88 struct tu_shader *shader)
89 {
90 nir_intrinsic_instr *load =
91 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
92 load->num_components = instr->num_components;
93 uint32_t base = nir_intrinsic_base(instr);
94 assert(base % 4 == 0);
95 assert(base >= shader->push_consts.lo * 16);
96 base -= shader->push_consts.lo * 16;
97 nir_intrinsic_set_base(load, base / 4);
98 load->src[0] =
99 nir_src_for_ssa(nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)));
100 nir_ssa_dest_init(&load->instr, &load->dest,
101 load->num_components, instr->dest.ssa.bit_size,
102 instr->dest.ssa.name);
103 nir_builder_instr_insert(b, &load->instr);
104 nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
105
106 nir_instr_remove(&instr->instr);
107 }
108
109 static void
110 lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
111 struct tu_shader *shader,
112 const struct tu_pipeline_layout *layout)
113 {
114 nir_ssa_def *vulkan_idx = instr->src[0].ssa;
115
116 unsigned set = nir_intrinsic_desc_set(instr);
117 unsigned binding = nir_intrinsic_binding(instr);
118 struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
119 struct tu_descriptor_set_binding_layout *binding_layout =
120 &set_layout->binding[binding];
121 uint32_t base;
122
123 switch (binding_layout->type) {
124 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
125 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
126 base = layout->set[set].dynamic_offset_start +
127 binding_layout->dynamic_offset_offset +
128 layout->input_attachment_count;
129 set = MAX_SETS;
130 break;
131 default:
132 base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
133 break;
134 }
135
136 nir_intrinsic_instr *bindless =
137 nir_intrinsic_instr_create(b->shader,
138 nir_intrinsic_bindless_resource_ir3);
139 bindless->num_components = 1;
140 nir_ssa_dest_init(&bindless->instr, &bindless->dest,
141 1, 32, NULL);
142 nir_intrinsic_set_desc_set(bindless, set);
143 bindless->src[0] = nir_src_for_ssa(nir_iadd(b, nir_imm_int(b, base), vulkan_idx));
144 nir_builder_instr_insert(b, &bindless->instr);
145
146 nir_ssa_def_rewrite_uses(&instr->dest.ssa,
147 nir_src_for_ssa(&bindless->dest.ssa));
148 nir_instr_remove(&instr->instr);
149 }
150
151 static nir_ssa_def *
152 build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler,
153 struct tu_shader *shader,
154 const struct tu_pipeline_layout *layout)
155 {
156 nir_variable *var = nir_deref_instr_get_variable(deref);
157
158 unsigned set = var->data.descriptor_set;
159 unsigned binding = var->data.binding;
160 const struct tu_descriptor_set_binding_layout *bind_layout =
161 &layout->set[set].layout->binding[binding];
162
163 nir_ssa_def *desc_offset;
164 unsigned descriptor_stride;
165 if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
166 unsigned offset =
167 layout->set[set].input_attachment_start +
168 bind_layout->input_attachment_offset;
169 desc_offset = nir_imm_int(b, offset);
170 set = MAX_SETS;
171 descriptor_stride = 1;
172 } else {
173 unsigned offset = 0;
174 /* Samplers come second in combined image/sampler descriptors, see
175 * write_combined_image_sampler_descriptor().
176 */
177 if (is_sampler && bind_layout->type ==
178 VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
179 offset = 1;
180 }
181 desc_offset =
182 nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
183 offset);
184 descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);
185 }
186
187 if (deref->deref_type != nir_deref_type_var) {
188 assert(deref->deref_type == nir_deref_type_array);
189
190 nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
191 desc_offset = nir_iadd(b, desc_offset,
192 nir_imul_imm(b, arr_index, descriptor_stride));
193 }
194
195 nir_intrinsic_instr *bindless =
196 nir_intrinsic_instr_create(b->shader,
197 nir_intrinsic_bindless_resource_ir3);
198 bindless->num_components = 1;
199 nir_ssa_dest_init(&bindless->instr, &bindless->dest,
200 1, 32, NULL);
201 nir_intrinsic_set_desc_set(bindless, set);
202 bindless->src[0] = nir_src_for_ssa(desc_offset);
203 nir_builder_instr_insert(b, &bindless->instr);
204
205 return &bindless->dest.ssa;
206 }
207
208 static void
209 lower_image_deref(nir_builder *b,
210 nir_intrinsic_instr *instr, struct tu_shader *shader,
211 const struct tu_pipeline_layout *layout)
212 {
213 nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
214 nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
215 nir_rewrite_image_intrinsic(instr, bindless, true);
216 }
217
218 static bool
219 lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
220 struct tu_shader *shader,
221 const struct tu_pipeline_layout *layout)
222 {
223 switch (instr->intrinsic) {
224 case nir_intrinsic_load_layer_id:
225 /* TODO: remove this when layered rendering is implemented */
226 nir_ssa_def_rewrite_uses(&instr->dest.ssa,
227 nir_src_for_ssa(nir_imm_int(b, 0)));
228 nir_instr_remove(&instr->instr);
229 return true;
230
231 case nir_intrinsic_load_push_constant:
232 lower_load_push_constant(b, instr, shader);
233 return true;
234
235 case nir_intrinsic_vulkan_resource_index:
236 lower_vulkan_resource_index(b, instr, shader, layout);
237 return true;
238
239 case nir_intrinsic_image_deref_load:
240 case nir_intrinsic_image_deref_store:
241 case nir_intrinsic_image_deref_atomic_add:
242 case nir_intrinsic_image_deref_atomic_imin:
243 case nir_intrinsic_image_deref_atomic_umin:
244 case nir_intrinsic_image_deref_atomic_imax:
245 case nir_intrinsic_image_deref_atomic_umax:
246 case nir_intrinsic_image_deref_atomic_and:
247 case nir_intrinsic_image_deref_atomic_or:
248 case nir_intrinsic_image_deref_atomic_xor:
249 case nir_intrinsic_image_deref_atomic_exchange:
250 case nir_intrinsic_image_deref_atomic_comp_swap:
251 case nir_intrinsic_image_deref_size:
252 case nir_intrinsic_image_deref_samples:
253 lower_image_deref(b, instr, shader, layout);
254 return true;
255
256 default:
257 return false;
258 }
259 }
260
261 static bool
262 lower_tex(nir_builder *b, nir_tex_instr *tex,
263 struct tu_shader *shader, const struct tu_pipeline_layout *layout)
264 {
265 int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
266 if (sampler_src_idx >= 0) {
267 nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
268 nir_ssa_def *bindless = build_bindless(b, deref, true, shader, layout);
269 nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_src_idx].src,
270 nir_src_for_ssa(bindless));
271 tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
272 }
273
274 int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
275 if (tex_src_idx >= 0) {
276 nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
277 nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
278 nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src,
279 nir_src_for_ssa(bindless));
280 tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;
281 }
282
283 return true;
284 }
285
286 static bool
287 lower_impl(nir_function_impl *impl, struct tu_shader *shader,
288 const struct tu_pipeline_layout *layout)
289 {
290 nir_builder b;
291 nir_builder_init(&b, impl);
292 bool progress = false;
293
294 nir_foreach_block(block, impl) {
295 nir_foreach_instr_safe(instr, block) {
296 b.cursor = nir_before_instr(instr);
297 switch (instr->type) {
298 case nir_instr_type_tex:
299 progress |= lower_tex(&b, nir_instr_as_tex(instr), shader, layout);
300 break;
301 case nir_instr_type_intrinsic:
302 progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout);
303 break;
304 default:
305 break;
306 }
307 }
308 }
309
310 return progress;
311 }
312
313
314 /* Figure out the range of push constants that we're actually going to push to
315 * the shader, and tell the backend to reserve this range when pushing UBO
316 * constants.
317 */
318
319 static void
320 gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
321 {
322 uint32_t min = UINT32_MAX, max = 0;
323 nir_foreach_function(function, shader) {
324 if (!function->impl)
325 continue;
326
327 nir_foreach_block(block, function->impl) {
328 nir_foreach_instr_safe(instr, block) {
329 if (instr->type != nir_instr_type_intrinsic)
330 continue;
331
332 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
333 if (intrin->intrinsic != nir_intrinsic_load_push_constant)
334 continue;
335
336 uint32_t base = nir_intrinsic_base(intrin);
337 uint32_t range = nir_intrinsic_range(intrin);
338 min = MIN2(min, base);
339 max = MAX2(max, base + range);
340 break;
341 }
342 }
343 }
344
345 if (min >= max) {
346 tu_shader->push_consts.lo = 0;
347 tu_shader->push_consts.count = 0;
348 tu_shader->ir3_shader.const_state.num_reserved_user_consts = 0;
349 return;
350 }
351
352 /* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
353 * however there's an alignment requirement of 4 on OFFSET. Expand the
354 * range and change units accordingly.
355 */
356 tu_shader->push_consts.lo = (min / 16) / 4 * 4;
357 tu_shader->push_consts.count =
358 align(max, 16) / 16 - tu_shader->push_consts.lo;
359 tu_shader->ir3_shader.const_state.num_reserved_user_consts =
360 align(tu_shader->push_consts.count, 4);
361 }
362
363 /* Gather the InputAttachmentIndex for each input attachment from the NIR
364 * shader and organize the info in a way so that draw-time patching is easy.
365 */
366 static void
367 gather_input_attachments(nir_shader *shader, struct tu_shader *tu_shader,
368 const struct tu_pipeline_layout *layout)
369 {
370 nir_foreach_variable(var, &shader->uniforms) {
371 const struct glsl_type *glsl_type = glsl_without_array(var->type);
372
373 if (!glsl_type_is_image(glsl_type))
374 continue;
375
376 enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);
377
378 const uint32_t set = var->data.descriptor_set;
379 const uint32_t binding = var->data.binding;
380 const struct tu_descriptor_set_binding_layout *bind_layout =
381 &layout->set[set].layout->binding[binding];
382 const uint32_t array_size = bind_layout->array_size;
383
384 if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
385 dim == GLSL_SAMPLER_DIM_SUBPASS_MS) {
386 unsigned offset =
387 layout->set[set].input_attachment_start +
388 bind_layout->input_attachment_offset;
389 for (unsigned i = 0; i < array_size; i++)
390 tu_shader->attachment_idx[offset + i] = var->data.index + i;
391 }
392 }
393 }
394
395 static bool
396 tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
397 const struct tu_pipeline_layout *layout)
398 {
399 bool progress = false;
400
401 gather_push_constants(shader, tu_shader);
402 gather_input_attachments(shader, tu_shader, layout);
403
404 nir_foreach_function(function, shader) {
405 if (function->impl)
406 progress |= lower_impl(function->impl, tu_shader, layout);
407 }
408
409 return progress;
410 }
411
412 static void
413 tu_gather_xfb_info(nir_shader *nir, struct tu_shader *shader)
414 {
415 struct ir3_stream_output_info *info = &shader->ir3_shader.stream_output;
416 nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
417
418 if (!xfb)
419 return;
420
421 /* creating a map from VARYING_SLOT_* enums to consecutive index */
422 uint8_t num_outputs = 0;
423 uint64_t outputs_written = 0;
424 for (int i = 0; i < xfb->output_count; i++)
425 outputs_written |= BITFIELD64_BIT(xfb->outputs[i].location);
426
427 uint8_t output_map[VARYING_SLOT_TESS_MAX];
428 memset(output_map, 0, sizeof(output_map));
429
430 for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
431 if (outputs_written & BITFIELD64_BIT(attr))
432 output_map[attr] = num_outputs++;
433 }
434
435 assert(xfb->output_count < IR3_MAX_SO_OUTPUTS);
436 info->num_outputs = xfb->output_count;
437
438 for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++)
439 info->stride[i] = xfb->buffers[i].stride / 4;
440
441 for (int i = 0; i < xfb->output_count; i++) {
442 info->output[i].register_index = output_map[xfb->outputs[i].location];
443 info->output[i].start_component = xfb->outputs[i].component_offset;
444 info->output[i].num_components =
445 util_bitcount(xfb->outputs[i].component_mask);
446 info->output[i].output_buffer = xfb->outputs[i].buffer;
447 info->output[i].dst_offset = xfb->outputs[i].offset / 4;
448 info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
449 }
450
451 ralloc_free(xfb);
452 }
453
454 struct tu_shader *
455 tu_shader_create(struct tu_device *dev,
456 gl_shader_stage stage,
457 const VkPipelineShaderStageCreateInfo *stage_info,
458 struct tu_pipeline_layout *layout,
459 const VkAllocationCallbacks *alloc)
460 {
461 const struct tu_shader_module *module =
462 tu_shader_module_from_handle(stage_info->module);
463 struct tu_shader *shader;
464
465 const uint32_t max_variant_count = (stage == MESA_SHADER_VERTEX) ? 2 : 1;
466 shader = vk_zalloc2(
467 &dev->alloc, alloc,
468 sizeof(*shader) + sizeof(struct ir3_shader_variant) * max_variant_count,
469 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
470 if (!shader)
471 return NULL;
472
473 /* translate SPIR-V to NIR */
474 assert(module->code_size % 4 == 0);
475 nir_shader *nir = tu_spirv_to_nir(
476 dev->compiler, (const uint32_t *) module->code, module->code_size / 4,
477 stage, stage_info->pName, stage_info->pSpecializationInfo);
478 if (!nir) {
479 vk_free2(&dev->alloc, alloc, shader);
480 return NULL;
481 }
482
483 if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) {
484 fprintf(stderr, "translated nir:\n");
485 nir_print_shader(nir, stderr);
486 }
487
488 /* multi step inlining procedure */
489 NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
490 NIR_PASS_V(nir, nir_lower_returns);
491 NIR_PASS_V(nir, nir_inline_functions);
492 NIR_PASS_V(nir, nir_opt_deref);
493 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
494 if (!func->is_entrypoint)
495 exec_node_remove(&func->node);
496 }
497 assert(exec_list_length(&nir->functions) == 1);
498 NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);
499
500 /* Split member structs. We do this before lower_io_to_temporaries so that
501 * it doesn't lower system values to temporaries by accident.
502 */
503 NIR_PASS_V(nir, nir_split_var_copies);
504 NIR_PASS_V(nir, nir_split_per_member_structs);
505
506 NIR_PASS_V(nir, nir_remove_dead_variables,
507 nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared);
508
509 /* Gather information for transform feedback.
510 * This should be called after nir_split_per_member_structs.
511 * Also needs to be called after nir_remove_dead_variables with varyings,
512 * so that we could align stream outputs correctly.
513 */
514 if (nir->info.stage == MESA_SHADER_VERTEX ||
515 nir->info.stage == MESA_SHADER_TESS_EVAL ||
516 nir->info.stage == MESA_SHADER_GEOMETRY)
517 tu_gather_xfb_info(nir, shader);
518
519 NIR_PASS_V(nir, nir_propagate_invariant);
520
521 NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
522
523 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
524 NIR_PASS_V(nir, nir_split_var_copies);
525 NIR_PASS_V(nir, nir_lower_var_copies);
526
527 NIR_PASS_V(nir, nir_opt_copy_prop_vars);
528 NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
529
530 /* ir3 doesn't support indirect input/output */
531 NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out);
532
533 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
534
535 nir_assign_io_var_locations(&nir->inputs, &nir->num_inputs, stage);
536 nir_assign_io_var_locations(&nir->outputs, &nir->num_outputs, stage);
537
538 NIR_PASS_V(nir, nir_lower_system_values);
539 NIR_PASS_V(nir, nir_lower_frexp);
540
541 if (stage == MESA_SHADER_FRAGMENT)
542 NIR_PASS_V(nir, nir_lower_input_attachments, true);
543
544 NIR_PASS_V(nir, tu_lower_io, shader, layout);
545
546 NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0);
547
548 if (stage == MESA_SHADER_FRAGMENT) {
549 /* NOTE: lower load_barycentric_at_sample first, since it
550 * produces load_barycentric_at_offset:
551 */
552 NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_sample);
553 NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_offset);
554
555 NIR_PASS_V(nir, ir3_nir_move_varying_inputs);
556 }
557
558 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
559
560 /* num_uniforms only used by ir3 for size of ubo 0 (push constants) */
561 nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE / 16;
562
563 shader->ir3_shader.compiler = dev->compiler;
564 shader->ir3_shader.type = stage;
565 shader->ir3_shader.nir = nir;
566
567 return shader;
568 }
569
570 void
571 tu_shader_destroy(struct tu_device *dev,
572 struct tu_shader *shader,
573 const VkAllocationCallbacks *alloc)
574 {
575 if (shader->ir3_shader.nir)
576 ralloc_free(shader->ir3_shader.nir);
577
578 for (uint32_t i = 0; i < 1 + shader->has_binning_pass; i++) {
579 if (shader->variants[i].ir)
580 ir3_destroy(shader->variants[i].ir);
581 }
582
583 if (shader->ir3_shader.const_state.immediates)
584 free(shader->ir3_shader.const_state.immediates);
585 if (shader->binary)
586 free(shader->binary);
587 if (shader->binning_binary)
588 free(shader->binning_binary);
589
590 vk_free2(&dev->alloc, alloc, shader);
591 }
592
593 void
594 tu_shader_compile_options_init(
595 struct tu_shader_compile_options *options,
596 const VkGraphicsPipelineCreateInfo *pipeline_info)
597 {
598 bool has_gs = false;
599 bool msaa = false;
600 if (pipeline_info) {
601 for (uint32_t i = 0; i < pipeline_info->stageCount; i++) {
602 if (pipeline_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) {
603 has_gs = true;
604 break;
605 }
606 }
607
608 if (!pipeline_info->pRasterizationState->rasterizerDiscardEnable &&
609 pipeline_info->pMultisampleState->rasterizationSamples > 1)
610 msaa = true;
611 }
612
613 *options = (struct tu_shader_compile_options) {
614 /* TODO: Populate the remaining fields of ir3_shader_key. */
615 .key = {
616 .has_gs = has_gs,
617 .msaa = msaa,
618 },
619 /* TODO: VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT
620 * some optimizations need to happen otherwise shader might not compile
621 */
622 .optimize = true,
623 .include_binning_pass = true,
624 };
625 }
626
627 static uint32_t *
628 tu_compile_shader_variant(struct ir3_shader *shader,
629 const struct ir3_shader_key *key,
630 struct ir3_shader_variant *nonbinning,
631 struct ir3_shader_variant *variant)
632 {
633 variant->shader = shader;
634 variant->type = shader->type;
635 variant->key = *key;
636 variant->binning_pass = !!nonbinning;
637 variant->nonbinning = nonbinning;
638
639 int ret = ir3_compile_shader_nir(shader->compiler, variant);
640 if (ret)
641 return NULL;
642
643 /* when assemble fails, we rely on tu_shader_destroy to clean up the
644 * variant
645 */
646 return ir3_shader_assemble(variant, shader->compiler->gpu_id);
647 }
648
649 VkResult
650 tu_shader_compile(struct tu_device *dev,
651 struct tu_shader *shader,
652 const struct tu_shader *next_stage,
653 const struct tu_shader_compile_options *options,
654 const VkAllocationCallbacks *alloc)
655 {
656 if (options->optimize) {
657 /* ignore the key for the first pass of optimization */
658 ir3_optimize_nir(&shader->ir3_shader, shader->ir3_shader.nir, NULL);
659
660 if (unlikely(dev->physical_device->instance->debug_flags &
661 TU_DEBUG_NIR)) {
662 fprintf(stderr, "optimized nir:\n");
663 nir_print_shader(shader->ir3_shader.nir, stderr);
664 }
665 }
666
667 shader->binary = tu_compile_shader_variant(
668 &shader->ir3_shader, &options->key, NULL, &shader->variants[0]);
669 if (!shader->binary)
670 return VK_ERROR_OUT_OF_HOST_MEMORY;
671
672 if (shader_debug_enabled(shader->ir3_shader.type)) {
673 fprintf(stdout, "Native code for unnamed %s shader %s:\n",
674 ir3_shader_stage(&shader->variants[0]), shader->ir3_shader.nir->info.name);
675 if (shader->ir3_shader.type == MESA_SHADER_FRAGMENT)
676 fprintf(stdout, "SIMD0\n");
677 ir3_shader_disasm(&shader->variants[0], shader->binary, stdout);
678 }
679
680 /* compile another variant for the binning pass */
681 if (options->include_binning_pass &&
682 shader->ir3_shader.type == MESA_SHADER_VERTEX) {
683 shader->binning_binary = tu_compile_shader_variant(
684 &shader->ir3_shader, &options->key, &shader->variants[0],
685 &shader->variants[1]);
686 if (!shader->binning_binary)
687 return VK_ERROR_OUT_OF_HOST_MEMORY;
688
689 shader->has_binning_pass = true;
690
691 if (shader_debug_enabled(MESA_SHADER_VERTEX)) {
692 fprintf(stdout, "Native code for unnamed binning shader %s:\n",
693 shader->ir3_shader.nir->info.name);
694 ir3_shader_disasm(&shader->variants[1], shader->binary, stdout);
695 }
696 }
697
698 if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_IR3)) {
699 fprintf(stderr, "disassembled ir3:\n");
700 fprintf(stderr, "shader: %s\n",
701 gl_shader_stage_name(shader->ir3_shader.type));
702 ir3_shader_disasm(&shader->variants[0], shader->binary, stderr);
703
704 if (shader->has_binning_pass) {
705 fprintf(stderr, "disassembled ir3:\n");
706 fprintf(stderr, "shader: %s (binning)\n",
707 gl_shader_stage_name(shader->ir3_shader.type));
708 ir3_shader_disasm(&shader->variants[1], shader->binning_binary,
709 stderr);
710 }
711 }
712
713 return VK_SUCCESS;
714 }
715
716 VkResult
717 tu_CreateShaderModule(VkDevice _device,
718 const VkShaderModuleCreateInfo *pCreateInfo,
719 const VkAllocationCallbacks *pAllocator,
720 VkShaderModule *pShaderModule)
721 {
722 TU_FROM_HANDLE(tu_device, device, _device);
723 struct tu_shader_module *module;
724
725 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
726 assert(pCreateInfo->flags == 0);
727 assert(pCreateInfo->codeSize % 4 == 0);
728
729 module = vk_alloc2(&device->alloc, pAllocator,
730 sizeof(*module) + pCreateInfo->codeSize, 8,
731 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
732 if (module == NULL)
733 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
734
735 module->code_size = pCreateInfo->codeSize;
736 memcpy(module->code, pCreateInfo->pCode, pCreateInfo->codeSize);
737
738 _mesa_sha1_compute(module->code, module->code_size, module->sha1);
739
740 *pShaderModule = tu_shader_module_to_handle(module);
741
742 return VK_SUCCESS;
743 }
744
745 void
746 tu_DestroyShaderModule(VkDevice _device,
747 VkShaderModule _module,
748 const VkAllocationCallbacks *pAllocator)
749 {
750 TU_FROM_HANDLE(tu_device, device, _device);
751 TU_FROM_HANDLE(tu_shader_module, module, _module);
752
753 if (!module)
754 return;
755
756 vk_free2(&device->alloc, pAllocator, module);
757 }