tu: Support pipelines without a fragment shader
src/freedreno/vulkan/tu_shader.c
/*
 * Copyright © 2019 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "nir/nir_xfb_info.h"
#include "vk_util.h"

#include "ir3/ir3_nir.h"

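/* Translate a SPIR-V binary into NIR for the given stage, converting the
 * VkSpecializationInfo entries into nir_spirv_specialization records and
 * using ir3's NIR compiler options.
 */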
static nir_shader *
tu_spirv_to_nir(struct ir3_compiler *compiler,
                const uint32_t *words,
                size_t word_count,
                gl_shader_stage stage,
                const char *entry_point_name,
                const VkSpecializationInfo *spec_info)
{
   /* TODO these are made-up */
   const struct spirv_to_nir_options spirv_options = {
      .frag_coord_is_sysval = true,
      .lower_ubo_ssbo_access_to_offsets = true,
      .caps = {
         .transform_feedback = compiler->gpu_id >= 600,
      },
   };
   const nir_shader_compiler_options *nir_options =
      ir3_get_compiler_options(compiler);

   /* convert VkSpecializationInfo */
   struct nir_spirv_specialization *spec = NULL;
   uint32_t num_spec = 0;
   if (spec_info && spec_info->mapEntryCount) {
      spec = calloc(spec_info->mapEntryCount, sizeof(*spec));
      if (!spec)
         return NULL;

      for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
         const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
         const void *data = spec_info->pData + entry->offset;
         assert(data + entry->size <= spec_info->pData + spec_info->dataSize);
         spec[i].id = entry->constantID;
         switch (entry->size) {
         case 8:
            spec[i].value.u64 = *(const uint64_t *)data;
            break;
         case 4:
            spec[i].value.u32 = *(const uint32_t *)data;
            break;
         case 2:
            spec[i].value.u16 = *(const uint16_t *)data;
            break;
         case 1:
            spec[i].value.u8 = *(const uint8_t *)data;
            break;
         default:
            assert(!"Invalid spec constant size");
            break;
         }
         spec[i].defined_on_module = false;
      }

      num_spec = spec_info->mapEntryCount;
   }

   nir_shader *nir =
      spirv_to_nir(words, word_count, spec, num_spec, stage, entry_point_name,
                   &spirv_options, nir_options);

   free(spec);

   assert(nir->info.stage == stage);
   nir_validate_shader(nir, "after spirv_to_nir");

   return nir;
}

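/* Rewrite load_push_constant into ir3's load_uniform: the byte offset is
 * converted to dwords and rebased against the start of the pushed range
 * computed in gather_push_constants().
 */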
static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct tu_shader *shader)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
   load->num_components = instr->num_components;
   uint32_t base = nir_intrinsic_base(instr);
   assert(base % 4 == 0);
   assert(base >= shader->push_consts.lo * 16);
   base -= shader->push_consts.lo * 16;
   nir_intrinsic_set_base(load, base / 4);
   load->src[0] =
      nir_src_for_ssa(nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)));
   nir_ssa_dest_init(&load->instr, &load->dest,
                     load->num_components, instr->dest.ssa.bit_size,
                     instr->dest.ssa.name);
   nir_builder_instr_insert(b, &load->instr);
   nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));

   nir_instr_remove(&instr->instr);
}

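/* Replace vulkan_resource_index with an ir3 bindless resource handle.
 * Dynamic UBO/SSBO descriptors live in the reserved MAX_SETS "set", after
 * the input attachments; everything else indexes into its own descriptor
 * set at its binding's offset.
 */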
static void
lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
                            struct tu_shader *shader,
                            const struct tu_pipeline_layout *layout)
{
   nir_ssa_def *vulkan_idx = instr->src[0].ssa;

   unsigned set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);
   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct tu_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];
   uint32_t base;

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      base = layout->set[set].dynamic_offset_start +
         binding_layout->dynamic_offset_offset +
         layout->input_attachment_count;
      set = MAX_SETS;
      break;
   default:
      base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
      break;
   }

   nir_intrinsic_instr *bindless =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_bindless_resource_ir3);
   bindless->num_components = 1;
   nir_ssa_dest_init(&bindless->instr, &bindless->dest,
                     1, 32, NULL);
   nir_intrinsic_set_desc_set(bindless, set);
   bindless->src[0] = nir_src_for_ssa(nir_iadd(b, nir_imm_int(b, base), vulkan_idx));
   nir_builder_instr_insert(b, &bindless->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                            nir_src_for_ssa(&bindless->dest.ssa));
   nir_instr_remove(&instr->instr);
}

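/* Build a bindless_resource_ir3 handle for a texture/sampler/image deref.
 * Input attachments are redirected to the reserved MAX_SETS set, samplers in
 * combined image/sampler descriptors sit one slot after the image, and array
 * derefs add the array index scaled by the descriptor stride.
 */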
static nir_ssa_def *
build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler,
               struct tu_shader *shader,
               const struct tu_pipeline_layout *layout)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct tu_descriptor_set_binding_layout *bind_layout =
      &layout->set[set].layout->binding[binding];

   nir_ssa_def *desc_offset;
   unsigned descriptor_stride;
   if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
      unsigned offset =
         layout->set[set].input_attachment_start +
         bind_layout->input_attachment_offset;
      desc_offset = nir_imm_int(b, offset);
      set = MAX_SETS;
      descriptor_stride = 1;
   } else {
      unsigned offset = 0;
      /* Samplers come second in combined image/sampler descriptors, see
       * write_combined_image_sampler_descriptor().
       */
      if (is_sampler && bind_layout->type ==
          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
         offset = 1;
      }
      desc_offset =
         nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
                     offset);
      descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);
   }

   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_stride));
   }

   nir_intrinsic_instr *bindless =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_bindless_resource_ir3);
   bindless->num_components = 1;
   nir_ssa_dest_init(&bindless->instr, &bindless->dest,
                     1, 32, NULL);
   nir_intrinsic_set_desc_set(bindless, set);
   bindless->src[0] = nir_src_for_ssa(desc_offset);
   nir_builder_instr_insert(b, &bindless->instr);

   return &bindless->dest.ssa;
}

static void
lower_image_deref(nir_builder *b,
                  nir_intrinsic_instr *instr, struct tu_shader *shader,
                  const struct tu_pipeline_layout *layout)
{
   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
   nir_rewrite_image_intrinsic(instr, bindless, true);
}

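/* Lower turnip-specific intrinsics: layer ID (stubbed to zero until layered
 * rendering is supported), push constants, resource indices, and image
 * derefs. Returns true if the instruction was rewritten.
 */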
static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
                struct tu_shader *shader,
                const struct tu_pipeline_layout *layout)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_layer_id:
      /* TODO: remove this when layered rendering is implemented */
      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                               nir_src_for_ssa(nir_imm_int(b, 0)));
      nir_instr_remove(&instr->instr);
      return true;

   case nir_intrinsic_load_push_constant:
      lower_load_push_constant(b, instr, shader);
      return true;

   case nir_intrinsic_vulkan_resource_index:
      lower_vulkan_resource_index(b, instr, shader, layout);
      return true;

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      lower_image_deref(b, instr, shader, layout);
      return true;

   default:
      return false;
   }
}

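/* Rewrite texture and sampler derefs on a tex instruction into bindless
 * handles (nir_tex_src_texture_handle / nir_tex_src_sampler_handle).
 */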
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          struct tu_shader *shader, const struct tu_pipeline_layout *layout)
{
   int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (sampler_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
      nir_ssa_def *bindless = build_bindless(b, deref, true, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
   }

   int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   if (tex_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
      nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;
   }

   return true;
}

static bool
lower_impl(nir_function_impl *impl, struct tu_shader *shader,
           const struct tu_pipeline_layout *layout)
{
   nir_builder b;
   nir_builder_init(&b, impl);
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         b.cursor = nir_before_instr(instr);
         switch (instr->type) {
         case nir_instr_type_tex:
            progress |= lower_tex(&b, nir_instr_as_tex(instr), shader, layout);
            break;
         case nir_instr_type_intrinsic:
            progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout);
            break;
         default:
            break;
         }
      }
   }

   return progress;
}


/* Figure out the range of push constants that we're actually going to push to
 * the shader, and tell the backend to reserve this range when pushing UBO
 * constants.
 */

static void
gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
{
   uint32_t min = UINT32_MAX, max = 0;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_push_constant)
               continue;

            uint32_t base = nir_intrinsic_base(intrin);
            uint32_t range = nir_intrinsic_range(intrin);
            min = MIN2(min, base);
            max = MAX2(max, base + range);
            break;
         }
      }
   }

   if (min >= max) {
      tu_shader->push_consts.lo = 0;
      tu_shader->push_consts.count = 0;
      tu_shader->ir3_shader.const_state.num_reserved_user_consts = 0;
      return;
   }

   /* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
    * however there's an alignment requirement of 4 on OFFSET. Expand the
    * range and change units accordingly.
    */
   tu_shader->push_consts.lo = (min / 16) / 4 * 4;
   tu_shader->push_consts.count =
      align(max, 16) / 16 - tu_shader->push_consts.lo;
   tu_shader->ir3_shader.const_state.num_reserved_user_consts =
      align(tu_shader->push_consts.count, 4);
}

/* Gather the InputAttachmentIndex for each input attachment from the NIR
 * shader and organize the info in a way so that draw-time patching is easy.
 */
static void
gather_input_attachments(nir_shader *shader, struct tu_shader *tu_shader,
                         const struct tu_pipeline_layout *layout)
{
   nir_foreach_variable(var, &shader->uniforms) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);

      if (!glsl_type_is_image(glsl_type))
         continue;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);

      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const struct tu_descriptor_set_binding_layout *bind_layout =
         &layout->set[set].layout->binding[binding];
      const uint32_t array_size = bind_layout->array_size;

      if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
          dim == GLSL_SAMPLER_DIM_SUBPASS_MS) {
         unsigned offset =
            layout->set[set].input_attachment_start +
            bind_layout->input_attachment_offset;
         for (unsigned i = 0; i < array_size; i++)
            tu_shader->attachment_idx[offset + i] = var->data.index + i;
      }
   }
}

static bool
tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
            const struct tu_pipeline_layout *layout)
{
   bool progress = false;

   gather_push_constants(shader, tu_shader);
   gather_input_attachments(shader, tu_shader, layout);

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= lower_impl(function->impl, tu_shader, layout);
   }

   return progress;
}

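/* Convert NIR's transform feedback info into ir3_stream_output_info,
 * remapping VARYING_SLOT_* locations to the consecutive output indices that
 * the backend uses.
 */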
static void
tu_gather_xfb_info(nir_shader *nir, struct tu_shader *shader)
{
   struct ir3_stream_output_info *info = &shader->ir3_shader.stream_output;
   nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);

   if (!xfb)
      return;

   /* creating a map from VARYING_SLOT_* enums to consecutive index */
   uint8_t num_outputs = 0;
   uint64_t outputs_written = 0;
   for (int i = 0; i < xfb->output_count; i++)
      outputs_written |= BITFIELD64_BIT(xfb->outputs[i].location);

   uint8_t output_map[VARYING_SLOT_TESS_MAX];
   memset(output_map, 0, sizeof(output_map));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (outputs_written & BITFIELD64_BIT(attr))
         output_map[attr] = num_outputs++;
   }

   assert(xfb->output_count < IR3_MAX_SO_OUTPUTS);
   info->num_outputs = xfb->output_count;

   for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++)
      info->stride[i] = xfb->buffers[i].stride / 4;

   for (int i = 0; i < xfb->output_count; i++) {
      info->output[i].register_index = output_map[xfb->outputs[i].location];
      info->output[i].start_component = xfb->outputs[i].component_offset;
      info->output[i].num_components =
         util_bitcount(xfb->outputs[i].component_mask);
      info->output[i].output_buffer = xfb->outputs[i].buffer;
      info->output[i].dst_offset = xfb->outputs[i].offset / 4;
      info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
   }

   ralloc_free(xfb);
}

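/* Create a tu_shader for one pipeline stage: translate SPIR-V to NIR (or
 * build an empty no-op fragment shader when stage_info is NULL, so pipelines
 * can be created without a fragment shader) and run the stage-independent
 * NIR lowering passes before handing the NIR to ir3.
 */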
struct tu_shader *
tu_shader_create(struct tu_device *dev,
                 gl_shader_stage stage,
                 const VkPipelineShaderStageCreateInfo *stage_info,
                 struct tu_pipeline_layout *layout,
                 const VkAllocationCallbacks *alloc)
{
   struct tu_shader *shader;

   const uint32_t max_variant_count = (stage == MESA_SHADER_VERTEX) ? 2 : 1;
   shader = vk_zalloc2(
      &dev->alloc, alloc,
      sizeof(*shader) + sizeof(struct ir3_shader_variant) * max_variant_count,
      8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!shader)
      return NULL;

   nir_shader *nir;
   if (stage_info) {
      /* translate SPIR-V to NIR */
      const struct tu_shader_module *module =
         tu_shader_module_from_handle(stage_info->module);
      assert(module->code_size % 4 == 0);
      nir = tu_spirv_to_nir(
         dev->compiler, (const uint32_t *) module->code, module->code_size / 4,
         stage, stage_info->pName, stage_info->pSpecializationInfo);
   } else {
      assert(stage == MESA_SHADER_FRAGMENT);
      nir_builder fs_b;
      const nir_shader_compiler_options *nir_options =
         ir3_get_compiler_options(dev->compiler);
      nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, nir_options);
      fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
      nir = fs_b.shader;
   }

   if (!nir) {
      vk_free2(&dev->alloc, alloc, shader);
      return NULL;
   }

   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) {
      fprintf(stderr, "translated nir:\n");
      nir_print_shader(nir, stderr);
   }

   /* multi step inlining procedure */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);

   /* Split member structs. We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared);

   /* Gather information for transform feedback.
    * This should be called after nir_split_per_member_structs.
    * Also needs to be called after nir_remove_dead_variables with varyings,
    * so that we could align stream outputs correctly.
    */
   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_TESS_EVAL ||
       nir->info.stage == MESA_SHADER_GEOMETRY)
      tu_gather_xfb_info(nir, shader);

   NIR_PASS_V(nir, nir_propagate_invariant);

   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);

   /* ir3 doesn't support indirect input/output */
   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out);

   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   nir_assign_io_var_locations(&nir->inputs, &nir->num_inputs, stage);
   nir_assign_io_var_locations(&nir->outputs, &nir->num_outputs, stage);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_frexp);

   if (stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, nir_lower_input_attachments, true);

   NIR_PASS_V(nir, tu_lower_io, shader, layout);

   NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0);

   if (stage == MESA_SHADER_FRAGMENT) {
      /* NOTE: lower load_barycentric_at_sample first, since it
       * produces load_barycentric_at_offset:
       */
      NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_sample);
      NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_offset);

      NIR_PASS_V(nir, ir3_nir_move_varying_inputs);
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* num_uniforms only used by ir3 for size of ubo 0 (push constants) */
   nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE / 16;

   shader->ir3_shader.compiler = dev->compiler;
   shader->ir3_shader.type = stage;
   shader->ir3_shader.nir = nir;

   return shader;
}

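/* Free everything owned by a tu_shader: the NIR, the per-variant IR and
 * binaries, and the immediates allocated by the ir3 compiler.
 */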
void
tu_shader_destroy(struct tu_device *dev,
                  struct tu_shader *shader,
                  const VkAllocationCallbacks *alloc)
{
   if (shader->ir3_shader.nir)
      ralloc_free(shader->ir3_shader.nir);

   for (uint32_t i = 0; i < 1 + shader->has_binning_pass; i++) {
      if (shader->variants[i].ir)
         ir3_destroy(shader->variants[i].ir);
   }

   if (shader->ir3_shader.const_state.immediates)
      free(shader->ir3_shader.const_state.immediates);
   if (shader->binary)
      free(shader->binary);
   if (shader->binning_binary)
      free(shader->binning_binary);

   vk_free2(&dev->alloc, alloc, shader);
}

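/* Derive the ir3_shader_key from the pipeline create info: whether a
 * geometry stage is present and whether MSAA (or non-default sample
 * locations) affects varying interpolation.
 */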
void
tu_shader_compile_options_init(
   struct tu_shader_compile_options *options,
   const VkGraphicsPipelineCreateInfo *pipeline_info)
{
   bool has_gs = false;
   bool msaa = false;
   if (pipeline_info) {
      for (uint32_t i = 0; i < pipeline_info->stageCount; i++) {
         if (pipeline_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) {
            has_gs = true;
            break;
         }
      }

      const VkPipelineMultisampleStateCreateInfo *msaa_info = pipeline_info->pMultisampleState;
      const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
         vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
      if (!pipeline_info->pRasterizationState->rasterizerDiscardEnable &&
          (msaa_info->rasterizationSamples > 1 ||
           /* also set msaa key when sample location is not the default
            * since this affects varying interpolation */
           (sample_locations && sample_locations->sampleLocationsEnable))) {
         msaa = true;
      }
   }

   *options = (struct tu_shader_compile_options) {
      /* TODO: Populate the remaining fields of ir3_shader_key. */
      .key = {
         .has_gs = has_gs,
         .msaa = msaa,
      },
      /* TODO: VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT
       * some optimizations need to happen otherwise shader might not compile
       */
      .optimize = true,
      .include_binning_pass = true,
   };
}

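/* Fill in one ir3_shader_variant (the binning-pass variant when nonbinning
 * is non-NULL), run the ir3 backend on it, and return the assembled binary,
 * or NULL on failure.
 */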
static uint32_t *
tu_compile_shader_variant(struct ir3_shader *shader,
                          const struct ir3_shader_key *key,
                          struct ir3_shader_variant *nonbinning,
                          struct ir3_shader_variant *variant)
{
   variant->shader = shader;
   variant->type = shader->type;
   variant->key = *key;
   variant->binning_pass = !!nonbinning;
   variant->nonbinning = nonbinning;

   int ret = ir3_compile_shader_nir(shader->compiler, variant);
   if (ret)
      return NULL;

   /* when assemble fails, we rely on tu_shader_destroy to clean up the
    * variant
    */
   return ir3_shader_assemble(variant, shader->compiler->gpu_id);
}

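/* Optimize the NIR, compile the main variant and, for vertex shaders, a
 * second binning-pass variant, dumping NIR/disassembly when the matching
 * debug flags are set.
 */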
VkResult
tu_shader_compile(struct tu_device *dev,
                  struct tu_shader *shader,
                  const struct tu_shader *next_stage,
                  const struct tu_shader_compile_options *options,
                  const VkAllocationCallbacks *alloc)
{
   if (options->optimize) {
      /* ignore the key for the first pass of optimization */
      ir3_optimize_nir(&shader->ir3_shader, shader->ir3_shader.nir, NULL);

      if (unlikely(dev->physical_device->instance->debug_flags &
                   TU_DEBUG_NIR)) {
         fprintf(stderr, "optimized nir:\n");
         nir_print_shader(shader->ir3_shader.nir, stderr);
      }
   }

   shader->binary = tu_compile_shader_variant(
      &shader->ir3_shader, &options->key, NULL, &shader->variants[0]);
   if (!shader->binary)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   if (shader_debug_enabled(shader->ir3_shader.type)) {
      fprintf(stdout, "Native code for unnamed %s shader %s:\n",
              ir3_shader_stage(&shader->variants[0]), shader->ir3_shader.nir->info.name);
      if (shader->ir3_shader.type == MESA_SHADER_FRAGMENT)
         fprintf(stdout, "SIMD0\n");
      ir3_shader_disasm(&shader->variants[0], shader->binary, stdout);
   }

   /* compile another variant for the binning pass */
   if (options->include_binning_pass &&
       shader->ir3_shader.type == MESA_SHADER_VERTEX) {
      shader->binning_binary = tu_compile_shader_variant(
         &shader->ir3_shader, &options->key, &shader->variants[0],
         &shader->variants[1]);
      if (!shader->binning_binary)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      shader->has_binning_pass = true;

      if (shader_debug_enabled(MESA_SHADER_VERTEX)) {
         fprintf(stdout, "Native code for unnamed binning shader %s:\n",
                 shader->ir3_shader.nir->info.name);
         ir3_shader_disasm(&shader->variants[1], shader->binning_binary, stdout);
      }
   }

   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_IR3)) {
      fprintf(stderr, "disassembled ir3:\n");
      fprintf(stderr, "shader: %s\n",
              gl_shader_stage_name(shader->ir3_shader.type));
      ir3_shader_disasm(&shader->variants[0], shader->binary, stderr);

      if (shader->has_binning_pass) {
         fprintf(stderr, "disassembled ir3:\n");
         fprintf(stderr, "shader: %s (binning)\n",
                 gl_shader_stage_name(shader->ir3_shader.type));
         ir3_shader_disasm(&shader->variants[1], shader->binning_binary,
                           stderr);
      }
   }

   return VK_SUCCESS;
}

VkResult
tu_CreateShaderModule(VkDevice _device,
                      const VkShaderModuleCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkShaderModule *pShaderModule)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_shader_module *module;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);
   assert(pCreateInfo->codeSize % 4 == 0);

   module = vk_alloc2(&device->alloc, pAllocator,
                      sizeof(*module) + pCreateInfo->codeSize, 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (module == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   module->code_size = pCreateInfo->codeSize;
   memcpy(module->code, pCreateInfo->pCode, pCreateInfo->codeSize);

   _mesa_sha1_compute(module->code, module->code_size, module->sha1);

   *pShaderModule = tu_shader_module_to_handle(module);

   return VK_SUCCESS;
}

void
tu_DestroyShaderModule(VkDevice _device,
                       VkShaderModule _module,
                       const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_shader_module, module, _module);

   if (!module)
      return;

   vk_free2(&device->alloc, pAllocator, module);
}