src/freedreno/vulkan/tu_shader.c
/*
 * Copyright © 2019 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "nir/nir_xfb_info.h"
#include "vk_util.h"

#include "ir3/ir3_nir.h"
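/* Translate a SPIR-V module into NIR with the ir3 compiler options,
 * converting any VkSpecializationInfo entries into NIR specialization
 * constants along the way.
 */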
static nir_shader *
tu_spirv_to_nir(struct ir3_compiler *compiler,
                const uint32_t *words,
                size_t word_count,
                gl_shader_stage stage,
                const char *entry_point_name,
                const VkSpecializationInfo *spec_info)
{
   /* TODO these are made-up */
   const struct spirv_to_nir_options spirv_options = {
      .frag_coord_is_sysval = true,
      .lower_ubo_ssbo_access_to_offsets = true,
      .caps = {
         .transform_feedback = compiler->gpu_id >= 600,
      },
   };
   const nir_shader_compiler_options *nir_options =
      ir3_get_compiler_options(compiler);

   /* convert VkSpecializationInfo */
   struct nir_spirv_specialization *spec = NULL;
   uint32_t num_spec = 0;
   if (spec_info && spec_info->mapEntryCount) {
      spec = calloc(spec_info->mapEntryCount, sizeof(*spec));
      if (!spec)
         return NULL;

      for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
         const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
         const void *data = spec_info->pData + entry->offset;
         assert(data + entry->size <= spec_info->pData + spec_info->dataSize);
         spec[i].id = entry->constantID;
         switch (entry->size) {
         case 8:
            spec[i].value.u64 = *(const uint64_t *)data;
            break;
         case 4:
            spec[i].value.u32 = *(const uint32_t *)data;
            break;
         case 2:
            spec[i].value.u16 = *(const uint16_t *)data;
            break;
         case 1:
            spec[i].value.u8 = *(const uint8_t *)data;
            break;
         default:
            assert(!"Invalid spec constant size");
            break;
         }
         spec[i].defined_on_module = false;
      }

      num_spec = spec_info->mapEntryCount;
   }

   nir_shader *nir =
      spirv_to_nir(words, word_count, spec, num_spec, stage, entry_point_name,
                   &spirv_options, nir_options);

   free(spec);

   assert(nir->info.stage == stage);
   nir_validate_shader(nir, "after spirv_to_nir");

   return nir;
}
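/* Rewrite load_push_constant into load_uniform: push constants are pushed
 * as regular user consts (see gather_push_constants()), so rebase the byte
 * offset against the start of the pushed range and convert it to dwords.
 */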
static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct tu_shader *shader)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
   load->num_components = instr->num_components;
   uint32_t base = nir_intrinsic_base(instr);
   assert(base % 4 == 0);
   assert(base >= shader->push_consts.lo * 16);
   base -= shader->push_consts.lo * 16;
   nir_intrinsic_set_base(load, base / 4);
   load->src[0] =
      nir_src_for_ssa(nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)));
   nir_ssa_dest_init(&load->instr, &load->dest,
                     load->num_components, instr->dest.ssa.bit_size,
                     instr->dest.ssa.name);
   nir_builder_instr_insert(b, &load->instr);
   nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(&load->dest.ssa));

   nir_instr_remove(&instr->instr);
}
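/* Rewrite vulkan_resource_index into a bindless_resource_ir3 handle.
 * Dynamic UBO/SSBO descriptors live in the reserved MAX_SETS "set" after
 * the input attachments; everything else is addressed by its offset (in
 * descriptor slots) within its own set.
 */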
static void
lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
                            struct tu_shader *shader,
                            const struct tu_pipeline_layout *layout)
{
   nir_ssa_def *vulkan_idx = instr->src[0].ssa;

   unsigned set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);
   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct tu_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];
   uint32_t base;

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      base = layout->set[set].dynamic_offset_start +
             binding_layout->dynamic_offset_offset +
             layout->input_attachment_count;
      set = MAX_SETS;
      break;
   default:
      base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
      break;
   }

   nir_intrinsic_instr *bindless =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_bindless_resource_ir3);
   bindless->num_components = 1;
   nir_ssa_dest_init(&bindless->instr, &bindless->dest,
                     1, 32, NULL);
   nir_intrinsic_set_desc_set(bindless, set);
   bindless->src[0] = nir_src_for_ssa(nir_iadd(b, nir_imm_int(b, base), vulkan_idx));
   nir_builder_instr_insert(b, &bindless->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                            nir_src_for_ssa(&bindless->dest.ssa));
   nir_instr_remove(&instr->instr);
}
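/* Build a bindless handle for an image/sampler deref.  Input attachments
 * are redirected to the reserved MAX_SETS "set"; for combined
 * image/sampler bindings the sampler sits one slot after the image.  Array
 * derefs add the element index scaled by the per-element descriptor
 * stride.
 */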
static nir_ssa_def *
build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler,
               struct tu_shader *shader,
               const struct tu_pipeline_layout *layout)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct tu_descriptor_set_binding_layout *bind_layout =
      &layout->set[set].layout->binding[binding];

   nir_ssa_def *desc_offset;
   unsigned descriptor_stride;
   if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
      unsigned offset =
         layout->set[set].input_attachment_start +
         bind_layout->input_attachment_offset;
      desc_offset = nir_imm_int(b, offset);
      set = MAX_SETS;
      descriptor_stride = 1;
   } else {
      unsigned offset = 0;
      /* Samplers come second in combined image/sampler descriptors, see
       * write_combined_image_sampler_descriptor().
       */
      if (is_sampler && bind_layout->type ==
          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
         offset = 1;
      }
      desc_offset =
         nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
                     offset);
      descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);
   }

   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_stride));
   }

   nir_intrinsic_instr *bindless =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_bindless_resource_ir3);
   bindless->num_components = 1;
   nir_ssa_dest_init(&bindless->instr, &bindless->dest,
                     1, 32, NULL);
   nir_intrinsic_set_desc_set(bindless, set);
   bindless->src[0] = nir_src_for_ssa(desc_offset);
   nir_builder_instr_insert(b, &bindless->instr);

   return &bindless->dest.ssa;
}

static void
lower_image_deref(nir_builder *b,
                  nir_intrinsic_instr *instr, struct tu_shader *shader,
                  const struct tu_pipeline_layout *layout)
{
   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
   nir_rewrite_image_intrinsic(instr, bindless, true);
}

static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
                struct tu_shader *shader,
                const struct tu_pipeline_layout *layout)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_layer_id:
      /* TODO: remove this when layered rendering is implemented */
      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                               nir_src_for_ssa(nir_imm_int(b, 0)));
      nir_instr_remove(&instr->instr);
      return true;

   case nir_intrinsic_load_push_constant:
      lower_load_push_constant(b, instr, shader);
      return true;

   case nir_intrinsic_vulkan_resource_index:
      lower_vulkan_resource_index(b, instr, shader, layout);
      return true;

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      lower_image_deref(b, instr, shader, layout);
      return true;

   default:
      return false;
   }
}
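/* Rewrite texture and sampler derefs on a tex instruction into bindless
 * handles (nir_tex_src_texture_handle / nir_tex_src_sampler_handle).
 */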
static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          struct tu_shader *shader, const struct tu_pipeline_layout *layout)
{
   int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (sampler_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
      nir_ssa_def *bindless = build_bindless(b, deref, true, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
   }

   int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   if (tex_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
      nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;
   }

   return true;
}

static bool
lower_impl(nir_function_impl *impl, struct tu_shader *shader,
           const struct tu_pipeline_layout *layout)
{
   nir_builder b;
   nir_builder_init(&b, impl);
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         b.cursor = nir_before_instr(instr);
         switch (instr->type) {
         case nir_instr_type_tex:
            progress |= lower_tex(&b, nir_instr_as_tex(instr), shader, layout);
            break;
         case nir_instr_type_intrinsic:
            progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout);
            break;
         default:
            break;
         }
      }
   }

   return progress;
}


/* Figure out the range of push constants that we're actually going to push
 * to the shader, and tell the backend to reserve this range when pushing
 * UBO constants.
 */

static void
gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
{
   uint32_t min = UINT32_MAX, max = 0;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_push_constant)
               continue;

            uint32_t base = nir_intrinsic_base(intrin);
            uint32_t range = nir_intrinsic_range(intrin);
            min = MIN2(min, base);
            max = MAX2(max, base + range);
            break;
         }
      }
   }

   if (min >= max) {
      tu_shader->push_consts.lo = 0;
      tu_shader->push_consts.count = 0;
      tu_shader->ir3_shader.const_state.num_reserved_user_consts = 0;
      return;
   }

   /* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
    * however there's an alignment requirement of 4 on OFFSET. Expand the
    * range and change units accordingly.
    */
   tu_shader->push_consts.lo = (min / 16) / 4 * 4;
   tu_shader->push_consts.count =
      align(max, 16) / 16 - tu_shader->push_consts.lo;
   tu_shader->ir3_shader.const_state.num_reserved_user_consts =
      align(tu_shader->push_consts.count, 4);
}

/* Gather the InputAttachmentIndex for each input attachment from the NIR
 * shader and organize the info so that draw-time patching is easy.
 */
static void
gather_input_attachments(nir_shader *shader, struct tu_shader *tu_shader,
                         const struct tu_pipeline_layout *layout)
{
   nir_foreach_variable(var, &shader->uniforms) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);

      if (!glsl_type_is_image(glsl_type))
         continue;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);

      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const struct tu_descriptor_set_binding_layout *bind_layout =
         &layout->set[set].layout->binding[binding];
      const uint32_t array_size = bind_layout->array_size;

      if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
          dim == GLSL_SAMPLER_DIM_SUBPASS_MS) {
         unsigned offset =
            layout->set[set].input_attachment_start +
            bind_layout->input_attachment_offset;
         for (unsigned i = 0; i < array_size; i++)
            tu_shader->attachment_idx[offset + i] = var->data.index + i;
      }
   }
}
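/* Driver-specific IO lowering: gather push constant and input attachment
 * usage, then rewrite descriptor, push constant, and image/texture access
 * into the bindless/ir3 forms above.
 */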
static bool
tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
            const struct tu_pipeline_layout *layout)
{
   bool progress = false;

   gather_push_constants(shader, tu_shader);
   gather_input_attachments(shader, tu_shader, layout);

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= lower_impl(function->impl, tu_shader, layout);
   }

   return progress;
}
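/* Convert NIR transform feedback info into the ir3_stream_output_info
 * layout consumed by the backend, remapping VARYING_SLOT_* locations to
 * compact output indices.
 */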
static void
tu_gather_xfb_info(nir_shader *nir, struct tu_shader *shader)
{
   struct ir3_stream_output_info *info = &shader->ir3_shader.stream_output;
   nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);

   if (!xfb)
      return;

   /* create a map from VARYING_SLOT_* enums to consecutive indices */
   uint8_t num_outputs = 0;
   uint64_t outputs_written = 0;
   for (int i = 0; i < xfb->output_count; i++)
      outputs_written |= BITFIELD64_BIT(xfb->outputs[i].location);

   uint8_t output_map[VARYING_SLOT_TESS_MAX];
   memset(output_map, 0, sizeof(output_map));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (outputs_written & BITFIELD64_BIT(attr))
         output_map[attr] = num_outputs++;
   }

   assert(xfb->output_count < IR3_MAX_SO_OUTPUTS);
   info->num_outputs = xfb->output_count;

   for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++)
      info->stride[i] = xfb->buffers[i].stride / 4;

   for (int i = 0; i < xfb->output_count; i++) {
      info->output[i].register_index = output_map[xfb->outputs[i].location];
      info->output[i].start_component = xfb->outputs[i].component_offset;
      info->output[i].num_components =
         util_bitcount(xfb->outputs[i].component_mask);
      info->output[i].output_buffer = xfb->outputs[i].buffer;
      info->output[i].dst_offset = xfb->outputs[i].offset / 4;
      info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
   }

   ralloc_free(xfb);
}
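/* Translate the SPIR-V module to NIR and run the NIR lowering passes needed
 * before ir3, leaving shader->ir3_shader ready for variant compilation in
 * tu_shader_compile().
 */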
struct tu_shader *
tu_shader_create(struct tu_device *dev,
                 gl_shader_stage stage,
                 const VkPipelineShaderStageCreateInfo *stage_info,
                 struct tu_pipeline_layout *layout,
                 const VkAllocationCallbacks *alloc)
{
   const struct tu_shader_module *module =
      tu_shader_module_from_handle(stage_info->module);
   struct tu_shader *shader;

   const uint32_t max_variant_count = (stage == MESA_SHADER_VERTEX) ? 2 : 1;
   shader = vk_zalloc2(
      &dev->alloc, alloc,
      sizeof(*shader) + sizeof(struct ir3_shader_variant) * max_variant_count,
      8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!shader)
      return NULL;

   /* translate SPIR-V to NIR */
   assert(module->code_size % 4 == 0);
   nir_shader *nir = tu_spirv_to_nir(
      dev->compiler, (const uint32_t *) module->code, module->code_size / 4,
      stage, stage_info->pName, stage_info->pSpecializationInfo);
   if (!nir) {
      vk_free2(&dev->alloc, alloc, shader);
      return NULL;
   }

   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) {
      fprintf(stderr, "translated nir:\n");
      nir_print_shader(nir, stderr);
   }

   /* multi-step inlining procedure */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);

   /* Split member structs. We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared);

   /* Gather information for transform feedback.
    * This should be called after nir_split_per_member_structs.
    * It also needs to be called after nir_remove_dead_variables with
    * varyings, so that we can align stream outputs correctly.
    */
   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_TESS_EVAL ||
       nir->info.stage == MESA_SHADER_GEOMETRY)
      tu_gather_xfb_info(nir, shader);

   NIR_PASS_V(nir, nir_propagate_invariant);

   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);

   /* ir3 doesn't support indirect input/output */
   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out);

   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   nir_assign_io_var_locations(&nir->inputs, &nir->num_inputs, stage);
   nir_assign_io_var_locations(&nir->outputs, &nir->num_outputs, stage);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_frexp);

   if (stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, nir_lower_input_attachments, true);

   NIR_PASS_V(nir, tu_lower_io, shader, layout);

   NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0);

   if (stage == MESA_SHADER_FRAGMENT) {
      /* NOTE: lower load_barycentric_at_sample first, since it
       * produces load_barycentric_at_offset:
       */
      NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_sample);
      NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_offset);

      NIR_PASS_V(nir, ir3_nir_move_varying_inputs);
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   /* num_uniforms is only used by ir3 for the size of UBO 0 (push constants) */
   nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE / 16;

   shader->ir3_shader.compiler = dev->compiler;
   shader->ir3_shader.type = stage;
   shader->ir3_shader.nir = nir;

   return shader;
}

void
tu_shader_destroy(struct tu_device *dev,
                  struct tu_shader *shader,
                  const VkAllocationCallbacks *alloc)
{
   if (shader->ir3_shader.nir)
      ralloc_free(shader->ir3_shader.nir);

   for (uint32_t i = 0; i < 1 + shader->has_binning_pass; i++) {
      if (shader->variants[i].ir)
         ir3_destroy(shader->variants[i].ir);
   }

   if (shader->ir3_shader.const_state.immediates)
      free(shader->ir3_shader.const_state.immediates);
   if (shader->binary)
      free(shader->binary);
   if (shader->binning_binary)
      free(shader->binning_binary);

   vk_free2(&dev->alloc, alloc, shader);
}
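/* Derive the ir3 shader key from the pipeline state: whether a geometry
 * stage is present and whether MSAA (or non-default sample locations)
 * affects varying interpolation.
 */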
void
tu_shader_compile_options_init(
   struct tu_shader_compile_options *options,
   const VkGraphicsPipelineCreateInfo *pipeline_info)
{
   bool has_gs = false;
   bool msaa = false;
   if (pipeline_info) {
      for (uint32_t i = 0; i < pipeline_info->stageCount; i++) {
         if (pipeline_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) {
            has_gs = true;
            break;
         }
      }

      const VkPipelineMultisampleStateCreateInfo *msaa_info = pipeline_info->pMultisampleState;
      const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
         vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
      if (!pipeline_info->pRasterizationState->rasterizerDiscardEnable &&
          (msaa_info->rasterizationSamples > 1 ||
           /* also set the msaa key when sample locations are not the default,
            * since this affects varying interpolation */
           (sample_locations && sample_locations->sampleLocationsEnable))) {
         msaa = true;
      }
   }

   *options = (struct tu_shader_compile_options) {
      /* TODO: Populate the remaining fields of ir3_shader_key. */
      .key = {
         .has_gs = has_gs,
         .msaa = msaa,
      },
      /* TODO: VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT
       * some optimizations need to happen, otherwise the shader might not
       * compile.
       */
      .optimize = true,
      .include_binning_pass = true,
   };
}
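/* Compile and assemble a single ir3 variant.  A non-NULL "nonbinning"
 * variant marks this as the binning-pass variant of that shader.
 */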
static uint32_t *
tu_compile_shader_variant(struct ir3_shader *shader,
                          const struct ir3_shader_key *key,
                          struct ir3_shader_variant *nonbinning,
                          struct ir3_shader_variant *variant)
{
   variant->shader = shader;
   variant->type = shader->type;
   variant->key = *key;
   variant->binning_pass = !!nonbinning;
   variant->nonbinning = nonbinning;

   int ret = ir3_compile_shader_nir(shader->compiler, variant);
   if (ret)
      return NULL;

   /* when assembly fails, we rely on tu_shader_destroy to clean up the
    * variant
    */
   return ir3_shader_assemble(variant, shader->compiler->gpu_id);
}
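/* Compile the main variant (and, for vertex shaders, a binning-pass
 * variant) and emit any requested debug disassembly.
 */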
VkResult
tu_shader_compile(struct tu_device *dev,
                  struct tu_shader *shader,
                  const struct tu_shader *next_stage,
                  const struct tu_shader_compile_options *options,
                  const VkAllocationCallbacks *alloc)
{
   if (options->optimize) {
      /* ignore the key for the first pass of optimization */
      ir3_optimize_nir(&shader->ir3_shader, shader->ir3_shader.nir, NULL);

      if (unlikely(dev->physical_device->instance->debug_flags &
                   TU_DEBUG_NIR)) {
         fprintf(stderr, "optimized nir:\n");
         nir_print_shader(shader->ir3_shader.nir, stderr);
      }
   }

   shader->binary = tu_compile_shader_variant(
      &shader->ir3_shader, &options->key, NULL, &shader->variants[0]);
   if (!shader->binary)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   if (shader_debug_enabled(shader->ir3_shader.type)) {
      fprintf(stdout, "Native code for unnamed %s shader %s:\n",
              ir3_shader_stage(&shader->variants[0]), shader->ir3_shader.nir->info.name);
      if (shader->ir3_shader.type == MESA_SHADER_FRAGMENT)
         fprintf(stdout, "SIMD0\n");
      ir3_shader_disasm(&shader->variants[0], shader->binary, stdout);
   }

   /* compile another variant for the binning pass */
   if (options->include_binning_pass &&
       shader->ir3_shader.type == MESA_SHADER_VERTEX) {
      shader->binning_binary = tu_compile_shader_variant(
         &shader->ir3_shader, &options->key, &shader->variants[0],
         &shader->variants[1]);
      if (!shader->binning_binary)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      shader->has_binning_pass = true;
      if (shader_debug_enabled(MESA_SHADER_VERTEX)) {
         fprintf(stdout, "Native code for unnamed binning shader %s:\n",
                 shader->ir3_shader.nir->info.name);
         ir3_shader_disasm(&shader->variants[1], shader->binning_binary, stdout);
      }
   }

   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_IR3)) {
      fprintf(stderr, "disassembled ir3:\n");
      fprintf(stderr, "shader: %s\n",
              gl_shader_stage_name(shader->ir3_shader.type));
      ir3_shader_disasm(&shader->variants[0], shader->binary, stderr);

      if (shader->has_binning_pass) {
         fprintf(stderr, "disassembled ir3:\n");
         fprintf(stderr, "shader: %s (binning)\n",
                 gl_shader_stage_name(shader->ir3_shader.type));
         ir3_shader_disasm(&shader->variants[1], shader->binning_binary,
                           stderr);
      }
   }

   return VK_SUCCESS;
}
VkResult
tu_CreateShaderModule(VkDevice _device,
                      const VkShaderModuleCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkShaderModule *pShaderModule)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_shader_module *module;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);
   assert(pCreateInfo->codeSize % 4 == 0);

   module = vk_alloc2(&device->alloc, pAllocator,
                      sizeof(*module) + pCreateInfo->codeSize, 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (module == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   module->code_size = pCreateInfo->codeSize;
   memcpy(module->code, pCreateInfo->pCode, pCreateInfo->codeSize);

   _mesa_sha1_compute(module->code, module->code_size, module->sha1);

   *pShaderModule = tu_shader_module_to_handle(module);

   return VK_SUCCESS;
}

void
tu_DestroyShaderModule(VkDevice _device,
                       VkShaderModule _module,
                       const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_shader_module, module, _module);

   if (!module)
      return;

   vk_free2(&device->alloc, pAllocator, module);
}