Updated code generation so that, for vertex shaders, the output position is written last...
[mesa.git] / src / libre-soc / vulkan / libresoc_shader.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "libresoc_shader.h"

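/* NIR compiler options used for every stage.  The "_llvm" suffix and most of
 * these choices appear to be carried over from the radv/LLVM code this driver
 * is based on; the same option set is handed to spirv_to_nir() below. */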
static const struct nir_shader_compiler_options nir_options_llvm = {
   .vertex_id_zero_based = false,
   .lower_scmp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_device_index_to_zero = true,
   .lower_fsat = true,
   .lower_fdiv = true,
   .lower_fmod = true,
   .lower_bitfield_insert_to_bitfield_select = true,
   .lower_bitfield_extract = true,
   .lower_sub = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_ffma = true,
   .lower_fpow = true,
   .lower_mul_2x32_64 = true,
   .lower_rotate = true,
   .use_scoped_barrier = true,
   .max_unroll_iterations = 32,
   .use_interpolated_input_intrinsics = true,
   /* nir_lower_int64() isn't actually called for the LLVM backend, but
    * this helps the loop unrolling heuristics. */
   .lower_int64_options = nir_lower_imul64 |
                          nir_lower_imul_high64 |
                          nir_lower_imul_2x32_64 |
                          nir_lower_divmod64 |
                          nir_lower_minmax64 |
                          nir_lower_iabs64,
   .lower_doubles_options = nir_lower_drcp |
                            nir_lower_dsqrt |
                            nir_lower_drsq |
                            nir_lower_ddiv,
};

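/* Print each NIR shader into a single heap-allocated string.  The caller is
 * responsible for free()ing the result; returns NULL on allocation failure. */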
static char *
libresoc_dump_nir_shaders(struct nir_shader * const *shaders,
                          int shader_count)
{
   char *data = NULL;
   char *ret = NULL;
   size_t size = 0;
   FILE *f = open_memstream(&data, &size);
   if (f) {
      for (int i = 0; i < shader_count; ++i)
         nir_print_shader(shaders[i], f);
      fclose(f);
   }

   ret = malloc(size + 1);
   if (ret) {
      memcpy(ret, data, size);
      ret[size] = 0;
   }
   free(data);
   return ret;
}

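/* Size/alignment callback for nir_lower_vars_to_explicit_types(): booleans
 * are stored as 32-bit values, everything else uses its natural byte size,
 * and vectors are packed with component alignment. */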
static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}

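/* Translate a shader module into lowered NIR.  Internal (meta) shaders may
 * already carry NIR; otherwise the module's SPIR-V is converted with
 * spirv_to_nir() and run through the lowering pipeline below. */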
nir_shader *
libresoc_shader_compile_to_nir(struct libresoc_device *device,
                               struct libresoc_shader_module *module,
                               const char *entrypoint_name,
                               gl_shader_stage stage,
                               const VkSpecializationInfo *spec_info,
                               const VkPipelineCreateFlags flags,
                               unsigned subgroup_size, unsigned ballot_bit_size)
{
   nir_shader *nir;
   const nir_shader_compiler_options *nir_options = &nir_options_llvm;

   if (module->nir) {
      /* Some things, such as our meta clear/blit code, will give us a NIR
       * shader directly.  In that case, we ignore the SPIR-V entirely and
       * just use the NIR shader. */
      nir = module->nir;
      nir->options = nir_options;
      nir_validate_shader(nir, "in internal shader");

      assert(exec_list_length(&nir->functions) == 1);
   } else {
      uint32_t *spirv = (uint32_t *) module->data;
      assert(module->size % 4 == 0);

      if (device->instance->debug_flags & LIBRESOC_DEBUG_DUMP_SPIRV)
         libresoc_print_spirv(module->data, module->size, stderr);

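      /* Unpack VkSpecializationInfo into the flat array of
       * nir_spirv_specialization entries that spirv_to_nir() consumes. */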
      uint32_t num_spec_entries = 0;
      struct nir_spirv_specialization *spec_entries = NULL;
      if (spec_info && spec_info->mapEntryCount > 0) {
         num_spec_entries = spec_info->mapEntryCount;
         spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
         for (uint32_t i = 0; i < num_spec_entries; i++) {
            VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
            const void *data = spec_info->pData + entry.offset;
            assert(data + entry.size <= spec_info->pData + spec_info->dataSize);

            spec_entries[i].id = spec_info->pMapEntries[i].constantID;
            switch (entry.size) {
            case 8:
               spec_entries[i].value.u64 = *(const uint64_t *)data;
               break;
            case 4:
               spec_entries[i].value.u32 = *(const uint32_t *)data;
               break;
            case 2:
               spec_entries[i].value.u16 = *(const uint16_t *)data;
               break;
            case 1:
               spec_entries[i].value.u8 = *(const uint8_t *)data;
               break;
            default:
               assert(!"Invalid spec constant size");
               break;
            }
         }
      }

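      /* Convert the SPIR-V to NIR, applying the specialization constants.
       * Default (all-zero) spirv_to_nir options are used, so no optional
       * SPIR-V capabilities are enabled here. */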
      const struct spirv_to_nir_options spirv_options = {0};
      nir = spirv_to_nir(spirv, module->size / 4,
                         spec_entries, num_spec_entries,
                         stage, entrypoint_name,
                         &spirv_options, nir_options);
      assert(nir->info.stage == stage);
      nir_validate_shader(nir, "after spirv_to_nir");

      free(spec_entries);

      /* We have to lower away local constant initializers right before we
       * inline functions.  That way they get properly initialized at the top
       * of the function and not at the top of its caller.
       */
      NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
      NIR_PASS_V(nir, nir_lower_returns);
      NIR_PASS_V(nir, nir_inline_functions);
      NIR_PASS_V(nir, nir_copy_prop);
      NIR_PASS_V(nir, nir_opt_deref);

      /* Pick off the single entrypoint that we want. */
      /* TODO: enable the following code once its purpose is understood:
      foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
         if (func->is_entrypoint)
            func->name = ralloc_strdup(func, "main");
         else
            exec_node_remove(&func->node);
      }
      assert(exec_list_length(&nir->functions) == 1);
      */

      /* Make sure we lower constant initializers on output variables so that
       * nir_remove_dead_variables below sees the corresponding stores.
       */
      NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);

      /* Now that we've deleted all but the main function, we can go ahead and
       * lower the rest of the constant initializers.
       */
      NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

      /* Split member structs.  We do this before lower_io_to_temporaries so
       * that it doesn't lower system values to temporaries by accident.
       */
      NIR_PASS_V(nir, nir_split_var_copies);
      NIR_PASS_V(nir, nir_split_per_member_structs);

      if (nir->info.stage == MESA_SHADER_FRAGMENT) {
         NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
         NIR_PASS_V(nir, nir_lower_input_attachments,
                    &(nir_input_attachment_options) {
                       .use_fragcoord_sysval = true,
                       .use_layer_id_sysval = false,
                    });
      }

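      /* Drop I/O, system-value and shared-memory variables that the lowering
       * above has left unused. */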
      NIR_PASS_V(nir, nir_remove_dead_variables,
                 nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
                 nir_var_mem_shared, NULL);

      NIR_PASS_V(nir, nir_propagate_invariant);

      NIR_PASS_V(nir, nir_lower_system_values);
      NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

      NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

      // if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
      //    NIR_PASS_V(nir, nir_lower_discard_to_demote);

      nir_lower_doubles_options lower_doubles =
         nir->options->lower_doubles_options;
      //TODO: enable the following if required:
      //lower_doubles |= nir_lower_dfloor;

      NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles);

      /* Vulkan uses the separate-shader linking model. */
      nir->info.separate_shader = true;

      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

      if (nir->info.stage == MESA_SHADER_GEOMETRY)
         nir_lower_gs_intrinsics(nir, true);

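      /* Lower projective texturing (txp) and tg4 offsets in NIR, on the
       * assumption (carried over from the radv-derived lowering choices)
       * that the backend does not handle them natively. */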
      static const nir_lower_tex_options tex_options = {
         .lower_txp = ~0,
         .lower_tg4_offsets = true,
      };

      nir_lower_tex(nir, &tex_options);

      nir_lower_vars_to_ssa(nir);

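      /* Lowering I/O to temporaries turns every output variable into a local
       * that is copied to the real output at the end of the entrypoint, so
       * e.g. the vertex position is written last. */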
      if (nir->info.stage == MESA_SHADER_VERTEX ||
          nir->info.stage == MESA_SHADER_GEOMETRY ||
          nir->info.stage == MESA_SHADER_FRAGMENT) {
         NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                    nir_shader_get_entrypoint(nir), true, true);
      } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
         NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                    nir_shader_get_entrypoint(nir), true, false);
      }

      nir_split_var_copies(nir);

      nir_lower_global_vars_to_local(nir);
      nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
      // bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
      // nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
      //    .subgroup_size = subgroup_size,
      //    .ballot_bit_size = ballot_bit_size,
      //    .lower_to_scalar = 1,
      //    .lower_subgroup_masks = 1,
      //    .lower_shuffle = 1,
      //    .lower_shuffle_to_32bit = 1,
      //    .lower_vote_eq_to_ballot = 1,
      //    .lower_quad_broadcast_dynamic = 1,
      //    .lower_quad_broadcast_dynamic_to_const = gfx7minus,
      //    .lower_shuffle_to_swizzle_amd = 1,
      // });

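      /* Split vector load_const instructions into scalar ones so later
       * scalar optimization passes can CSE the individual components. */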
      nir_lower_load_const_to_scalar(nir);

      // if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
      //    radv_optimize_nir(nir, false, true);

      /* call radv_nir_lower_ycbcr_textures() late as there might still be
       * tex with undef texture/sampler before first optimization */
      // NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout);

      /* We call nir_lower_var_copies() after the first radv_optimize_nir()
       * to remove any copies introduced by nir_opt_find_array_copies().
       */
      nir_lower_var_copies(nir);

      /* Lower deref operations for compute shared memory. */
      if (nir->info.stage == MESA_SHADER_COMPUTE) {
         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                    nir_var_mem_shared, shared_var_info);
         NIR_PASS_V(nir, nir_lower_explicit_io,
                    nir_var_mem_shared, nir_address_format_32bit_offset);
      }

      /* Lower large variables that are always constant with load_constant
       * intrinsics, which get turned into PC-relative loads from a data
       * section next to the shader.
       */
      NIR_PASS_V(nir, nir_opt_large_constants,
                 glsl_get_natural_size_align_bytes, 16);

      /* Indirect lowering must be called after the radv_optimize_nir() loop
       * has been called at least once.  Otherwise indirect lowering can
       * bloat the instruction count of the loop and cause it to be
       * considered too large for unrolling.
       */
      // ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
      // radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);

      if (device->instance->debug_flags & LIBRESOC_DEBUG_DUMP_NIR)
         nir_print_shader(nir, stderr);
   }
   return nir;
}

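/* vkCreateShaderModule: the incoming SPIR-V words are copied into the module
 * so the shader can be compiled later, after the caller's buffer is gone. */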
VkResult
libresoc_CreateShaderModule(VkDevice _device,
                            const VkShaderModuleCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkShaderModule *pShaderModule)
{
   LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
   struct libresoc_shader_module *module;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   module = vk_alloc2(&device->vk.alloc, pAllocator,
                      sizeof(*module) + pCreateInfo->codeSize, 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (module == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &module->base,
                       VK_OBJECT_TYPE_SHADER_MODULE);

   module->nir = NULL;
   module->size = pCreateInfo->codeSize;
   memcpy(module->data, pCreateInfo->pCode, module->size);

   _mesa_sha1_compute(module->data, module->size, module->sha1);

   *pShaderModule = libresoc_shader_module_to_handle(module);

   return VK_SUCCESS;
}

void
libresoc_DestroyShaderModule(VkDevice _device,
                             VkShaderModule _module,
                             const VkAllocationCallbacks *pAllocator)
{
   LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
   LIBRESOC_FROM_HANDLE(libresoc_shader_module, module, _module);

   if (!module)
      return;

   vk_object_base_finish(&module->base);
   vk_free2(&device->vk.alloc, pAllocator, module);
}