/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* libresoc_shader.h is assumed to pull in the NIR, spirv_to_nir() and
 * Vulkan runtime declarations used below. */
#include "libresoc_shader.h"

static const struct nir_shader_compiler_options nir_options_llvm = {
   .vertex_id_zero_based = false,
   .lower_device_index_to_zero = true,
   .lower_bitfield_insert_to_bitfield_select = true,
   .lower_bitfield_extract = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_mul_2x32_64 = true,
   .use_scoped_barrier = true,
   .max_unroll_iterations = 32,
   .use_interpolated_input_intrinsics = true,
   /* nir_lower_int64() isn't actually called for the LLVM backend, but
    * this helps the loop unrolling heuristics. */
   .lower_int64_options = nir_lower_imul64 |
                          nir_lower_imul_high64 |
                          nir_lower_imul_2x32_64 |
                          nir_lower_divmod64 |
                          nir_lower_minmax64 |
                          nir_lower_iabs64,
   .lower_doubles_options = nir_lower_drcp |
                            nir_lower_dsqrt |
                            nir_lower_drsq |
                            nir_lower_ddiv,
};
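
/* None of these flags run any lowering by themselves: they tell the shared
 * NIR passes (nir_lower_int64, nir_lower_doubles, nir_opt_algebraic, ...)
 * which operations the backend cannot handle natively.  With
 * .lower_extract_byte set, for instance, an extract_u8 ALU op is rewritten
 * into a shift-and-mask sequence before code generation ever sees it. */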

static char *
libresoc_dump_nir_shaders(struct nir_shader * const *shaders,
                          int shader_count)
{
   char *data = NULL;
   char *ret = NULL;
   size_t size = 0;
   FILE *f = open_memstream(&data, &size);
   if (!f)
      return NULL;

   for (int i = 0; i < shader_count; ++i)
      nir_print_shader(shaders[i], f);

   fclose(f);

   ret = malloc(size + 1);
   if (ret) {
      memcpy(ret, data, size);
      ret[size] = 0;
   }
   free(data);
   return ret;
}
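
/* The returned buffer is heap-allocated and NUL-terminated; the caller is
 * expected to free() it once the dump has been written out. */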

static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}
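
/* Size/alignment callback for nir_lower_vars_to_explicit_types() below.
 * For a shared uvec3, for example, comp_size is 4 and length is 3, so the
 * variable gets size 12 with 4-byte alignment; booleans widen to 32 bits. */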

nir_shader *
libresoc_shader_compile_to_nir(struct libresoc_device *device,
                               struct libresoc_shader_module *module,
                               const char *entrypoint_name,
                               gl_shader_stage stage,
                               const VkSpecializationInfo *spec_info,
                               const VkPipelineCreateFlags flags,
                               unsigned subgroup_size, unsigned ballot_bit_size)
{
   const nir_shader_compiler_options *nir_options = &nir_options_llvm;
   nir_shader *nir;

   if (module->nir) {
      /* Some things such as our meta clear/blit code will give us a NIR
       * shader directly.  In that case, we just ignore the SPIR-V entirely
       * and just use the NIR shader. */
      nir = module->nir;
      nir->options = nir_options;
      nir_validate_shader(nir, "in internal shader");

      assert(exec_list_length(&nir->functions) == 1);
   } else {
      uint32_t *spirv = (uint32_t *) module->data;
      assert(module->size % 4 == 0);

      if (device->instance->debug_flags & LIBRESOC_DEBUG_DUMP_SPIRV)
         libresoc_print_spirv(module->data, module->size, stderr);

      uint32_t num_spec_entries = 0;
      struct nir_spirv_specialization *spec_entries = NULL;
      if (spec_info && spec_info->mapEntryCount > 0) {
         num_spec_entries = spec_info->mapEntryCount;
         spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
         for (uint32_t i = 0; i < num_spec_entries; i++) {
            VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
            const void *data = spec_info->pData + entry.offset;
            assert(data + entry.size <= spec_info->pData + spec_info->dataSize);

            spec_entries[i].id = spec_info->pMapEntries[i].constantID;
            switch (entry.size) {
            case 8:
               spec_entries[i].value.u64 = *(const uint64_t *)data;
               break;
            case 4:
               spec_entries[i].value.u32 = *(const uint32_t *)data;
               break;
            case 2:
               spec_entries[i].value.u16 = *(const uint16_t *)data;
               break;
            case 1:
               spec_entries[i].value.u8 = *(const uint8_t *)data;
               break;
            default:
               assert(!"Invalid spec constant size");
            }
         }
      }
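
      /* spec_entries now holds the client's specialization values keyed by
       * constantID; spirv_to_nir() uses them to override the corresponding
       * OpSpecConstants in the module. */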

      const struct spirv_to_nir_options spirv_options = {0};
      nir = spirv_to_nir(spirv, module->size / 4,
                         spec_entries, num_spec_entries,
                         stage, entrypoint_name,
                         &spirv_options, nir_options);
      assert(nir->info.stage == stage);
      nir_validate_shader(nir, "after spirv_to_nir");

      free(spec_entries);

      /* We have to lower away local constant initializers right before we
       * inline functions.  That way they get properly initialized at the top
       * of the function and not at the top of its caller.
       */
      NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
      NIR_PASS_V(nir, nir_lower_returns);
      NIR_PASS_V(nir, nir_inline_functions);
      NIR_PASS_V(nir, nir_copy_prop);
      NIR_PASS_V(nir, nir_opt_deref);

      /* Pick off the single entrypoint that we want. */
      /* TODO: enable the following code once its purpose is understood:
      foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
         if (func->is_entrypoint)
            func->name = ralloc_strdup(func, "main");
         else
            exec_node_remove(&func->node);
      }
      assert(exec_list_length(&nir->functions) == 1);
      */

      /* Make sure we lower constant initializers on output variables so that
       * nir_remove_dead_variables below sees the corresponding stores.
       */
      NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);

      /* Now that we've deleted all but the main function, we can go ahead and
       * lower the rest of the constant initializers.
       */
      NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

      /* Split member structs.  We do this before lower_io_to_temporaries so
       * that it doesn't lower system values to temporaries by accident.
       */
      NIR_PASS_V(nir, nir_split_var_copies);
      NIR_PASS_V(nir, nir_split_per_member_structs);

      if (nir->info.stage == MESA_SHADER_FRAGMENT)
         NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);

      if (nir->info.stage == MESA_SHADER_FRAGMENT)
         NIR_PASS_V(nir, nir_lower_input_attachments,
                    &(nir_input_attachment_options) {
                       .use_fragcoord_sysval = true,
                       .use_layer_id_sysval = false,
                    });
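
      /* With .use_fragcoord_sysval set, input attachment loads become plain
       * texel fetches addressed by the frag-coord system value instead of a
       * dedicated input-attachment access path. */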

      NIR_PASS_V(nir, nir_remove_dead_variables,
                 nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
                 nir_var_mem_shared, NULL);

      NIR_PASS_V(nir, nir_propagate_invariant);

      NIR_PASS_V(nir, nir_lower_system_values);
      NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

      NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

      // if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
      //    NIR_PASS_V(nir, nir_lower_discard_to_demote);

      nir_lower_doubles_options lower_doubles =
         nir->options->lower_doubles_options;
      // TODO: if required, enable the following:
      // lower_doubles |= nir_lower_dfloor;

      NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles);
   }

   /* Vulkan uses the separate-shader linking model. */
   nir->info.separate_shader = true;

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      nir_lower_gs_intrinsics(nir, true);

   static const nir_lower_tex_options tex_options = {
      .lower_txp = ~0,
      .lower_tg4_offsets = true,
   };

   nir_lower_tex(nir, &tex_options);

   nir_lower_vars_to_ssa(nir);

   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY ||
       nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, true);
   } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
   }

   nir_split_var_copies(nir);

   nir_lower_global_vars_to_local(nir);
   nir_remove_dead_variables(nir, nir_var_function_temp, NULL);

   // bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
   // nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
   //    .subgroup_size = subgroup_size,
   //    .ballot_bit_size = ballot_bit_size,
   //    .lower_to_scalar = 1,
   //    .lower_subgroup_masks = 1,
   //    .lower_shuffle = 1,
   //    .lower_shuffle_to_32bit = 1,
   //    .lower_vote_eq_to_ballot = 1,
   //    .lower_quad_broadcast_dynamic = 1,
   //    .lower_quad_broadcast_dynamic_to_const = gfx7minus,
   //    .lower_shuffle_to_swizzle_amd = 1,
   // });

   nir_lower_load_const_to_scalar(nir);

   // if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
   //    radv_optimize_nir(nir, false, true);

   /* Call radv_nir_lower_ycbcr_textures() late, as there might still be
    * tex instructions with an undef texture/sampler before the first
    * optimization pass. */
   // NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout);

   /* We call nir_lower_var_copies() after the first radv_optimize_nir()
    * to remove any copies introduced by nir_opt_find_array_copies().
    */
   nir_lower_var_copies(nir);

   /* Lower deref operations for compute shared memory. */
   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                 nir_var_mem_shared, shared_var_info);
      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared, nir_address_format_32bit_offset);
   }
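
   /* After these two passes every shared variable has an explicit byte
    * offset computed by shared_var_info() above, and its loads/stores are
    * plain 32-bit-offset intrinsics rather than variable derefs. */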

   /* Lower large variables that are always constant with load_constant
    * intrinsics, which get turned into PC-relative loads from a data
    * section next to the shader.
    */
   NIR_PASS_V(nir, nir_opt_large_constants,
              glsl_get_natural_size_align_bytes, 16);
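
   /* e.g. a read-only "const uint table[...]" that is indexed dynamically
    * gets pulled out of the instruction stream this way; 16 is the size
    * threshold in bytes below which a variable is left in place. */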

   /* Indirect lowering must be called after the radv_optimize_nir() loop
    * has been called at least once.  Otherwise indirect lowering can
    * bloat the instruction count of the loop and cause it to be
    * considered too large for unrolling.
    */
   // ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
   // radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);

   if (device->instance->debug_flags & LIBRESOC_DEBUG_DUMP_NIR)
      nir_print_shader(nir, stderr);

   return nir;
}

VkResult
libresoc_CreateShaderModule(VkDevice _device,
                            const VkShaderModuleCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkShaderModule *pShaderModule)
{
   LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
   struct libresoc_shader_module *module;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   module = vk_alloc2(&device->vk.alloc, pAllocator,
                      sizeof(*module) + pCreateInfo->codeSize, 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (module == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &module->base,
                       VK_OBJECT_TYPE_SHADER_MODULE);

   module->nir = NULL;
   module->size = pCreateInfo->codeSize;
   memcpy(module->data, pCreateInfo->pCode, module->size);

   _mesa_sha1_compute(module->data, module->size, module->sha1);

   *pShaderModule = libresoc_shader_module_to_handle(module);

   return VK_SUCCESS;
}

void
libresoc_DestroyShaderModule(VkDevice _device,
                             VkShaderModule _module,
                             const VkAllocationCallbacks *pAllocator)
{
   LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
   LIBRESOC_FROM_HANDLE(libresoc_shader_module, module, _module);

   if (!module)
      return;

   vk_object_base_finish(&module->base);
   vk_free2(&device->vk.alloc, pAllocator, module);
}