Updated code generation so that, for vertex shaders, the output position is written last...
[mesa.git] / src / libre-soc / vulkan / libresoc_shader.c
index d74b6b13d07f7c84f40c741a60ed47e961488c76..fef2f00d5678d32dbbbd5b998d5255c97e6b8345 100644 (file)
@@ -28,7 +28,7 @@
 #include "libresoc_shader.h"
 
 static const struct nir_shader_compiler_options nir_options_llvm = {
-       .vertex_id_zero_based = true,
+       .vertex_id_zero_based = false,
        .lower_scmp = true,
        .lower_flrp16 = true,
        .lower_flrp32 = true,
@@ -94,6 +94,17 @@ libresoc_dump_nir_shaders(struct nir_shader * const *shaders,
        return ret;
 }
 
+static void
+shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
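+       /* Size/alignment callback for nir_lower_vars_to_explicit_types() on
+        * shared memory: booleans occupy 4 bytes, other types their natural
+        * component size; alignment matches the component size.
+        */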
+       assert(glsl_type_is_vector_or_scalar(type));
+
+       uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
+       unsigned length = glsl_get_vector_elements(type);
+       *size = comp_size * length;
+       *align = comp_size;
+}
+
 nir_shader *
 libresoc_shader_compile_to_nir(struct libresoc_device *device,
                           struct libresoc_shader_module *module,
@@ -162,10 +173,160 @@ libresoc_shader_compile_to_nir(struct libresoc_device *device,
                assert(nir->info.stage == stage);
                nir_validate_shader(nir, "after spirv_to_nir");
 
+               free(spec_entries);
+
+               /* We have to lower away local constant initializers right before we
+                * inline functions.  That way they get properly initialized at the top
+                * of the function and not at the top of its caller.
+                */
+               NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
+               NIR_PASS_V(nir, nir_lower_returns);
+               NIR_PASS_V(nir, nir_inline_functions);
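+
+               /* Clean up after inlining: propagate copies and simplify deref
+                * chains before the remaining lowering passes run.
+                */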
+               NIR_PASS_V(nir, nir_copy_prop);
+               NIR_PASS_V(nir, nir_opt_deref);
+
+               /* Pick off the single entrypoint that we want */
+               /* TODO: enable the following code once its purpose is understood.
+               foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
+                       if (func->is_entrypoint)
+                               func->name = ralloc_strdup(func, "main");
+                       else
+                               exec_node_remove(&func->node);
+               }
+               assert(exec_list_length(&nir->functions) == 1);
+        */
+
+               /* Make sure we lower constant initializers on output variables so that
+                * nir_remove_dead_variables below sees the corresponding stores
+                */
+               NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
+
+               /* Now that we've deleted all but the main function, we can go ahead and
+                * lower the rest of the constant initializers.
+                */
+               NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
+
+               /* Split member structs.  We do this before lower_io_to_temporaries so that
+                * it doesn't lower system values to temporaries by accident.
+                */
+               NIR_PASS_V(nir, nir_split_var_copies);
+               NIR_PASS_V(nir, nir_split_per_member_structs);
+
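+               /* For fragment shaders, vectorize the outputs and turn subpass
+                * input-attachment reads into texture fetches addressed via the
+                * frag-coord system value.
+                */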
+               if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+                       NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
+                       NIR_PASS_V(nir, nir_lower_input_attachments,
+                                  &(nir_input_attachment_options) {
+                                       .use_fragcoord_sysval = true,
+                                       .use_layer_id_sysval = false,
+                                  });
+               }
+
+               NIR_PASS_V(nir, nir_remove_dead_variables,
+                          nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
+                          NULL);
+
+               NIR_PASS_V(nir, nir_propagate_invariant);
+
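+               /* Replace reads of system-value variables (vertex id, workgroup
+                * id, ...) with the corresponding NIR intrinsics.
+                */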
+               NIR_PASS_V(nir, nir_lower_system_values);
+               NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
+
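+               /* Combine gl_ClipDistance and gl_CullDistance into one compact
+                * array.
+                */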
+               NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
+
+               // if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
+               //      NIR_PASS_V(nir, nir_lower_discard_to_demote);
+
+               nir_lower_doubles_options lower_doubles =
+                       nir->options->lower_doubles_options;
+               /* TODO: enable if required:
+                *      lower_doubles |= nir_lower_dfloor;
+                */
+
+               NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles);
+
+       /* Vulkan uses the separate-shader linking model */
+       nir->info.separate_shader = true;
+
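+       /* Re-gather nir->info (inputs/outputs used, etc.) from the entrypoint
+        * now that the early lowering is done.
+        */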
+       nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+       if (nir->info.stage == MESA_SHADER_GEOMETRY)
+               nir_lower_gs_intrinsics(nir, true);
+
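+       /* Lower texture operations that are not handled directly: projective
+        * texturing (txp) and gather (tg4) with offsets.
+        */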
+       static const nir_lower_tex_options tex_options = {
+               .lower_txp = ~0,
+               .lower_tg4_offsets = true,
+       };
+
+       nir_lower_tex(nir, &tex_options);
+
+       nir_lower_vars_to_ssa(nir);
+
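+       /* Shadow shader I/O with temporaries so that the real outputs,
+        * including the vertex position, are written once at the very end of
+        * the entrypoint.
+        */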
+       if (nir->info.stage == MESA_SHADER_VERTEX ||
+           nir->info.stage == MESA_SHADER_GEOMETRY ||
+           nir->info.stage == MESA_SHADER_FRAGMENT) {
+               NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+                          nir_shader_get_entrypoint(nir), true, true);
+       } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
+               NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+                          nir_shader_get_entrypoint(nir), true, false);
+       }
+
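+       /* Split aggregate copies, turn globals only used by one function into
+        * locals, and drop function-temporary variables that are now dead.
+        */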
+       nir_split_var_copies(nir);
+
+       nir_lower_global_vars_to_local(nir);
+       nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
+       // bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
+       // nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
+       //              .subgroup_size = subgroup_size,
+       //              .ballot_bit_size = ballot_bit_size,
+       //              .lower_to_scalar = 1,
+       //              .lower_subgroup_masks = 1,
+       //              .lower_shuffle = 1,
+       //              .lower_shuffle_to_32bit = 1,
+       //              .lower_vote_eq_to_ballot = 1,
+       //              .lower_quad_broadcast_dynamic = 1,
+       //              .lower_quad_broadcast_dynamic_to_const = gfx7minus,
+       //              .lower_shuffle_to_swizzle_amd = 1,
+       //      });
+
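+       /* Split vector load_const instructions into scalar constants. */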
+       nir_lower_load_const_to_scalar(nir);
+
+       // if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
+       //      radv_optimize_nir(nir, false, true);
+
+       /* Call radv_nir_lower_ycbcr_textures() late, as there may still be
+        * tex instructions with an undef texture/sampler before the first
+        * optimization pass. */
+       // NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout);
+
+       /* We call nir_lower_var_copies() after the first radv_optimize_nir()
+        * to remove any copies introduced by nir_opt_find_array_copies().
+        */
+       nir_lower_var_copies(nir);
+
+       /* Lower deref operations for compute shared memory. */
+       if (nir->info.stage == MESA_SHADER_COMPUTE) {
+               NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
+                          nir_var_mem_shared, shared_var_info);
+               NIR_PASS_V(nir, nir_lower_explicit_io,
+                          nir_var_mem_shared, nir_address_format_32bit_offset);
+       }
+
+       /* Lower large variables that are always constant with load_constant
+        * intrinsics, which get turned into PC-relative loads from a data
+        * section next to the shader.
+        */
+       NIR_PASS_V(nir, nir_opt_large_constants,
+                  glsl_get_natural_size_align_bytes, 16);
+
+       /* Indirect lowering must be called after the radv_optimize_nir() loop
+        * has been called at least once. Otherwise indirect lowering can
+        * bloat the instruction count of the loop and cause it to be
+        * considered too large for unrolling.
+        */
+       // ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
+       // radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);
+
                if (device->instance->debug_flags & LIBRESOC_DEBUG_DUMP_NIR)
                        nir_print_shader(nir, stderr);
-        libresoc_nir_translate(&device->instance->llvm_ref, nir);
-               free(spec_entries);
        }
 return nir;
 }