Updated code generation so that, for vertex shaders, the output position is written last...
[mesa.git] / src / libre-soc / vulkan / libresoc_shader.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "libresoc_shader.h"

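/* NIR compiler options used for every stage.  The "_llvm" suffix and most of
 * these choices appear to be carried over from the radv/LLVM code this driver
 * is based on; the same option set is handed to spirv_to_nir() below. */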
static const struct nir_shader_compiler_options nir_options_llvm = {
   .vertex_id_zero_based = false,
   .lower_scmp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_device_index_to_zero = true,
   .lower_fsat = true,
   .lower_fdiv = true,
   .lower_fmod = true,
   .lower_bitfield_insert_to_bitfield_select = true,
   .lower_bitfield_extract = true,
   .lower_sub = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_ffma = true,
   .lower_fpow = true,
   .lower_mul_2x32_64 = true,
   .lower_rotate = true,
   .use_scoped_barrier = true,
   .max_unroll_iterations = 32,
   .use_interpolated_input_intrinsics = true,
   /* nir_lower_int64() isn't actually called for the LLVM backend, but
    * this helps the loop unrolling heuristics. */
   .lower_int64_options = nir_lower_imul64 |
                          nir_lower_imul_high64 |
                          nir_lower_imul_2x32_64 |
                          nir_lower_divmod64 |
                          nir_lower_minmax64 |
                          nir_lower_iabs64,
   .lower_doubles_options = nir_lower_drcp |
                            nir_lower_dsqrt |
                            nir_lower_drsq |
                            nir_lower_ddiv,
};

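/* Print each NIR shader into a single heap-allocated string.  The caller is
 * responsible for free()ing the result; returns NULL on allocation failure. */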
static char *
libresoc_dump_nir_shaders(struct nir_shader * const *shaders,
                          int shader_count)
{
   char *data = NULL;
   char *ret = NULL;
   size_t size = 0;
   FILE *f = open_memstream(&data, &size);
   if (f) {
      for (int i = 0; i < shader_count; ++i)
         nir_print_shader(shaders[i], f);
      fclose(f);
   }

   ret = malloc(size + 1);
   if (ret) {
      memcpy(ret, data, size);
      ret[size] = 0;
   }
   free(data);
   return ret;
}

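/* Size/alignment callback for nir_lower_vars_to_explicit_types(): booleans
 * are stored as 32-bit values, everything else uses its natural byte size,
 * and vectors are packed with component alignment. */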
static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}

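/* Translate a shader module into lowered NIR.  Internal (meta) shaders may
 * already carry NIR; otherwise the module's SPIR-V is converted with
 * spirv_to_nir() and run through the lowering pipeline below. */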
nir_shader *
libresoc_shader_compile_to_nir(struct libresoc_device *device,
                               struct libresoc_shader_module *module,
                               const char *entrypoint_name,
                               gl_shader_stage stage,
                               const VkSpecializationInfo *spec_info,
                               const VkPipelineCreateFlags flags,
                               unsigned subgroup_size, unsigned ballot_bit_size)
{
   nir_shader *nir;
   const nir_shader_compiler_options *nir_options = &nir_options_llvm;

   if (module->nir) {
      /* Some things, such as our meta clear/blit code, will give us a NIR
       * shader directly.  In that case, we ignore the SPIR-V entirely and
       * just use the NIR shader. */
      nir = module->nir;
      nir->options = nir_options;
      nir_validate_shader(nir, "in internal shader");

      assert(exec_list_length(&nir->functions) == 1);
   } else {
      uint32_t *spirv = (uint32_t *) module->data;
      assert(module->size % 4 == 0);

      if (device->instance->debug_flags & LIBRESOC_DEBUG_DUMP_SPIRV)
         libresoc_print_spirv(module->data, module->size, stderr);

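      /* Unpack VkSpecializationInfo into the flat array of
       * nir_spirv_specialization entries that spirv_to_nir() consumes. */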
      uint32_t num_spec_entries = 0;
      struct nir_spirv_specialization *spec_entries = NULL;
      if (spec_info && spec_info->mapEntryCount > 0) {
         num_spec_entries = spec_info->mapEntryCount;
         spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
         for (uint32_t i = 0; i < num_spec_entries; i++) {
            VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
            const void *data = spec_info->pData + entry.offset;
            assert(data + entry.size <= spec_info->pData + spec_info->dataSize);

            spec_entries[i].id = spec_info->pMapEntries[i].constantID;
            switch (entry.size) {
            case 8:
               spec_entries[i].value.u64 = *(const uint64_t *)data;
               break;
            case 4:
               spec_entries[i].value.u32 = *(const uint32_t *)data;
               break;
            case 2:
               spec_entries[i].value.u16 = *(const uint16_t *)data;
               break;
            case 1:
               spec_entries[i].value.u8 = *(const uint8_t *)data;
               break;
            default:
               assert(!"Invalid spec constant size");
               break;
            }
         }
      }

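      /* Convert the SPIR-V to NIR, applying the specialization constants.
       * Default (all-zero) spirv_to_nir options are used, so no optional
       * SPIR-V capabilities are enabled here. */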
      const struct spirv_to_nir_options spirv_options = {0};
      nir = spirv_to_nir(spirv, module->size / 4,
                         spec_entries, num_spec_entries,
                         stage, entrypoint_name,
                         &spirv_options, nir_options);
      assert(nir->info.stage == stage);
      nir_validate_shader(nir, "after spirv_to_nir");

      free(spec_entries);

      /* We have to lower away local constant initializers right before we
       * inline functions.  That way they get properly initialized at the top
       * of the function and not at the top of its caller.
       */
      NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
      NIR_PASS_V(nir, nir_lower_returns);
      NIR_PASS_V(nir, nir_inline_functions);
      NIR_PASS_V(nir, nir_copy_prop);
      NIR_PASS_V(nir, nir_opt_deref);

      /* Pick off the single entrypoint that we want. */
      /* TODO: enable the following code once its purpose is understood:
      foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
         if (func->is_entrypoint)
            func->name = ralloc_strdup(func, "main");
         else
            exec_node_remove(&func->node);
      }
      assert(exec_list_length(&nir->functions) == 1);
      */

      /* Make sure we lower constant initializers on output variables so that
       * nir_remove_dead_variables below sees the corresponding stores.
       */
      NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);

      /* Now that we've deleted all but the main function, we can go ahead and
       * lower the rest of the constant initializers.
       */
      NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

      /* Split member structs.  We do this before lower_io_to_temporaries so
       * that it doesn't lower system values to temporaries by accident.
       */
      NIR_PASS_V(nir, nir_split_var_copies);
      NIR_PASS_V(nir, nir_split_per_member_structs);

      if (nir->info.stage == MESA_SHADER_FRAGMENT) {
         NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
         NIR_PASS_V(nir, nir_lower_input_attachments,
                    &(nir_input_attachment_options) {
                       .use_fragcoord_sysval = true,
                       .use_layer_id_sysval = false,
                    });
      }

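      /* Drop I/O, system-value and shared-memory variables that the lowering
       * above has left unused. */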
      NIR_PASS_V(nir, nir_remove_dead_variables,
                 nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
                 nir_var_mem_shared, NULL);

      NIR_PASS_V(nir, nir_propagate_invariant);

      NIR_PASS_V(nir, nir_lower_system_values);
      NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

      NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

      // if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
      //    NIR_PASS_V(nir, nir_lower_discard_to_demote);

      nir_lower_doubles_options lower_doubles =
         nir->options->lower_doubles_options;
      //TODO: enable the following if required:
      //lower_doubles |= nir_lower_dfloor;

      NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles);

      /* Vulkan uses the separate-shader linking model. */
      nir->info.separate_shader = true;

      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

      if (nir->info.stage == MESA_SHADER_GEOMETRY)
         nir_lower_gs_intrinsics(nir, true);

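      /* Lower projective texturing (txp) and tg4 offsets in NIR, on the
       * assumption (carried over from the radv-derived lowering choices)
       * that the backend does not handle them natively. */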
      static const nir_lower_tex_options tex_options = {
         .lower_txp = ~0,
         .lower_tg4_offsets = true,
      };

      nir_lower_tex(nir, &tex_options);

      nir_lower_vars_to_ssa(nir);

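      /* Lowering I/O to temporaries turns every output variable into a local
       * that is copied to the real output at the end of the entrypoint, so
       * e.g. the vertex position is written last. */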
      if (nir->info.stage == MESA_SHADER_VERTEX ||
          nir->info.stage == MESA_SHADER_GEOMETRY ||
          nir->info.stage == MESA_SHADER_FRAGMENT) {
         NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                    nir_shader_get_entrypoint(nir), true, true);
      } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
         NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                    nir_shader_get_entrypoint(nir), true, false);
      }

      nir_split_var_copies(nir);

      nir_lower_global_vars_to_local(nir);
      nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
      // bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
      // nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
      //    .subgroup_size = subgroup_size,
      //    .ballot_bit_size = ballot_bit_size,
      //    .lower_to_scalar = 1,
      //    .lower_subgroup_masks = 1,
      //    .lower_shuffle = 1,
      //    .lower_shuffle_to_32bit = 1,
      //    .lower_vote_eq_to_ballot = 1,
      //    .lower_quad_broadcast_dynamic = 1,
      //    .lower_quad_broadcast_dynamic_to_const = gfx7minus,
      //    .lower_shuffle_to_swizzle_amd = 1,
      // });

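      /* Split vector load_const instructions into scalar ones so later
       * scalar optimization passes can CSE the individual components. */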
      nir_lower_load_const_to_scalar(nir);

      // if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
      //    radv_optimize_nir(nir, false, true);

      /* call radv_nir_lower_ycbcr_textures() late as there might still be
       * tex with undef texture/sampler before first optimization */
      // NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout);

      /* We call nir_lower_var_copies() after the first radv_optimize_nir()
       * to remove any copies introduced by nir_opt_find_array_copies().
       */
      nir_lower_var_copies(nir);

      /* Lower deref operations for compute shared memory. */
      if (nir->info.stage == MESA_SHADER_COMPUTE) {
         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                    nir_var_mem_shared, shared_var_info);
         NIR_PASS_V(nir, nir_lower_explicit_io,
                    nir_var_mem_shared, nir_address_format_32bit_offset);
      }

      /* Lower large variables that are always constant with load_constant
       * intrinsics, which get turned into PC-relative loads from a data
       * section next to the shader.
       */
      NIR_PASS_V(nir, nir_opt_large_constants,
                 glsl_get_natural_size_align_bytes, 16);

      /* Indirect lowering must be called after the radv_optimize_nir() loop
       * has been called at least once.  Otherwise indirect lowering can
       * bloat the instruction count of the loop and cause it to be
       * considered too large for unrolling.
       */
      // ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
      // radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);

      if (device->instance->debug_flags & LIBRESOC_DEBUG_DUMP_NIR)
         nir_print_shader(nir, stderr);
   }
   return nir;
}

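/* vkCreateShaderModule: the incoming SPIR-V words are copied into the module
 * so the shader can be compiled later, after the caller's buffer is gone. */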
VkResult
libresoc_CreateShaderModule(VkDevice _device,
                            const VkShaderModuleCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkShaderModule *pShaderModule)
{
   LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
   struct libresoc_shader_module *module;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   module = vk_alloc2(&device->vk.alloc, pAllocator,
                      sizeof(*module) + pCreateInfo->codeSize, 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (module == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &module->base,
                       VK_OBJECT_TYPE_SHADER_MODULE);

   module->nir = NULL;
   module->size = pCreateInfo->codeSize;
   memcpy(module->data, pCreateInfo->pCode, module->size);

   _mesa_sha1_compute(module->data, module->size, module->sha1);

   *pShaderModule = libresoc_shader_module_to_handle(module);

   return VK_SUCCESS;
}

void
libresoc_DestroyShaderModule(VkDevice _device,
                             VkShaderModule _module,
                             const VkAllocationCallbacks *pAllocator)
{
   LIBRESOC_FROM_HANDLE(libresoc_device, device, _device);
   LIBRESOC_FROM_HANDLE(libresoc_shader_module, module, _module);

   if (!module)
      return;

   vk_object_base_finish(&module->base);
   vk_free2(&device->vk.alloc, pAllocator, module);
}