r600/sfn: rework getting a vector and uniforms from the value pool
[mesa.git] / src / amd / vulkan / radv_shader.c
index 70a51ee01d06614246176f70472f1113e6560d6f..7dfa89d65e9902a6659840b10fc2e0cf8b121051 100644 (file)
@@ -160,12 +160,15 @@ VkResult radv_CreateShaderModule(
        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
        assert(pCreateInfo->flags == 0);
 
-       module = vk_alloc2(&device->alloc, pAllocator,
+       module = vk_alloc2(&device->vk.alloc, pAllocator,
                             sizeof(*module) + pCreateInfo->codeSize, 8,
                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (module == NULL)
                return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 
+       vk_object_base_init(&device->vk, &module->base,
+                           VK_OBJECT_TYPE_SHADER_MODULE);
+
        module->nir = NULL;
        module->size = pCreateInfo->codeSize;
        memcpy(module->data, pCreateInfo->pCode, module->size);
@@ -188,7 +191,8 @@ void radv_DestroyShaderModule(
        if (!module)
                return;
 
-       vk_free2(&device->alloc, pAllocator, module);
+       vk_object_base_finish(&module->base);
+       vk_free2(&device->vk.alloc, pAllocator, module);
 }
 
 void
@@ -320,19 +324,34 @@ radv_shader_compile_to_nir(struct radv_device *device,
                struct nir_spirv_specialization *spec_entries = NULL;
                if (spec_info && spec_info->mapEntryCount > 0) {
                        num_spec_entries = spec_info->mapEntryCount;
-                       spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
+                       spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
                        for (uint32_t i = 0; i < num_spec_entries; i++) {
                                VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
                                const void *data = spec_info->pData + entry.offset;
                                assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
 
                                spec_entries[i].id = spec_info->pMapEntries[i].constantID;
-                               if (spec_info->dataSize == 8)
-                                       spec_entries[i].data64 = *(const uint64_t *)data;
-                               else
-                                       spec_entries[i].data32 = *(const uint32_t *)data;
+                               switch (entry.size) {
+                               case 8:
+                                       spec_entries[i].value.u64 = *(const uint64_t *)data;
+                                       break;
+                               case 4:
+                                       spec_entries[i].value.u32 = *(const uint32_t *)data;
+                                       break;
+                               case 2:
+                                       spec_entries[i].value.u16 = *(const uint16_t *)data;
+                                       break;
+                               case 1:
+                                       spec_entries[i].value.u8 = *(const uint8_t *)data;
+                                       break;
+                               default:
+                                       assert(!"Invalid spec constant size");
+                                       break;
+                               }
                        }
                }
+               bool int8_int16_enable = !device->physical_device->use_aco ||
+                                        device->physical_device->rad_info.chip_class >= GFX8;
                const struct spirv_to_nir_options spirv_options = {
                        .lower_ubo_ssbo_access_to_offsets = true,
                        .caps = {
@@ -350,16 +369,17 @@ radv_shader_compile_to_nir(struct radv_device *device,
                                .device_group = true,
                                .draw_parameters = true,
                                .float_controls = true,
-                               .float16 = !device->physical_device->use_aco,
+                               .float16 = device->physical_device->rad_info.has_double_rate_fp16 && !device->physical_device->use_aco,
                                .float64 = true,
                                .geometry_streams = true,
                                .image_ms_array = true,
                                .image_read_without_format = true,
                                .image_write_without_format = true,
-                               .int8 = !device->physical_device->use_aco,
-                               .int16 = !device->physical_device->use_aco,
+                               .int8 = int8_int16_enable,
+                               .int16 = int8_int16_enable,
                                .int64 = true,
                                .int64_atomics = true,
+                               .min_lod = true,
                                .multiview = true,
                                .physical_storage_buffer_address = true,
                                .post_depth_coverage = true,
@@ -367,8 +387,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
                                .shader_clock = true,
                                .shader_viewport_index_layer = true,
                                .stencil_export = true,
-                               .storage_8bit = !device->physical_device->use_aco,
-                               .storage_16bit = !device->physical_device->use_aco,
+                               .storage_8bit = int8_int16_enable,
+                               .storage_16bit = int8_int16_enable,
                                .storage_image_ms = true,
                                .subgroup_arithmetic = true,
                                .subgroup_ballot = true,
@@ -453,8 +473,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
 
        nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 
-       if (nir->info.stage == MESA_SHADER_GEOMETRY &&
-           device->physical_device->use_aco)
+       if (nir->info.stage == MESA_SHADER_GEOMETRY)
                nir_lower_gs_intrinsics(nir, true);
 
        static const nir_lower_tex_options tex_options = {
@@ -629,7 +648,7 @@ radv_alloc_shader_memory(struct radv_device *device,
                        }
                        offset = align_u64(s->bo_offset + s->code_size, 256);
                }
-               if (slab->size - offset >= shader->code_size) {
+               if (offset <= slab->size && slab->size - offset >= shader->code_size) {
                        shader->bo = slab->bo;
                        shader->bo_offset = offset;
                        list_addtail(&shader->slab_list, &slab->shaders);
@@ -641,7 +660,7 @@ radv_alloc_shader_memory(struct radv_device *device,
        mtx_unlock(&device->shader_slab_mutex);
        struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab));
 
-       slab->size = 256 * 1024;
+       slab->size = MAX2(256 * 1024, shader->code_size);
        slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
                                             RADEON_DOMAIN_VRAM,
                                             RADEON_FLAG_NO_INTERPROCESS_SHARING |
@@ -799,8 +818,8 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
                        }
 
                        config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
-                       config_out->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
                }
+               config_out->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
                break;
        case MESA_SHADER_FRAGMENT:
                config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
@@ -1026,15 +1045,20 @@ radv_shader_variant_create(struct radv_device *device,
                ac_rtld_close(&rtld_binary);
        } else {
                struct radv_shader_binary_legacy* bin = (struct radv_shader_binary_legacy *)binary;
-               memcpy(dest_ptr, bin->data, bin->code_size);
+               memcpy(dest_ptr, bin->data + bin->stats_size, bin->code_size);
 
                /* Add end-of-code markers for the UMR disassembler. */
                uint32_t *ptr32 = (uint32_t *)dest_ptr + bin->code_size / 4;
                for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
                        ptr32[i] = DEBUGGER_END_OF_CODE_MARKER;
 
-               variant->ir_string = bin->ir_size ? strdup((const char*)(bin->data + bin->code_size)) : NULL;
-               variant->disasm_string = bin->disasm_size ? strdup((const char*)(bin->data + bin->code_size + bin->ir_size)) : NULL;
+               variant->ir_string = bin->ir_size ? strdup((const char*)(bin->data + bin->stats_size + bin->code_size)) : NULL;
+               variant->disasm_string = bin->disasm_size ? strdup((const char*)(bin->data + bin->stats_size + bin->code_size + bin->ir_size)) : NULL;
+
+               if (bin->stats_size) {
+                       variant->statistics = calloc(bin->stats_size, 1);
+                       memcpy(variant->statistics, bin->data, bin->stats_size);
+               }
        }
        return variant;
 }
@@ -1072,6 +1096,7 @@ shader_variant_compile(struct radv_device *device,
                       struct radv_nir_compiler_options *options,
                       bool gs_copy_shader,
                       bool keep_shader_info,
+                      bool keep_statistic_info,
                       struct radv_shader_binary **binary_out)
 {
        enum radeon_family chip_family = device->physical_device->rad_info.family;
@@ -1083,6 +1108,7 @@ shader_variant_compile(struct radv_device *device,
        options->dump_preoptir = options->dump_shader &&
                                 device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
        options->record_ir = keep_shader_info;
+       options->record_stats = keep_statistic_info;
        options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
        options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
        options->address32_hi = device->physical_device->rad_info.address32_hi;
@@ -1120,7 +1146,11 @@ shader_variant_compile(struct radv_device *device,
        }
 
        if (options->dump_shader) {
-               fprintf(stderr, "disasm:\n%s\n", variant->disasm_string);
+               fprintf(stderr, "%s", radv_get_shader_name(info, shaders[0]->info.stage));
+               for (int i = 1; i < shader_count; ++i)
+                       fprintf(stderr, " + %s", radv_get_shader_name(info, shaders[i]->info.stage));
+
+               fprintf(stderr, "\ndisasm:\n%s\n", variant->disasm_string);
        }
 
 
@@ -1155,7 +1185,7 @@ radv_shader_variant_compile(struct radv_device *device,
                           struct radv_pipeline_layout *layout,
                           const struct radv_shader_variant_key *key,
                           struct radv_shader_info *info,
-                          bool keep_shader_info,
+                          bool keep_shader_info, bool keep_statistic_info,
                           struct radv_shader_binary **binary_out)
 {
        struct radv_nir_compiler_options options = {0};
@@ -1168,7 +1198,7 @@ radv_shader_variant_compile(struct radv_device *device,
        options.robust_buffer_access = device->robust_buffer_access;
 
        return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info,
-                                    &options, false, keep_shader_info, binary_out);
+                                    &options, false, keep_shader_info, keep_statistic_info, binary_out);
 }
 
 struct radv_shader_variant *
@@ -1176,7 +1206,7 @@ radv_create_gs_copy_shader(struct radv_device *device,
                           struct nir_shader *shader,
                           struct radv_shader_info *info,
                           struct radv_shader_binary **binary_out,
-                          bool keep_shader_info,
+                          bool keep_shader_info, bool keep_statistic_info,
                           bool multiview)
 {
        struct radv_nir_compiler_options options = {0};
@@ -1185,7 +1215,7 @@ radv_create_gs_copy_shader(struct radv_device *device,
        options.key.has_multiview_view_index = multiview;
 
        return shader_variant_compile(device, NULL, &shader, 1, MESA_SHADER_VERTEX,
-                                     info, &options, true, keep_shader_info, binary_out);
+                                     info, &options, true, keep_shader_info, keep_statistic_info, binary_out);
 }
 
 void
@@ -1203,6 +1233,7 @@ radv_shader_variant_destroy(struct radv_device *device,
        free(variant->nir_string);
        free(variant->disasm_string);
        free(variant->ir_string);
+       free(variant->statistics);
        free(variant);
 }
 
@@ -1332,13 +1363,23 @@ generate_shader_stats(struct radv_device *device,
                                   "Code Size: %d bytes\n"
                                   "LDS: %d blocks\n"
                                   "Scratch: %d bytes per wave\n"
-                                  "Max Waves: %d\n"
-                                  "********************\n\n\n",
+                                  "Max Waves: %d\n",
                                   conf->num_sgprs, conf->num_vgprs,
                                   conf->spilled_sgprs, conf->spilled_vgprs,
                                   variant->info.private_mem_vgprs, variant->exec_size,
                                   conf->lds_size, conf->scratch_bytes_per_wave,
                                   max_simd_waves);
+
+       if (variant->statistics) {
+               _mesa_string_buffer_printf(buf, "*** COMPILER STATS ***\n");
+               for (unsigned i = 0; i < variant->statistics->count; i++) {
+                       struct radv_compiler_statistic_info *info = &variant->statistics->infos[i];
+                       uint32_t value = variant->statistics->values[i];
+                       _mesa_string_buffer_printf(buf, "%s: %u\n", info->name, value);
+               }
+       }
+
+       _mesa_string_buffer_printf(buf, "********************\n\n\n");
 }
 
 void