X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_pipeline.c;h=88bc58f3771ce0b5feb591411f5a02e8109918e9;hb=8b8eaa84a3e80d1df1c2467dc31432824cffd610;hp=7c324d54408c92e3a15988940ae8554cdbdf8a81;hpb=925df46b7e00ee7ca4128ef2cc2f3fad68ce8d4e;p=mesa.git diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 7c324d54408..88bc58f3771 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -55,12 +55,14 @@ VkResult anv_CreateShaderModule( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); assert(pCreateInfo->flags == 0); - module = vk_alloc2(&device->alloc, pAllocator, + module = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*module) + pCreateInfo->codeSize, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (module == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + vk_object_base_init(&device->vk, &module->base, + VK_OBJECT_TYPE_SHADER_MODULE); module->size = pCreateInfo->codeSize; memcpy(module->data, pCreateInfo->pCode, module->size); @@ -82,7 +84,8 @@ void anv_DestroyShaderModule( if (!module) return; - vk_free2(&device->alloc, pAllocator, module); + vk_object_base_finish(&module->base); + vk_free2(&device->vk.alloc, pAllocator, module); } #define SPIR_V_MAGIC_NUMBER 0x07230203 @@ -140,17 +143,30 @@ anv_shader_compile_to_nir(struct anv_device *device, struct nir_spirv_specialization *spec_entries = NULL; if (spec_info && spec_info->mapEntryCount > 0) { num_spec_entries = spec_info->mapEntryCount; - spec_entries = malloc(num_spec_entries * sizeof(*spec_entries)); + spec_entries = calloc(num_spec_entries, sizeof(*spec_entries)); for (uint32_t i = 0; i < num_spec_entries; i++) { VkSpecializationMapEntry entry = spec_info->pMapEntries[i]; const void *data = spec_info->pData + entry.offset; assert(data + entry.size <= spec_info->pData + spec_info->dataSize); spec_entries[i].id = spec_info->pMapEntries[i].constantID; - if (spec_info->dataSize == 8) - spec_entries[i].data64 = *(const uint64_t *)data; - else - spec_entries[i].data32 = *(const uint32_t *)data; + switch (entry.size) { + case 8: + spec_entries[i].value.u64 = *(const uint64_t *)data; + break; + case 4: + spec_entries[i].value.u32 = *(const uint32_t *)data; + break; + case 2: + spec_entries[i].value.u16 = *(const uint16_t *)data; + break; + case 1: + spec_entries[i].value.u8 = *(const uint8_t *)data; + break; + default: + assert(!"Invalid spec constant size"); + break; + } } } @@ -294,7 +310,7 @@ void anv_DestroyPipeline( return; anv_reloc_list_finish(&pipeline->batch_relocs, - pAllocator ? pAllocator : &device->alloc); + pAllocator ? pAllocator : &device->vk.alloc); ralloc_free(pipeline->mem_ctx); @@ -327,7 +343,8 @@ void anv_DestroyPipeline( unreachable("invalid pipeline type"); } - vk_free2(&device->alloc, pAllocator, pipeline); + vk_object_base_finish(&pipeline->base); + vk_free2(&device->vk.alloc, pAllocator, pipeline); } static const uint32_t vk_to_gen_primitive_type[] = { @@ -687,12 +704,12 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline, if (pipeline->type == ANV_PIPELINE_GRAPHICS) { NIR_PASS_V(nir, anv_nir_lower_multiview, - anv_pipeline_to_graphics(pipeline)->subpass->view_mask); + anv_pipeline_to_graphics(pipeline)); } nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo); + NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo, NULL); NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global, nir_address_format_64bit_global); @@ -735,16 +752,23 @@ anv_pipeline_link_vs(const struct brw_compiler *compiler, static void anv_pipeline_compile_vs(const struct brw_compiler *compiler, void *mem_ctx, - struct anv_device *device, + struct anv_graphics_pipeline *pipeline, struct anv_pipeline_stage *vs_stage) { + /* When using Primitive Replication for multiview, each view gets its own + * position slot. + */ + uint32_t pos_slots = pipeline->use_primitive_replication ? + anv_subpass_view_count(pipeline->subpass) : 1; + brw_compute_vue_map(compiler->devinfo, &vs_stage->prog_data.vs.base.vue_map, vs_stage->nir->info.outputs_written, - vs_stage->nir->info.separate_shader); + vs_stage->nir->info.separate_shader, + pos_slots); vs_stage->num_stats = 1; - vs_stage->code = brw_compile_vs(compiler, device, mem_ctx, + vs_stage->code = brw_compile_vs(compiler, pipeline->base.device, mem_ctx, &vs_stage->key.vs, &vs_stage->prog_data.vs, vs_stage->nir, -1, @@ -887,7 +911,7 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler, brw_compute_vue_map(compiler->devinfo, &gs_stage->prog_data.gs.base.vue_map, gs_stage->nir->info.outputs_written, - gs_stage->nir->info.separate_shader); + gs_stage->nir->info.separate_shader, 1); gs_stage->num_stats = 1; gs_stage->code = brw_compile_gs(compiler, device, mem_ctx, @@ -1058,6 +1082,56 @@ anv_pipeline_add_executable(struct anv_pipeline *pipeline, size_t stream_size = 0; FILE *stream = open_memstream(&stream_data, &stream_size); + uint32_t push_size = 0; + for (unsigned i = 0; i < 4; i++) + push_size += stage->bind_map.push_ranges[i].length; + if (push_size > 0) { + fprintf(stream, "Push constant ranges:\n"); + for (unsigned i = 0; i < 4; i++) { + if (stage->bind_map.push_ranges[i].length == 0) + continue; + + fprintf(stream, " RANGE%d (%dB): ", i, + stage->bind_map.push_ranges[i].length * 32); + + switch (stage->bind_map.push_ranges[i].set) { + case ANV_DESCRIPTOR_SET_NULL: + fprintf(stream, "NULL"); + break; + + case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: + fprintf(stream, "Vulkan push constants and API params"); + break; + + case ANV_DESCRIPTOR_SET_DESCRIPTORS: + fprintf(stream, "Descriptor buffer for set %d (start=%dB)", + stage->bind_map.push_ranges[i].index, + stage->bind_map.push_ranges[i].start * 32); + break; + + case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS: + unreachable("gl_NumWorkgroups is never pushed"); + + case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS: + fprintf(stream, "Inline shader constant data (start=%dB)", + stage->bind_map.push_ranges[i].start * 32); + break; + + case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS: + unreachable("Color attachments can't be pushed"); + + default: + fprintf(stream, "UBO (set=%d binding=%d start=%dB)", + stage->bind_map.push_ranges[i].set, + stage->bind_map.push_ranges[i].index, + stage->bind_map.push_ranges[i].start * 32); + break; + } + fprintf(stream, "\n"); + } + fprintf(stream, "\n"); + } + /* Creating this is far cheaper than it looks. It's perfectly fine to * do it for every binary. */ @@ -1117,6 +1191,27 @@ anv_pipeline_add_executables(struct anv_pipeline *pipeline, } } +static void +anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline) +{ + /* TODO: Cache this pipeline-wide information. */ + + /* Primitive replication depends on information from all the shaders. + * Recover this bit from the fact that we have more than one position slot + * in the vertex shader when using it. + */ + assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT); + int pos_slots = 0; + const struct brw_vue_prog_data *vue_prog_data = + (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data; + const struct brw_vue_map *vue_map = &vue_prog_data->vue_map; + for (int i = 0; i < vue_map->num_slots; i++) { + if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS) + pos_slots++; + } + pipeline->use_primitive_replication = pos_slots > 1; +} + static VkResult anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline, struct anv_pipeline_cache *cache, @@ -1245,6 +1340,7 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline, anv_pipeline_add_executables(&pipeline->base, &stages[s], pipeline->shaders[s]); } + anv_pipeline_init_from_cached_graphics(pipeline); goto done; } else if (found > 0) { /* We found some but not all of our shaders. This shouldn't happen @@ -1333,6 +1429,23 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline, next_stage = &stages[s]; } + if (pipeline->base.device->info.gen >= 12 && + pipeline->subpass->view_mask != 0) { + /* For some pipelines HW Primitive Replication can be used instead of + * instancing to implement Multiview. This depend on how viewIndex is + * used in all the active shaders, so this check can't be done per + * individual shaders. + */ + nir_shader *shaders[MESA_SHADER_STAGES] = {}; + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) + shaders[s] = stages[s].nir; + + pipeline->use_primitive_replication = + anv_check_for_primitive_replication(shaders, pipeline); + } else { + pipeline->use_primitive_replication = false; + } + struct anv_pipeline_stage *prev_stage = NULL; for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { if (!stages[s].entrypoint) @@ -1352,7 +1465,7 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline, switch (s) { case MESA_SHADER_VERTEX: - anv_pipeline_compile_vs(compiler, stage_ctx, pipeline->base.device, + anv_pipeline_compile_vs(compiler, stage_ctx, pipeline, &stages[s]); break; case MESA_SHADER_TESS_CTRL: @@ -1551,6 +1664,7 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, nir_var_mem_shared, shared_type_info); NIR_PASS_V(stage.nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset); + NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics); stage.num_stats = 1; stage.code = brw_compile_cs(compiler, pipeline->base.device, mem_ctx, @@ -1614,6 +1728,23 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, return VK_SUCCESS; } +uint32_t +anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline) +{ + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); + return cs_prog_data->local_size[0] * + cs_prog_data->local_size[1] * + cs_prog_data->local_size[2]; +} + +uint32_t +anv_cs_threads(const struct anv_compute_pipeline *pipeline) +{ + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); + return DIV_ROUND_UP(anv_cs_workgroup_size(pipeline), + cs_prog_data->simd_size); +} + /** * Copy pipeline state not marked as dynamic. * Dynamic state is pipeline state which hasn't been provided at pipeline @@ -1852,8 +1983,10 @@ anv_pipeline_init(struct anv_graphics_pipeline *pipeline, anv_pipeline_validate_create_info(pCreateInfo); if (alloc == NULL) - alloc = &device->alloc; + alloc = &device->vk.alloc; + vk_object_base_init(&device->vk, &pipeline->base.base, + VK_OBJECT_TYPE_PIPELINE); pipeline->base.device = device; pipeline->base.type = ANV_PIPELINE_GRAPHICS; @@ -1866,8 +1999,8 @@ anv_pipeline_init(struct anv_graphics_pipeline *pipeline, return result; pipeline->base.batch.alloc = alloc; - pipeline->base.batch.next = pipeline->base.batch.start = pipeline->base.batch_data; - pipeline->base.batch.end = pipeline->base.batch.start + sizeof(pipeline->base.batch_data); + pipeline->base.batch.next = pipeline->base.batch.start = pipeline->batch_data; + pipeline->base.batch.end = pipeline->base.batch.start + sizeof(pipeline->batch_data); pipeline->base.batch.relocs = &pipeline->base.batch_relocs; pipeline->base.batch.status = VK_SUCCESS; @@ -1965,7 +2098,7 @@ anv_pipeline_init(struct anv_graphics_pipeline *pipeline, * the instance divisor by the number of views ensure that we repeat the * client's per-instance data once for each view. */ - if (pipeline->subpass->view_mask) { + if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) { const uint32_t view_count = anv_subpass_view_count(pipeline->subpass); for (uint32_t vb = 0; vb < MAX_VBS; vb++) { if (pipeline->vb[vb].instanced) @@ -2075,6 +2208,16 @@ VkResult anv_GetPipelineExecutableStatisticsKHR( stat->value.u64 = exe->stats.instructions; } + vk_outarray_append(&out, stat) { + WRITE_STR(stat->name, "SEND Count"); + WRITE_STR(stat->description, + "Number of instructions in the final generated shader " + "executable which access external units such as the " + "constant cache or the sampler."); + stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; + stat->value.u64 = exe->stats.sends; + } + vk_outarray_append(&out, stat) { WRITE_STR(stat->name, "Loop Count"); WRITE_STR(stat->description, @@ -2134,7 +2277,7 @@ VkResult anv_GetPipelineExecutableStatisticsKHR( "Number of bytes of workgroup shared memory used by this " "compute shader including any padding."); stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = prog_data->total_scratch; + stat->value.u64 = brw_cs_prog_data_const(prog_data)->slm_size; } }