assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
assert(pCreateInfo->flags == 0);
- module = vk_alloc2(&device->alloc, pAllocator,
+ module = vk_alloc2(&device->vk.alloc, pAllocator,
sizeof(*module) + pCreateInfo->codeSize, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (module == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ vk_object_base_init(&device->vk, &module->base,
+ VK_OBJECT_TYPE_SHADER_MODULE);
module->size = pCreateInfo->codeSize;
memcpy(module->data, pCreateInfo->pCode, module->size);
if (!module)
return;
- vk_free2(&device->alloc, pAllocator, module);
+ vk_object_base_finish(&module->base);
+ vk_free2(&device->vk.alloc, pAllocator, module);
}
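+/* The create/destroy pair above illustrates the common-runtime object
+ * pattern used for driver objects: allocate from device->vk.alloc,
+ * register the object with vk_object_base_init() before first use, and
+ * mirror that with vk_object_base_finish() right before vk_free2().  A
+ * minimal sketch for a hypothetical object type FOO:
+ *
+ *    obj = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*obj), 8,
+ *                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ *    vk_object_base_init(&device->vk, &obj->base, VK_OBJECT_TYPE_FOO);
+ *    ...
+ *    vk_object_base_finish(&obj->base);
+ *    vk_free2(&device->vk.alloc, pAllocator, obj);
+ */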
#define SPIR_V_MAGIC_NUMBER 0x07230203
-static const uint64_t stage_to_debug[] = {
- [MESA_SHADER_VERTEX] = DEBUG_VS,
- [MESA_SHADER_TESS_CTRL] = DEBUG_TCS,
- [MESA_SHADER_TESS_EVAL] = DEBUG_TES,
- [MESA_SHADER_GEOMETRY] = DEBUG_GS,
- [MESA_SHADER_FRAGMENT] = DEBUG_WM,
- [MESA_SHADER_COMPUTE] = DEBUG_CS,
-};
-
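+/* Per-stage debug flags now come from the shared
+ * intel_debug_flag_for_shader_stage() helper (see the INTEL_DEBUG check
+ * below) rather than a driver-local table.
+ */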
struct anv_spirv_debug_data {
struct anv_device *device;
const struct anv_shader_module *module;
const char *message)
{
struct anv_spirv_debug_data *debug_data = private_data;
+ struct anv_instance *instance = debug_data->device->physical->instance;
+
static const VkDebugReportFlagsEXT vk_flags[] = {
[NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
[NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s", (unsigned long) spirv_offset, message);
- vk_debug_report(&debug_data->device->instance->debug_report_callbacks,
+ vk_debug_report(&instance->debug_report_callbacks,
vk_flags[level],
VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT,
(uint64_t) (uintptr_t) debug_data->module,
gl_shader_stage stage,
const VkSpecializationInfo *spec_info)
{
- const struct anv_physical_device *pdevice =
- &device->instance->physicalDevice;
+ const struct anv_physical_device *pdevice = device->physical;
const struct brw_compiler *compiler = pdevice->compiler;
const nir_shader_compiler_options *nir_options =
compiler->glsl_compiler_options[stage].NirOptions;
struct nir_spirv_specialization *spec_entries = NULL;
if (spec_info && spec_info->mapEntryCount > 0) {
num_spec_entries = spec_info->mapEntryCount;
- spec_entries = malloc(num_spec_entries * sizeof(*spec_entries));
+ spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
for (uint32_t i = 0; i < num_spec_entries; i++) {
VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
const void *data = spec_info->pData + entry.offset;
assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
spec_entries[i].id = spec_info->pMapEntries[i].constantID;
- if (spec_info->dataSize == 8)
- spec_entries[i].data64 = *(const uint64_t *)data;
- else
- spec_entries[i].data32 = *(const uint32_t *)data;
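+      /* The entries are calloc()ed so unset fields stay zero, and the load
+       * below dispatches on the size of each map entry rather than on the
+       * total dataSize of the specialization info: a VkSpecializationInfo
+       * may mix 8/16/32/64-bit constants, and keying on dataSize mis-read
+       * every entry whenever the blob held more than one value.
+       */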
+ switch (entry.size) {
+ case 8:
+ spec_entries[i].value.u64 = *(const uint64_t *)data;
+ break;
+ case 4:
+ spec_entries[i].value.u32 = *(const uint32_t *)data;
+ break;
+ case 2:
+ spec_entries[i].value.u16 = *(const uint16_t *)data;
+ break;
+ case 1:
+ spec_entries[i].value.u8 = *(const uint8_t *)data;
+ break;
+ default:
+ assert(!"Invalid spec constant size");
+ break;
+ }
}
}
};
struct spirv_to_nir_options spirv_options = {
.frag_coord_is_sysval = true,
- .use_scoped_memory_barrier = true,
.caps = {
.demote_to_helper_invocation = true,
.derivative_group = true,
.int16 = pdevice->info.gen >= 8,
.int64 = pdevice->info.gen >= 8,
.int64_atomics = pdevice->info.gen >= 9 && pdevice->use_softpin,
+ .integer_functions2 = pdevice->info.gen >= 8,
.min_lod = true,
.multiview = true,
.physical_storage_buffer_address = pdevice->has_a64_buffer_access,
free(spec_entries);
- if (unlikely(INTEL_DEBUG & stage_to_debug[stage])) {
+ if (unlikely(INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage))) {
fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
gl_shader_stage_name(stage));
nir_print_shader(nir, stderr);
* inline functions. That way they get properly initialized at the top
* of the function and not at the top of its caller.
*/
- NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp);
+ NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
NIR_PASS_V(nir, nir_lower_returns);
NIR_PASS_V(nir, nir_inline_functions);
NIR_PASS_V(nir, nir_opt_deref);
* nir_remove_dead_variables and split_per_member_structs below see the
* corresponding stores.
*/
- NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
+ NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
/* Split member structs. We do this before lower_io_to_temporaries so that
* it doesn't lower system values to temporaries by accident.
*/
NIR_PASS_V(nir, nir_split_per_member_structs);
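+   /* nir_remove_dead_variables() now takes a
+    * nir_remove_dead_variables_options pointer as its last argument;
+    * passing NULL keeps the pass's default behavior.
+    */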
NIR_PASS_V(nir, nir_remove_dead_variables,
- nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value,
+ NULL);
NIR_PASS_V(nir, nir_propagate_invariant);
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
return;
anv_reloc_list_finish(&pipeline->batch_relocs,
- pAllocator ? pAllocator : &device->alloc);
+ pAllocator ? pAllocator : &device->vk.alloc);
ralloc_free(pipeline->mem_ctx);
- if (pipeline->blend_state.map)
- anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
+ switch (pipeline->type) {
+ case ANV_PIPELINE_GRAPHICS: {
+ struct anv_graphics_pipeline *gfx_pipeline =
+ anv_pipeline_to_graphics(pipeline);
- for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
- if (pipeline->shaders[s])
- anv_shader_bin_unref(device, pipeline->shaders[s]);
+ if (gfx_pipeline->blend_state.map)
+ anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
+
+ for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+ if (gfx_pipeline->shaders[s])
+ anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
+ }
+ break;
+ }
+
+ case ANV_PIPELINE_COMPUTE: {
+ struct anv_compute_pipeline *compute_pipeline =
+ anv_pipeline_to_compute(pipeline);
+
+ if (compute_pipeline->cs)
+ anv_shader_bin_unref(device, compute_pipeline->cs);
+
+ break;
+ }
+
+ default:
+ unreachable("invalid pipeline type");
}
- vk_free2(&device->alloc, pAllocator, pipeline);
+ vk_object_base_finish(&pipeline->base);
+ vk_free2(&device->vk.alloc, pAllocator, pipeline);
}
static const uint32_t vk_to_gen_primitive_type[] = {
}
static void
-anv_pipeline_hash_graphics(struct anv_pipeline *pipeline,
+anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
struct anv_pipeline_layout *layout,
struct anv_pipeline_stage *stages,
unsigned char *sha1_out)
if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
- const bool rba = pipeline->device->robust_buffer_access;
+ const bool rba = pipeline->base.device->robust_buffer_access;
_mesa_sha1_update(&ctx, &rba, sizeof(rba));
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
}
static void
-anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
+anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
struct anv_pipeline_layout *layout,
struct anv_pipeline_stage *stage,
unsigned char *sha1_out)
if (layout)
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
- const bool rba = pipeline->device->robust_buffer_access;
+ const bool rba = pipeline->base.device->robust_buffer_access;
_mesa_sha1_update(&ctx, &rba, sizeof(rba));
_mesa_sha1_update(&ctx, stage->shader_sha1,
struct anv_pipeline_stage *stage)
{
const struct brw_compiler *compiler =
- pipeline->device->instance->physicalDevice.compiler;
+ pipeline->device->physical->compiler;
const nir_shader_compiler_options *nir_options =
compiler->glsl_compiler_options[stage->stage].NirOptions;
nir_shader *nir;
struct anv_pipeline_stage *stage,
struct anv_pipeline_layout *layout)
{
- const struct anv_physical_device *pdevice =
- &pipeline->device->instance->physicalDevice;
+ const struct anv_physical_device *pdevice = pipeline->device->physical;
const struct brw_compiler *compiler = pdevice->compiler;
struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
nir_shader *nir = stage->nir;
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
- NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable);
+ NIR_PASS_V(nir, nir_lower_wpos_center,
+ anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
NIR_PASS_V(nir, nir_lower_input_attachments, true);
}
NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
- if (nir->info.stage != MESA_SHADER_COMPUTE)
- NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask);
+ if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
+ NIR_PASS_V(nir, anv_nir_lower_multiview,
+ anv_pipeline_to_graphics(pipeline));
+ }
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
- if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
- pipeline->needs_data_cache = true;
-
- NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
+ NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo, NULL);
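+   /* The per-pipeline needs_data_cache flag is gone: the L3 configuration
+    * now always weights the data cache (note the `true` argument to
+    * gen_get_default_l3_weights() further down), so SSBO/image usage no
+    * longer has to be tracked here.
+    */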
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
nir_address_format_64bit_global);
nir_lower_non_uniform_texture_access |
nir_lower_non_uniform_image_access);
- anv_nir_compute_push_layout(pdevice, nir, prog_data,
- &stage->bind_map, mem_ctx);
+ anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
+ nir, prog_data, &stage->bind_map, mem_ctx);
stage->nir = nir;
}
static void
anv_pipeline_compile_vs(const struct brw_compiler *compiler,
void *mem_ctx,
- struct anv_device *device,
+ struct anv_graphics_pipeline *pipeline,
struct anv_pipeline_stage *vs_stage)
{
+ /* When using Primitive Replication for multiview, each view gets its own
+ * position slot.
+ */
+ uint32_t pos_slots = pipeline->use_primitive_replication ?
+ anv_subpass_view_count(pipeline->subpass) : 1;
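+   /* anv_subpass_view_count() is the number of bits set in view_mask, so
+    * e.g. a view_mask of 0xb (views 0, 1 and 3) gives the VUE map three
+    * VARYING_SLOT_POS entries below.
+    */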
+
brw_compute_vue_map(compiler->devinfo,
&vs_stage->prog_data.vs.base.vue_map,
vs_stage->nir->info.outputs_written,
- vs_stage->nir->info.separate_shader);
+ vs_stage->nir->info.separate_shader,
+ pos_slots);
vs_stage->num_stats = 1;
- vs_stage->code = brw_compile_vs(compiler, device, mem_ctx,
+ vs_stage->code = brw_compile_vs(compiler, pipeline->base.device, mem_ctx,
&vs_stage->key.vs,
&vs_stage->prog_data.vs,
vs_stage->nir, -1,
brw_compute_vue_map(compiler->devinfo,
&gs_stage->prog_data.gs.base.vue_map,
gs_stage->nir->info.outputs_written,
- gs_stage->nir->info.separate_shader);
+ gs_stage->nir->info.separate_shader, 1);
gs_stage->num_stats = 1;
gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
size_t stream_size = 0;
FILE *stream = open_memstream(&stream_data, &stream_size);
+ uint32_t push_size = 0;
+ for (unsigned i = 0; i < 4; i++)
+ push_size += stage->bind_map.push_ranges[i].length;
+ if (push_size > 0) {
+ fprintf(stream, "Push constant ranges:\n");
+ for (unsigned i = 0; i < 4; i++) {
+ if (stage->bind_map.push_ranges[i].length == 0)
+ continue;
+
+ fprintf(stream, " RANGE%d (%dB): ", i,
+ stage->bind_map.push_ranges[i].length * 32);
+
+ switch (stage->bind_map.push_ranges[i].set) {
+ case ANV_DESCRIPTOR_SET_NULL:
+ fprintf(stream, "NULL");
+ break;
+
+ case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
+ fprintf(stream, "Vulkan push constants and API params");
+ break;
+
+ case ANV_DESCRIPTOR_SET_DESCRIPTORS:
+ fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
+ stage->bind_map.push_ranges[i].index,
+ stage->bind_map.push_ranges[i].start * 32);
+ break;
+
+ case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
+ unreachable("gl_NumWorkgroups is never pushed");
+
+ case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
+ fprintf(stream, "Inline shader constant data (start=%dB)",
+ stage->bind_map.push_ranges[i].start * 32);
+ break;
+
+ case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
+ unreachable("Color attachments can't be pushed");
+
+ default:
+ fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
+ stage->bind_map.push_ranges[i].set,
+ stage->bind_map.push_ranges[i].index,
+ stage->bind_map.push_ranges[i].start * 32);
+ break;
+ }
+ fprintf(stream, "\n");
+ }
+ fprintf(stream, "\n");
+ }
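+   /* Illustrative dump (values hypothetical); lengths and starts are
+    * stored in 32-byte units, hence the "* 32" when printing:
+    *
+    *    Push constant ranges:
+    *      RANGE0 (64B): Vulkan push constants and API params
+    *      RANGE1 (32B): Descriptor buffer for set 0 (start=0B)
+    */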
+
/* Creating this is far cheaper than it looks. It's perfectly fine to
* do it for every binary.
*/
free(stream_data);
}
- pipeline->executables[pipeline->num_executables++] =
- (struct anv_pipeline_executable) {
- .stage = stage->stage,
- .stats = *stats,
- .nir = nir,
- .disasm = disasm,
- };
+ const struct anv_pipeline_executable exe = {
+ .stage = stage->stage,
+ .stats = *stats,
+ .nir = nir,
+ .disasm = disasm,
+ };
+ util_dynarray_append(&pipeline->executables,
+ struct anv_pipeline_executable, exe);
}
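+/* Executable metadata now lives in a util_dynarray owned by the pipeline's
+ * mem_ctx (see util_dynarray_init() in anv_pipeline_init), growing on
+ * demand instead of filling a fixed-size executables[] array indexed by
+ * num_executables.
+ */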
static void
}
}
+static void
+anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
+{
+ /* TODO: Cache this pipeline-wide information. */
+
+ /* Primitive replication depends on information from all the shaders.
+ * Recover the use_primitive_replication flag from the fact that the
+ * vertex shader has more than one position slot when the feature is
+ * enabled.
+ */
+ assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
+ int pos_slots = 0;
+ const struct brw_vue_prog_data *vue_prog_data =
+ (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
+ const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
+ for (int i = 0; i < vue_map->num_slots; i++) {
+ if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
+ pos_slots++;
+ }
+ pipeline->use_primitive_replication = pos_slots > 1;
+}
+
static VkResult
-anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
+anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
struct anv_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *info)
{
};
int64_t pipeline_start = os_time_get_nano();
- const struct brw_compiler *compiler =
- pipeline->device->instance->physicalDevice.compiler;
+ const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
pipeline->active_stages = 0;
stages[stage].spec_info,
stages[stage].shader_sha1);
- const struct gen_device_info *devinfo = &pipeline->device->info;
+ const struct gen_device_info *devinfo = &pipeline->base.device->info;
switch (stage) {
case MESA_SHADER_VERTEX:
populate_vs_prog_key(devinfo, sinfo->flags, &stages[stage].key.vs);
}
const bool skip_cache_lookup =
- (pipeline->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
+ (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
if (!skip_cache_lookup) {
unsigned found = 0;
bool cache_hit;
struct anv_shader_bin *bin =
- anv_device_search_for_kernel(pipeline->device, cache,
+ anv_device_search_for_kernel(pipeline->base.device, cache,
&stages[s].cache_key,
sizeof(stages[s].cache_key), &cache_hit);
if (bin) {
if (!stages[s].entrypoint)
continue;
- anv_pipeline_add_executables(pipeline, &stages[s],
+ anv_pipeline_add_executables(&pipeline->base, &stages[s],
pipeline->shaders[s]);
}
+ anv_pipeline_init_from_cached_graphics(pipeline);
goto done;
} else if (found > 0) {
/* We found some but not all of our shaders. This shouldn't happen
*/
assert(found < __builtin_popcount(pipeline->active_stages));
- vk_debug_report(&pipeline->device->instance->debug_report_callbacks,
+ vk_debug_report(&pipeline->base.device->physical->instance->debug_report_callbacks,
VK_DEBUG_REPORT_WARNING_BIT_EXT |
VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
stages[s].feedback.flags = 0;
if (pipeline->shaders[s]) {
- anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
+ anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
pipeline->shaders[s] = NULL;
}
}
.sampler_to_descriptor = stages[s].sampler_to_descriptor
};
- stages[s].nir = anv_pipeline_stage_get_nir(pipeline, cache,
+ stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
pipeline_ctx,
&stages[s]);
if (stages[s].nir == NULL) {
next_stage = &stages[s];
}
+ if (pipeline->base.device->info.gen >= 12 &&
+ pipeline->subpass->view_mask != 0) {
+ /* For some pipelines HW Primitive Replication can be used instead of
+ * instancing to implement Multiview. This depends on how viewIndex is
+ * used in all the active shaders, so this check can't be done per
+ * individual shader.
+ */
+ nir_shader *shaders[MESA_SHADER_STAGES] = {};
+ for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
+ shaders[s] = stages[s].nir;
+
+ pipeline->use_primitive_replication =
+ anv_check_for_primitive_replication(shaders, pipeline);
+ } else {
+ pipeline->use_primitive_replication = false;
+ }
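+   /* HW Primitive Replication (Gen12+) lets a single VS invocation write a
+    * position per view and have the hardware replicate the primitive to
+    * each view, instead of the instancing-based multiview path used
+    * otherwise.
+    */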
+
struct anv_pipeline_stage *prev_stage = NULL;
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
if (!stages[s].entrypoint)
s == MESA_SHADER_GEOMETRY)
xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
- anv_pipeline_lower_nir(pipeline, stage_ctx, &stages[s], layout);
+ anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
switch (s) {
case MESA_SHADER_VERTEX:
- anv_pipeline_compile_vs(compiler, stage_ctx, pipeline->device,
+ anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
&stages[s]);
break;
case MESA_SHADER_TESS_CTRL:
- anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->device,
+ anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
&stages[s], prev_stage);
break;
case MESA_SHADER_TESS_EVAL:
- anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->device,
+ anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
&stages[s], prev_stage);
break;
case MESA_SHADER_GEOMETRY:
- anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->device,
+ anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
&stages[s], prev_stage);
break;
case MESA_SHADER_FRAGMENT:
- anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->device,
+ anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
&stages[s], prev_stage);
break;
default:
&stages[s].bind_map);
struct anv_shader_bin *bin =
- anv_device_upload_kernel(pipeline->device, cache,
+ anv_device_upload_kernel(pipeline->base.device, cache, s,
&stages[s].cache_key,
sizeof(stages[s].cache_key),
stages[s].code,
goto fail;
}
- anv_pipeline_add_executables(pipeline, &stages[s], bin);
+ anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
pipeline->shaders[s] = bin;
ralloc_free(stage_ctx);
/* This can happen if we decided to implicitly disable the fragment
* shader. See anv_pipeline_compile_fs().
*/
- anv_shader_bin_unref(pipeline->device,
+ anv_shader_bin_unref(pipeline->base.device,
pipeline->shaders[MESA_SHADER_FRAGMENT]);
pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
if (pipeline->shaders[s])
- anv_shader_bin_unref(pipeline->device, pipeline->shaders[s]);
+ anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
}
return result;
}
VkResult
-anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
+anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
struct anv_pipeline_cache *cache,
const VkComputePipelineCreateInfo *info,
const struct anv_shader_module *module,
};
int64_t pipeline_start = os_time_get_nano();
- const struct brw_compiler *compiler =
- pipeline->device->instance->physicalDevice.compiler;
+ const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
struct anv_pipeline_stage stage = {
.stage = MESA_SHADER_COMPUTE,
vk_find_struct_const(info->stage.pNext,
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
- populate_cs_prog_key(&pipeline->device->info, info->stage.flags,
+ populate_cs_prog_key(&pipeline->base.device->info, info->stage.flags,
rss_info, &stage.key.cs);
ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
const bool skip_cache_lookup =
- (pipeline->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
+ (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
bool cache_hit = false;
if (!skip_cache_lookup) {
- bin = anv_device_search_for_kernel(pipeline->device, cache,
+ bin = anv_device_search_for_kernel(pipeline->base.device, cache,
&stage.cache_key,
sizeof(stage.cache_key),
&cache_hit);
.set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
};
- stage.nir = anv_pipeline_stage_get_nir(pipeline, cache, mem_ctx, &stage);
+ stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
if (stage.nir == NULL) {
ralloc_free(mem_ctx);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
- anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
+ anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
NIR_PASS_V(stage.nir, nir_lower_vars_to_explicit_types,
nir_var_mem_shared, shared_type_info);
NIR_PASS_V(stage.nir, nir_lower_explicit_io,
nir_var_mem_shared, nir_address_format_32bit_offset);
+ NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
stage.num_stats = 1;
- stage.code = brw_compile_cs(compiler, pipeline->device, mem_ctx,
+ stage.code = brw_compile_cs(compiler, pipeline->base.device, mem_ctx,
&stage.key.cs, &stage.prog_data.cs,
stage.nir, -1, stage.stats, NULL);
if (stage.code == NULL) {
}
const unsigned code_size = stage.prog_data.base.program_size;
- bin = anv_device_upload_kernel(pipeline->device, cache,
+ bin = anv_device_upload_kernel(pipeline->base.device, cache,
+ MESA_SHADER_COMPUTE,
&stage.cache_key, sizeof(stage.cache_key),
stage.code, code_size,
stage.nir->constant_data,
stage.feedback.duration = os_time_get_nano() - stage_start;
}
- anv_pipeline_add_executables(pipeline, &stage, bin);
+ anv_pipeline_add_executables(&pipeline->base, &stage, bin);
ralloc_free(mem_ctx);
create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
}
- pipeline->active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
- pipeline->shaders[MESA_SHADER_COMPUTE] = bin;
+ pipeline->cs = bin;
return VK_SUCCESS;
}
+struct anv_cs_parameters
+anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
+{
+ const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
+
+ struct anv_cs_parameters cs_params = {};
+
+ cs_params.group_size = cs_prog_data->local_size[0] *
+ cs_prog_data->local_size[1] *
+ cs_prog_data->local_size[2];
+ cs_params.simd_size =
+ brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
+ cs_prog_data, cs_params.group_size);
+ cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
+
+ return cs_params;
+}
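+/* Sketch of how state-emission code might consume this (field names from
+ * the GPGPU walker programming, shown for illustration only):
+ *
+ *    const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+ *    ... .SIMDSize = cs_params.simd_size / 16,
+ *        .ThreadWidthCounterMaximum = cs_params.threads - 1, ...
+ */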
+
/**
* Copy pipeline state not marked as dynamic.
* Dynamic state is pipeline state which hasn't been provided at pipeline
* creation time, but is dynamically provided afterwards using various
* dynamic state commands.
*
* @param[in] pCreateInfo Source of non_dynamic state to be copied.
*/
static void
-copy_non_dynamic_state(struct anv_pipeline *pipeline,
+copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
const struct gen_device_info *devinfo = &pipeline->device->info;
const struct gen_l3_weights w =
- gen_get_default_l3_weights(devinfo, pipeline->needs_data_cache, needs_slm);
+ gen_get_default_l3_weights(devinfo, true, needs_slm);
- pipeline->urb.l3_config = gen_get_l3_config(devinfo, w);
- pipeline->urb.total_size =
- gen_get_l3_config_urb_size(devinfo, pipeline->urb.l3_config);
+ pipeline->l3_config = gen_get_l3_config(devinfo, w);
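+   /* The cached urb.total_size is dropped; code that needs the URB size
+    * can derive it from l3_config with gen_get_l3_config_urb_size().
+    */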
}
VkResult
-anv_pipeline_init(struct anv_pipeline *pipeline,
+anv_pipeline_init(struct anv_graphics_pipeline *pipeline,
struct anv_device *device,
struct anv_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
anv_pipeline_validate_create_info(pCreateInfo);
if (alloc == NULL)
- alloc = &device->alloc;
+ alloc = &device->vk.alloc;
- pipeline->device = device;
+ vk_object_base_init(&device->vk, &pipeline->base.base,
+ VK_OBJECT_TYPE_PIPELINE);
+ pipeline->base.device = device;
+ pipeline->base.type = ANV_PIPELINE_GRAPHICS;
ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
assert(pCreateInfo->subpass < render_pass->subpass_count);
pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
- result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
+ result = anv_reloc_list_init(&pipeline->base.batch_relocs, alloc);
if (result != VK_SUCCESS)
return result;
- pipeline->batch.alloc = alloc;
- pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
- pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
- pipeline->batch.relocs = &pipeline->batch_relocs;
- pipeline->batch.status = VK_SUCCESS;
+ pipeline->base.batch.alloc = alloc;
+ pipeline->base.batch.next = pipeline->base.batch.start = pipeline->batch_data;
+ pipeline->base.batch.end = pipeline->base.batch.start + sizeof(pipeline->batch_data);
+ pipeline->base.batch.relocs = &pipeline->base.batch_relocs;
+ pipeline->base.batch.status = VK_SUCCESS;
- pipeline->mem_ctx = ralloc_context(NULL);
- pipeline->flags = pCreateInfo->flags;
+ pipeline->base.mem_ctx = ralloc_context(NULL);
+ pipeline->base.flags = pCreateInfo->flags;
assert(pCreateInfo->pRasterizationState);
pCreateInfo->pMultisampleState &&
pCreateInfo->pMultisampleState->sampleShadingEnable;
- pipeline->needs_data_cache = false;
-
/* When we free the pipeline, we detect stages based on the NULL status
* of various prog_data pointers. Make them NULL by default.
*/
memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
- pipeline->num_executables = 0;
+
+ util_dynarray_init(&pipeline->base.executables, pipeline->base.mem_ctx);
result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
if (result != VK_SUCCESS) {
- ralloc_free(pipeline->mem_ctx);
- anv_reloc_list_finish(&pipeline->batch_relocs, alloc);
+ ralloc_free(pipeline->base.mem_ctx);
+ anv_reloc_list_finish(&pipeline->base.batch_relocs, alloc);
return result;
}
assert(pipeline->shaders[MESA_SHADER_VERTEX]);
- anv_pipeline_setup_l3_config(pipeline, false);
+ anv_pipeline_setup_l3_config(&pipeline->base, false);
const VkPipelineVertexInputStateCreateInfo *vi_info =
pCreateInfo->pVertexInputState;
* the instance divisor by the number of views to ensure that we repeat the
* client's per-instance data once for each view.
*/
- if (pipeline->subpass->view_mask) {
+ if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
if (pipeline->vb[vb].instanced)
ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
- for (uint32_t i = 0; i < pipeline->num_executables; i++) {
+ util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
vk_outarray_append(&out, props) {
- gl_shader_stage stage = pipeline->executables[i].stage;
+ gl_shader_stage stage = exe->stage;
props->stages = mesa_to_vk_shader_stage(stage);
- unsigned simd_width = pipeline->executables[i].stats.dispatch_width;
+ unsigned simd_width = exe->stats.dispatch_width;
if (stage == MESA_SHADER_FRAGMENT) {
WRITE_STR(props->name, "%s%d %s",
simd_width ? "SIMD" : "vec",
return vk_outarray_status(&out);
}
+static const struct anv_pipeline_executable *
+anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
+{
+ assert(index < util_dynarray_num_elements(&pipeline->executables,
+ struct anv_pipeline_executable));
+ return util_dynarray_element(
+ &pipeline->executables, struct anv_pipeline_executable, index);
+}
+
VkResult anv_GetPipelineExecutableStatisticsKHR(
VkDevice device,
const VkPipelineExecutableInfoKHR* pExecutableInfo,
ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
- assert(pExecutableInfo->executableIndex < pipeline->num_executables);
const struct anv_pipeline_executable *exe =
- &pipeline->executables[pExecutableInfo->executableIndex];
- const struct brw_stage_prog_data *prog_data =
- pipeline->shaders[exe->stage]->prog_data;
+ anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
+
+ const struct brw_stage_prog_data *prog_data;
+ switch (pipeline->type) {
+ case ANV_PIPELINE_GRAPHICS: {
+ prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
+ break;
+ }
+ case ANV_PIPELINE_COMPUTE: {
+ prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
+ break;
+ }
+ default:
+ unreachable("invalid pipeline type");
+ }
vk_outarray_append(&out, stat) {
WRITE_STR(stat->name, "Instruction Count");
stat->value.u64 = exe->stats.instructions;
}
+ vk_outarray_append(&out, stat) {
+ WRITE_STR(stat->name, "SEND Count");
+ WRITE_STR(stat->description,
+ "Number of instructions in the final generated shader "
+ "executable which access external units such as the "
+ "constant cache or the sampler.");
+ stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ stat->value.u64 = exe->stats.sends;
+ }
+
vk_outarray_append(&out, stat) {
WRITE_STR(stat->name, "Loop Count");
WRITE_STR(stat->description,
"Number of bytes of workgroup shared memory used by this "
"compute shader including any padding.");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- stat->value.u64 = prog_data->total_scratch;
+ stat->value.u64 = brw_cs_prog_data_const(prog_data)->slm_size;
}
}
pInternalRepresentationCount);
bool incomplete_text = false;
- assert(pExecutableInfo->executableIndex < pipeline->num_executables);
const struct anv_pipeline_executable *exe =
- &pipeline->executables[pExecutableInfo->executableIndex];
+ anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
if (exe->nir) {
vk_outarray_append(&out, ir) {