struct brw_stage_prog_data *prog_data);
void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
nir_shader *shader,
- struct brw_stage_prog_data *prog_data);
+ struct brw_stage_prog_data *prog_data,
+ struct anv_pipeline_bind_map *map);
#ifdef __cplusplus
}
void
anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
nir_shader *shader,
- struct brw_stage_prog_data *prog_data)
+ struct brw_stage_prog_data *prog_data,
+ struct anv_pipeline_bind_map *map)
{
struct anv_pipeline_layout *layout = pipeline->layout;
nir_foreach_block(function->impl, get_used_bindings_block, &state);
}
- struct anv_pipeline_bind_map map = {
- .surface_count = 0,
- .sampler_count = 0,
- .image_count = 0,
- };
-
for (uint32_t set = 0; set < layout->num_sets; set++) {
struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
set_layout->binding_count) {
if (set_layout->binding[b].stage[shader->stage].surface_index >= 0)
- map.surface_count += set_layout->binding[b].array_size;
+ map->surface_count += set_layout->binding[b].array_size;
if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0)
- map.sampler_count += set_layout->binding[b].array_size;
+ map->sampler_count += set_layout->binding[b].array_size;
if (set_layout->binding[b].stage[shader->stage].image_index >= 0)
- map.image_count += set_layout->binding[b].array_size;
+ map->image_count += set_layout->binding[b].array_size;
}
}
- map.surface_to_descriptor =
- malloc(map.surface_count * sizeof(struct anv_pipeline_binding));
- map.sampler_to_descriptor =
- malloc(map.sampler_count * sizeof(struct anv_pipeline_binding));
-
- pipeline->bindings[shader->stage] = map;
-
unsigned surface = 0;
unsigned sampler = 0;
unsigned image = 0;
if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) {
state.set[set].surface_offsets[b] = surface;
for (unsigned i = 0; i < array_size; i++) {
- map.surface_to_descriptor[surface + i].set = set;
- map.surface_to_descriptor[surface + i].offset = set_offset + i;
+ map->surface_to_descriptor[surface + i].set = set;
+ map->surface_to_descriptor[surface + i].offset = set_offset + i;
}
surface += array_size;
}
if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) {
state.set[set].sampler_offsets[b] = sampler;
for (unsigned i = 0; i < array_size; i++) {
- map.sampler_to_descriptor[sampler + i].set = set;
- map.sampler_to_descriptor[sampler + i].offset = set_offset + i;
+ map->sampler_to_descriptor[sampler + i].set = set;
+ map->sampler_to_descriptor[sampler + i].offset = set_offset + i;
}
sampler += array_size;
}
}
}
- if (map.image_count > 0) {
- assert(map.image_count <= MAX_IMAGES);
+ if (map->image_count > 0) {
+ assert(map->image_count <= MAX_IMAGES);
nir_foreach_variable(var, &shader->uniforms) {
if (glsl_type_is_image(var->type) ||
(glsl_type_is_array(var->type) &&
const gl_constant_value **param =
prog_data->param + (shader->num_uniforms / 4);
const struct brw_image_param *image_param = null_data->images;
- for (uint32_t i = 0; i < map.image_count; i++) {
+ for (uint32_t i = 0; i < map->image_count; i++) {
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
(const union gl_constant_value *)&image_param->surface_idx, 1);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
image_param ++;
}
- shader->num_uniforms += map.image_count * BRW_IMAGE_PARAM_SIZE * 4;
+ shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
}
ralloc_free(mem_ctx);
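The pass above only shows the bind map through its uses, so here is a minimal sketch of the two types involved, assuming nothing beyond the fields that actually appear in this patch (the real definitions live in anv_private.h; the exact field widths are an assumption):

struct anv_pipeline_binding {
   /* Descriptor set the surface or sampler comes from (field names taken
    * from the patch; the types are assumed). */
   uint32_t set;
   /* Offset of the descriptor within that set. */
   uint32_t offset;
};

struct anv_pipeline_bind_map {
   uint32_t surface_count;
   uint32_t sampler_count;
   uint32_t image_count;

   /* Indexed by binding-table slot; storage is supplied by the caller and
    * later copied into the pipeline cache stream. */
   struct anv_pipeline_binding *surface_to_descriptor;
   struct anv_pipeline_binding *sampler_to_descriptor;
};

With that shape in mind, the loop above simply records, for every binding-table slot a stage uses, which (set, offset) pair in the descriptor sets it resolves to.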
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
- for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
- free(pipeline->bindings[s].surface_to_descriptor);
- free(pipeline->bindings[s].sampler_to_descriptor);
- }
-
anv_reloc_list_finish(&pipeline->batch_relocs,
pAllocator ? pAllocator : &device->alloc);
if (pipeline->blend_state.map)
const char *entrypoint,
gl_shader_stage stage,
const VkSpecializationInfo *spec_info,
- struct brw_stage_prog_data *prog_data)
+ struct brw_stage_prog_data *prog_data,
+ struct anv_pipeline_bind_map *map)
{
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
if (pipeline->layout)
- anv_nir_apply_pipeline_layout(pipeline, nir, prog_data);
+ anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map);
/* All binding table offsets provided by apply_pipeline_layout() are
* relative to the start of the binding table (plus MAX_RTS for VS).
static void
anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
gl_shader_stage stage,
- const struct brw_stage_prog_data *prog_data)
+ const struct brw_stage_prog_data *prog_data,
+ struct anv_pipeline_bind_map *map)
{
struct brw_device_info *devinfo = &pipeline->device->info;
uint32_t max_threads[] = {
pipeline->total_scratch =
align_u32(pipeline->total_scratch, 1024) +
prog_data->total_scratch * max_threads[stage];
+ pipeline->bindings[stage] = *map;
}
static VkResult
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
const struct brw_stage_prog_data *stage_prog_data;
+ struct anv_pipeline_bind_map map;
struct brw_vs_prog_key key;
uint32_t kernel = NO_KERNEL;
unsigned char sha1[20];
if (module->size > 0) {
anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data);
+ kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
}
if (kernel == NO_KERNEL) {
struct brw_vs_prog_data prog_data = { 0, };
+ struct anv_pipeline_binding surface_to_descriptor[256];
+ struct anv_pipeline_binding sampler_to_descriptor[256];
+
+ map = (struct anv_pipeline_bind_map) {
+ .surface_to_descriptor = surface_to_descriptor,
+ .sampler_to_descriptor = sampler_to_descriptor
+ };
nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
MESA_SHADER_VERTEX, spec_info,
- &prog_data.base.base);
+ &prog_data.base.base, &map);
if (nir == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
kernel = anv_pipeline_cache_upload_kernel(cache,
module->size > 0 ? sha1 : NULL,
shader_code, code_size,
- &stage_prog_data,
- sizeof(prog_data));
+ &stage_prog_data, sizeof(prog_data),
+ &map);
ralloc_free(mem_ctx);
}
}
anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX,
- stage_prog_data);
+ stage_prog_data, &map);
return VK_SUCCESS;
}
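The geometry, fragment, and compute paths below repeat the same pattern, so it is worth spelling out once. A condensed sketch of the per-stage flow as wired up in this patch (step numbers are editorial; everything else is named as in the code):

/* 1. Reserve scratch storage on the stack and point the map at it. */
struct anv_pipeline_binding surface_to_descriptor[256];
struct anv_pipeline_binding sampler_to_descriptor[256];
struct anv_pipeline_bind_map map = {
   .surface_to_descriptor = surface_to_descriptor,
   .sampler_to_descriptor = sampler_to_descriptor
};

/* 2. anv_pipeline_compile() -> anv_nir_apply_pipeline_layout() fills in the
 *    counts and the per-slot (set, offset) entries. */

/* 3. anv_pipeline_cache_upload_kernel() copies both arrays into the cache
 *    stream and repoints map.surface_to_descriptor / sampler_to_descriptor
 *    at those copies, so the stack arrays can safely go out of scope. */

/* 4. anv_pipeline_add_compiled_stage() stores the map by value
 *    (pipeline->bindings[stage] = *map in the code above), which is also
 *    why anv_DestroyPipeline() no longer frees per-stage binding arrays. */

When the cache lookup hits, anv_pipeline_cache_search() fills the map directly from the cached entry and the compile path is skipped entirely.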
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
const struct brw_stage_prog_data *stage_prog_data;
+ struct anv_pipeline_bind_map map;
struct brw_gs_prog_key key;
uint32_t kernel = NO_KERNEL;
unsigned char sha1[20];
if (module->size > 0) {
anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data);
+ kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
}
if (kernel == NO_KERNEL) {
struct brw_gs_prog_data prog_data = { 0, };
+ struct anv_pipeline_binding surface_to_descriptor[256];
+ struct anv_pipeline_binding sampler_to_descriptor[256];
+
+ map = (struct anv_pipeline_bind_map) {
+ .surface_to_descriptor = surface_to_descriptor,
+ .sampler_to_descriptor = sampler_to_descriptor
+ };
nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
MESA_SHADER_GEOMETRY, spec_info,
- &prog_data.base.base);
+ &prog_data.base.base, &map);
if (nir == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
kernel = anv_pipeline_cache_upload_kernel(cache,
module->size > 0 ? sha1 : NULL,
shader_code, code_size,
- &stage_prog_data, sizeof(prog_data));
+ &stage_prog_data, sizeof(prog_data),
+ &map);
ralloc_free(mem_ctx);
}
pipeline->gs_kernel = kernel;
anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY,
- stage_prog_data);
+ stage_prog_data, &map);
return VK_SUCCESS;
}
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
const struct brw_stage_prog_data *stage_prog_data;
+ struct anv_pipeline_bind_map map;
struct brw_wm_prog_key key;
uint32_t kernel = NO_KERNEL;
unsigned char sha1[20];
if (module->size > 0) {
anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data);
+ kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
}
if (kernel == NO_KERNEL) {
struct brw_wm_prog_data prog_data = { 0, };
+ struct anv_pipeline_binding surface_to_descriptor[256];
+ struct anv_pipeline_binding sampler_to_descriptor[256];
- prog_data.binding_table.render_target_start = 0;
+ map = (struct anv_pipeline_bind_map) {
+ .surface_to_descriptor = surface_to_descriptor,
+ .sampler_to_descriptor = sampler_to_descriptor
+ };
nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
MESA_SHADER_FRAGMENT, spec_info,
- &prog_data.base);
+ &prog_data.base, &map);
if (nir == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
kernel = anv_pipeline_cache_upload_kernel(cache,
module->size > 0 ? sha1 : NULL,
shader_code, code_size,
- &stage_prog_data, sizeof(prog_data));
+ &stage_prog_data, sizeof(prog_data),
+ &map);
ralloc_free(mem_ctx);
}
}
anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT,
- stage_prog_data);
+ stage_prog_data, &map);
return VK_SUCCESS;
}
const struct brw_compiler *compiler =
pipeline->device->instance->physicalDevice.compiler;
const struct brw_stage_prog_data *stage_prog_data;
+ struct anv_pipeline_bind_map map;
struct brw_cs_prog_key key;
uint32_t kernel = NO_KERNEL;
unsigned char sha1[20];
if (module->size > 0) {
anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, spec_info);
- kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data);
+ kernel = anv_pipeline_cache_search(cache, sha1, &stage_prog_data, &map);
}
if (module->size == 0 || kernel == NO_KERNEL) {
struct brw_cs_prog_data prog_data = { 0, };
+ struct anv_pipeline_binding surface_to_descriptor[256];
+ struct anv_pipeline_binding sampler_to_descriptor[256];
- prog_data.binding_table.work_groups_start = 0;
+ map = (struct anv_pipeline_bind_map) {
+ .surface_to_descriptor = surface_to_descriptor,
+ .sampler_to_descriptor = sampler_to_descriptor
+ };
nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
MESA_SHADER_COMPUTE, spec_info,
- &prog_data.base);
+ &prog_data.base, &map);
if (nir == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
kernel = anv_pipeline_cache_upload_kernel(cache,
module->size > 0 ? sha1 : NULL,
shader_code, code_size,
- &stage_prog_data, sizeof(prog_data));
+ &stage_prog_data, sizeof(prog_data),
+ &map);
+
ralloc_free(mem_ctx);
}
pipeline->cs_simd = kernel;
anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE,
- stage_prog_data);
+ stage_prog_data, &map);
return VK_SUCCESS;
}
unsigned char sha1[20];
uint32_t prog_data_size;
uint32_t kernel_size;
+ uint32_t surface_count;
+ uint32_t sampler_count;
+ uint32_t image_count;
+
char prog_data[0];
/* kernel follows prog_data at next 64 byte aligned address */
* doesn't include the alignment padding bytes.
*/
- return sizeof(*entry) + entry->prog_data_size + entry->kernel_size;
+ const uint32_t map_size =
+ entry->surface_count * sizeof(struct anv_pipeline_binding) +
+ entry->sampler_count * sizeof(struct anv_pipeline_binding);
+
+ return sizeof(*entry) + entry->prog_data_size + map_size;
}
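Since the bind map is now stored inline in the cache entry, the layout in the program stream, as implied by entry_size() above and the search/upload code below, is roughly:

/* Cache entry layout (sketch derived from this patch):
 *
 *   struct cache_entry                                        header
 *   char prog_data[prog_data_size]
 *   struct anv_pipeline_binding surface_to_descriptor[surface_count]
 *   struct anv_pipeline_binding sampler_to_descriptor[sampler_count]
 *   <padding up to the next 64-byte boundary>
 *   kernel[kernel_size]
 *
 * entry_size() covers everything before the padding, so the kernel starts
 * at the offset computed below, relative to the entry. */
static uint32_t
cache_entry_kernel_offset(struct cache_entry *entry)
{
   /* Hypothetical helper, not part of the patch; it matches what
    * anv_pipeline_cache_search() returns (plus the entry's offset). */
   return align_u32(entry_size(entry), 64);
}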
void
uint32_t
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
const unsigned char *sha1,
- const struct brw_stage_prog_data **prog_data)
+ const struct brw_stage_prog_data **prog_data,
+ struct anv_pipeline_bind_map *map)
{
const uint32_t mask = cache->table_size - 1;
const uint32_t start = (*(uint32_t *) sha1);
struct cache_entry *entry =
cache->program_stream.block_pool->map + offset;
if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
- if (prog_data)
- *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
-
- const uint32_t preamble_size =
- align_u32(sizeof(*entry) + entry->prog_data_size, 64);
-
- return offset + preamble_size;
+ if (prog_data) {
+ assert(map);
+ void *p = entry->prog_data;
+ *prog_data = p;
+ p += entry->prog_data_size;
+ map->surface_count = entry->surface_count;
+ map->sampler_count = entry->sampler_count;
+ map->image_count = entry->image_count;
+ map->surface_to_descriptor = p;
+ p += map->surface_count * sizeof(struct anv_pipeline_binding);
+ map->sampler_to_descriptor = p;
+ }
+
+ return offset + align_u32(entry_size(entry), 64);
}
}
}
}
- cache->total_size += entry_size(entry);
+ cache->total_size += entry_size(entry) + entry->kernel_size;
cache->kernel_count++;
}
const unsigned char *sha1,
const void *kernel, size_t kernel_size,
const struct brw_stage_prog_data **prog_data,
- size_t prog_data_size)
+ size_t prog_data_size,
+ struct anv_pipeline_bind_map *map)
{
pthread_mutex_lock(&cache->mutex);
struct cache_entry *entry;
+ const uint32_t map_size =
+ map->surface_count * sizeof(struct anv_pipeline_binding) +
+ map->sampler_count * sizeof(struct anv_pipeline_binding);
+
const uint32_t preamble_size =
- align_u32(sizeof(*entry) + prog_data_size, 64);
+ align_u32(sizeof(*entry) + prog_data_size + map_size, 64);
const uint32_t size = preamble_size + kernel_size;
entry = state.map;
entry->prog_data_size = prog_data_size;
- memcpy(entry->prog_data, *prog_data, prog_data_size);
- *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
+ entry->surface_count = map->surface_count;
+ entry->sampler_count = map->sampler_count;
+ entry->image_count = map->image_count;
entry->kernel_size = kernel_size;
+ void *p = entry->prog_data;
+ memcpy(p, *prog_data, prog_data_size);
+ p += prog_data_size;
+
+ memcpy(p, map->surface_to_descriptor,
+ map->surface_count * sizeof(struct anv_pipeline_binding));
+ map->surface_to_descriptor = p;
+ p += map->surface_count * sizeof(struct anv_pipeline_binding);
+
+ memcpy(p, map->sampler_to_descriptor,
+ map->sampler_count * sizeof(struct anv_pipeline_binding));
+ map->sampler_to_descriptor = p;
+
if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) {
- assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL);
+ assert(anv_pipeline_cache_search(cache, sha1, NULL, NULL) == NO_KERNEL);
memcpy(entry->sha1, sha1, sizeof(entry->sha1));
anv_pipeline_cache_add_entry(cache, entry, state.offset);
if (!cache->device->info.has_llc)
anv_state_clflush(state);
+ *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
+
return state.offset + preamble_size;
}
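One consequence worth calling out: anv_pipeline_cache_upload_kernel() now has the side effect of redirecting both *prog_data and the map's pointers into cache-owned memory before it returns, which is what lets the callers above keep the bind map arrays on the stack. In sketch form, mirroring the VS path (details elided, names as in the patch):

/* Before the call, map's pointers reference the caller's stack arrays and
 * stage_prog_data references the locally compiled prog_data. */
kernel = anv_pipeline_cache_upload_kernel(cache, sha1, shader_code, code_size,
                                          &stage_prog_data, sizeof(prog_data),
                                          &map);
/* After the call, stage_prog_data points at entry->prog_data inside the
 * cache stream, and map.surface_to_descriptor / map.sampler_to_descriptor
 * point at the copies stored right after it, so the stack arrays are no
 * longer referenced. */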
if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
return;
- const void *end = data + size;
- const void *p = data + header.header_size;
+ void *end = (void *) data + size;
+ void *p = (void *) data + header.header_size;
while (p < end) {
- /* The kernels aren't 64 byte aligned in the serialized format so
- * they're always right after the prog_data.
- */
- const struct cache_entry *entry = p;
- const void *kernel = &entry->prog_data[entry->prog_data_size];
-
- const struct brw_stage_prog_data *prog_data =
- (const struct brw_stage_prog_data *) entry->prog_data;
+ struct cache_entry *entry = p;
+
+ void *data = entry->prog_data;
+ const struct brw_stage_prog_data *prog_data = data;
+ data += entry->prog_data_size;
+
+ struct anv_pipeline_binding *surface_to_descriptor = data;
+ data += entry->surface_count * sizeof(struct anv_pipeline_binding);
+ struct anv_pipeline_binding *sampler_to_descriptor = data;
+ data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
+ void *kernel = data;
+
+ struct anv_pipeline_bind_map map = {
+ .surface_count = entry->surface_count,
+ .sampler_count = entry->sampler_count,
+ .image_count = entry->image_count,
+ .surface_to_descriptor = surface_to_descriptor,
+ .sampler_to_descriptor = sampler_to_descriptor
+ };
anv_pipeline_cache_upload_kernel(cache, entry->sha1,
kernel, entry->kernel_size,
&prog_data,
- entry->prog_data_size);
+ entry->prog_data_size, &map);
p = kernel + entry->kernel_size;
}
}
continue;
entry = cache->program_stream.block_pool->map + cache->hash_table[i];
- if (end < p + entry_size(entry))
+ const uint32_t size = entry_size(entry);
+ if (end < p + size + entry->kernel_size)
break;
- memcpy(p, entry, sizeof(*entry) + entry->prog_data_size);
- p += sizeof(*entry) + entry->prog_data_size;
+ memcpy(p, entry, size);
+ p += size;
- void *kernel = (void *) entry +
- align_u32(sizeof(*entry) + entry->prog_data_size, 64);
+ void *kernel = (void *) entry + align_u32(size, 64);
memcpy(p, kernel, entry->kernel_size);
p += entry->kernel_size;
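The serialized blob thus keeps the property the removed comment in anv_pipeline_cache_load() described: entries are packed without the 64-byte alignment used in the program stream, with the kernel following the bind map directly. In sketch form:

/* Serialized layout per entry (derived from the writer above and the
 * reader in anv_pipeline_cache_load()):
 *
 *   entry_size(entry) bytes : header + prog_data + surface/sampler bindings
 *   kernel_size bytes       : kernel, immediately following (no padding)
 *
 * which is why the reader advances with p = kernel + entry->kernel_size. */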
struct cache_entry *entry =
src->program_stream.block_pool->map + offset;
- if (anv_pipeline_cache_search(dst, entry->sha1, NULL) != NO_KERNEL)
+ if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
continue;
anv_pipeline_cache_add_entry(dst, entry, offset);
uint32_t * hash_table;
};
+ struct anv_pipeline_bind_map;
+
void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
struct anv_device *device);
void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
const unsigned char *sha1,
- const struct brw_stage_prog_data **prog_data);
+ const struct brw_stage_prog_data **prog_data,
+ struct anv_pipeline_bind_map *map);
uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
const unsigned char *sha1,
const void *kernel,
size_t kernel_size,
const struct brw_stage_prog_data **prog_data,
- size_t prog_data_size);
+ size_t prog_data_size,
+ struct anv_pipeline_bind_map *map);
struct anv_device {
VK_LOADER_DATA _loader_data;