From: Kristian Høgsberg Kristensen Date: Fri, 4 Mar 2016 16:15:16 +0000 (-0800) Subject: anv: Store prog data in pipeline cache stream X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2b29342fae14d8626ca58f8a7ec358b70886ced3;p=mesa.git anv: Store prog data in pipeline cache stream We have to keep it there for the cache to work, so let's not have an extra copy in struct anv_pipeline too. --- diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 6ff5f35bc6a..5ec242fbf2a 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -757,7 +757,7 @@ anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, } if (stage == MESA_SHADER_COMPUTE && - cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) { + get_cs_prog_data(cmd_buffer->state.compute_pipeline)->uses_num_work_groups) { struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo; uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset; @@ -996,7 +996,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, { struct anv_push_constants *data = cmd_buffer->state.push_constants[stage]; - struct brw_stage_prog_data *prog_data = + const struct brw_stage_prog_data *prog_data = cmd_buffer->state.pipeline->prog_data[stage]; /* If we don't actually have any push constants, bail. */ @@ -1027,7 +1027,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) struct anv_push_constants *data = cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index c93b1a07246..868215cd22d 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -406,7 +406,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, static void anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, gl_shader_stage stage, - struct brw_stage_prog_data *prog_data) + const struct brw_stage_prog_data *prog_data) { struct brw_device_info *devinfo = &pipeline->device->info; uint32_t max_threads[] = { @@ -436,7 +436,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_vs_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -446,17 +446,17 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_vs_prog_data prog_data = { 0, }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_VERTEX, spec_info, - &prog_data->base.base); + &prog_data.base.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -465,31 +465,36 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, if (module->nir == NULL) ralloc_steal(mem_ctx, nir); - prog_data->inputs_read = nir->info.inputs_read; + prog_data.inputs_read = nir->info.inputs_read; if (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) pipeline->writes_point_size = true; brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, + &prog_data.base.vue_map, nir->info.outputs_written, nir->info.separate_shader); unsigned code_size; const unsigned *shader_code = - brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_vs(compiler, NULL, mem_ctx, &key, &prog_data, nir, NULL, false, -1, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + stage_prog_data = &prog_data.base.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, + sizeof(prog_data)); ralloc_free(mem_ctx); } - if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + const struct brw_vs_prog_data *vs_prog_data = + (const struct brw_vs_prog_data *) stage_prog_data; + + if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { pipeline->vs_simd8 = kernel; pipeline->vs_vec4 = NO_KERNEL; } else { @@ -498,7 +503,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, } anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_VERTEX, - &prog_data->base.base); + stage_prog_data); return VK_SUCCESS; } @@ -513,7 +518,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_gs_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -523,17 +528,17 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_gs_prog_data prog_data = { 0, }; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_GEOMETRY, spec_info, - &prog_data->base.base); + &prog_data.base.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -546,13 +551,13 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->writes_point_size = true; brw_compute_vue_map(&pipeline->device->info, - &prog_data->base.vue_map, + &prog_data.base.vue_map, nir->info.outputs_written, nir->info.separate_shader); unsigned code_size; const unsigned *shader_code = - brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_gs(compiler, NULL, mem_ctx, &key, &prog_data, nir, NULL, -1, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); @@ -560,9 +565,10 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, } /* TODO: SIMD8 GS */ + stage_prog_data = &prog_data.base.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); } @@ -570,7 +576,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, pipeline->gs_kernel = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_GEOMETRY, - &prog_data->base.base); + stage_prog_data); return VK_SUCCESS; } @@ -586,7 +592,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_wm_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -599,19 +605,19 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_wm_prog_data prog_data = { 0, }; - prog_data->binding_table.render_target_start = 0; + prog_data.binding_table.render_target_start = 0; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_FRAGMENT, spec_info, - &prog_data->base); + &prog_data.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -635,27 +641,31 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, unsigned code_size; const unsigned *shader_code = - brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir, NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + stage_prog_data = &prog_data.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); } - if (prog_data->no_8) + const struct brw_wm_prog_data *wm_prog_data = + (const struct brw_wm_prog_data *) stage_prog_data; + + if (wm_prog_data->no_8) pipeline->ps_simd8 = NO_KERNEL; else pipeline->ps_simd8 = kernel; - if (prog_data->no_8 || prog_data->prog_offset_16) { - pipeline->ps_simd16 = kernel + prog_data->prog_offset_16; + if (wm_prog_data->no_8 || wm_prog_data->prog_offset_16) { + pipeline->ps_simd16 = kernel + wm_prog_data->prog_offset_16; } else { pipeline->ps_simd16 = NO_KERNEL; } @@ -664,18 +674,18 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, pipeline->ps_grf_start2 = 0; if (pipeline->ps_simd8 != NO_KERNEL) { pipeline->ps_ksp0 = pipeline->ps_simd8; - pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + pipeline->ps_grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; if (pipeline->ps_simd16 != NO_KERNEL) { pipeline->ps_ksp2 = pipeline->ps_simd16; - pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + pipeline->ps_grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; } } else if (pipeline->ps_simd16 != NO_KERNEL) { pipeline->ps_ksp0 = pipeline->ps_simd16; - pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; + pipeline->ps_grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; } anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_FRAGMENT, - &prog_data->base); + stage_prog_data); return VK_SUCCESS; } @@ -690,7 +700,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + const struct brw_stage_prog_data *stage_prog_data; struct brw_cs_prog_key key; uint32_t kernel; unsigned char sha1[20], *hash; @@ -700,23 +710,23 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, if (module->size > 0) { hash = sha1; anv_hash_shader(hash, &key, sizeof(key), module, entrypoint, spec_info); - kernel = anv_pipeline_cache_search(cache, hash, prog_data); + kernel = anv_pipeline_cache_search(cache, hash, &stage_prog_data); } else { hash = NULL; } if (module->size == 0 || kernel == NO_KERNEL) { - memset(prog_data, 0, sizeof(*prog_data)); + struct brw_cs_prog_data prog_data = { 0, }; - prog_data->binding_table.work_groups_start = 0; + prog_data.binding_table.work_groups_start = 0; nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, MESA_SHADER_COMPUTE, spec_info, - &prog_data->base); + &prog_data.base); if (nir == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - prog_data->base.total_shared = nir->num_shared; + prog_data.base.total_shared = nir->num_shared; void *mem_ctx = ralloc_context(NULL); @@ -725,23 +735,24 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, unsigned code_size; const unsigned *shader_code = - brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + brw_compile_cs(compiler, NULL, mem_ctx, &key, &prog_data, nir, -1, &code_size, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + stage_prog_data = &prog_data.base; kernel = anv_pipeline_cache_upload_kernel(cache, hash, shader_code, code_size, - prog_data, sizeof(*prog_data)); + &stage_prog_data, sizeof(prog_data)); ralloc_free(mem_ctx); } pipeline->cs_simd = kernel; anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_COMPUTE, - &prog_data->base); + stage_prog_data); return VK_SUCCESS; } @@ -751,10 +762,12 @@ gen7_compute_urb_partition(struct anv_pipeline *pipeline) { const struct brw_device_info *devinfo = &pipeline->device->info; bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; - unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_size = vs_present ? + get_vs_prog_data(pipeline)->base.urb_entry_size : 1; unsigned vs_entry_size_bytes = vs_size * 64; bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; - unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_size = gs_present ? + get_gs_prog_data(pipeline)->base.urb_entry_size : 1; unsigned gs_entry_size_bytes = gs_size * 64; /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): @@ -1136,7 +1149,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { /* Vertex is only optional if disable_vs is set */ assert(extra->disable_vs); - memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); } gen7_compute_urb_partition(pipeline); @@ -1152,7 +1164,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, */ inputs_read = ~0ull; } else { - inputs_read = pipeline->vs_prog_data.inputs_read; + inputs_read = get_vs_prog_data(pipeline)->inputs_read; } pipeline->vb_used = 0; diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c index 48f36706fef..024fdf7d5a9 100644 --- a/src/intel/vulkan/anv_pipeline_cache.c +++ b/src/intel/vulkan/anv_pipeline_cache.c @@ -110,7 +110,8 @@ anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, void *prog_data) + const unsigned char *sha1, + const struct brw_stage_prog_data **prog_data) { const uint32_t mask = cache->table_size - 1; const uint32_t start = (*(uint32_t *) sha1); @@ -126,7 +127,7 @@ anv_pipeline_cache_search(struct anv_pipeline_cache *cache, cache->program_stream.block_pool->map + offset; if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) { if (prog_data) - memcpy(prog_data, entry->prog_data, entry->prog_data_size); + *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; const uint32_t preamble_size = align_u32(sizeof(*entry) + entry->prog_data_size, 64); @@ -198,17 +199,14 @@ uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, const void *kernel, size_t kernel_size, - const void *prog_data, size_t prog_data_size) + const struct brw_stage_prog_data **prog_data, + size_t prog_data_size) { pthread_mutex_lock(&cache->mutex); struct cache_entry *entry; - /* Meta pipelines don't have SPIR-V, so we can't hash them. - * Consequentally, they just don't get cached. - */ - const uint32_t preamble_size = sha1 ? - align_u32(sizeof(*entry) + prog_data_size, 64) : - 0; + const uint32_t preamble_size = + align_u32(sizeof(*entry) + prog_data_size, 64); const uint32_t size = preamble_size + kernel_size; @@ -216,14 +214,16 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const struct anv_state state = anv_state_stream_alloc(&cache->program_stream, size, 64); + entry = state.map; + entry->prog_data_size = prog_data_size; + memcpy(entry->prog_data, *prog_data, prog_data_size); + *prog_data = (const struct brw_stage_prog_data *) entry->prog_data; + entry->kernel_size = kernel_size; + if (sha1 && env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", false)) { assert(anv_pipeline_cache_search(cache, sha1, NULL) == NO_KERNEL); - entry = state.map; - memcpy(entry->sha1, sha1, sizeof(entry->sha1)); - entry->prog_data_size = prog_data_size; - memcpy(entry->prog_data, prog_data, prog_data_size); - entry->kernel_size = kernel_size; + memcpy(entry->sha1, sha1, sizeof(entry->sha1)); if (cache->kernel_count == cache->table_size / 2) anv_pipeline_cache_grow(cache); @@ -285,9 +285,13 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache, const struct cache_entry *entry = p; const void *kernel = &entry->prog_data[entry->prog_data_size]; + const struct brw_stage_prog_data *prog_data = + (const struct brw_stage_prog_data *) entry->prog_data; + anv_pipeline_cache_upload_kernel(cache, entry->sha1, kernel, entry->kernel_size, - entry->prog_data, entry->prog_data_size); + &prog_data, + entry->prog_data_size); p = kernel + entry->kernel_size; } } @@ -406,9 +410,12 @@ anv_pipeline_cache_merge(struct anv_pipeline_cache *dst, const void *kernel = (void *) entry + align_u32(sizeof(*entry) + entry->prog_data_size, 64); + const struct brw_stage_prog_data *prog_data = + (const struct brw_stage_prog_data *) entry->prog_data; + anv_pipeline_cache_upload_kernel(dst, entry->sha1, kernel, entry->kernel_size, - entry->prog_data, entry->prog_data_size); + &prog_data, entry->prog_data_size); } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 32c8b13c952..70b6dd995a1 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -640,12 +640,13 @@ void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, struct anv_device *device); void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache); uint32_t anv_pipeline_cache_search(struct anv_pipeline_cache *cache, - const unsigned char *sha1, void *prog_data); + const unsigned char *sha1, + const struct brw_stage_prog_data **prog_data); uint32_t anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, const unsigned char *sha1, const void *kernel, size_t kernel_size, - const void *prog_data, + const struct brw_stage_prog_data **prog_data, size_t prog_data_size); struct anv_device { @@ -1404,12 +1405,8 @@ struct anv_pipeline { bool use_repclear; - struct brw_vs_prog_data vs_prog_data; - struct brw_wm_prog_data wm_prog_data; - struct brw_gs_prog_data gs_prog_data; - struct brw_cs_prog_data cs_prog_data; bool writes_point_size; - struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; + const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; uint32_t scratch_start[MESA_SHADER_STAGES]; uint32_t total_scratch; struct { @@ -1457,6 +1454,30 @@ struct anv_pipeline { } gen9; }; +static inline const struct brw_vs_prog_data * +get_vs_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_vs_prog_data *) pipeline->prog_data[MESA_SHADER_VERTEX]; +} + +static inline const struct brw_gs_prog_data * +get_gs_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_gs_prog_data *) pipeline->prog_data[MESA_SHADER_GEOMETRY]; +} + +static inline const struct brw_wm_prog_data * +get_wm_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_wm_prog_data *) pipeline->prog_data[MESA_SHADER_FRAGMENT]; +} + +static inline const struct brw_cs_prog_data * +get_cs_prog_data(struct anv_pipeline *pipeline) +{ + return (const struct brw_cs_prog_data *) pipeline->prog_data[MESA_SHADER_COMPUTE]; +} + struct anv_graphics_pipeline_create_info { /** * If non-negative, overrides the color attachment count of the pipeline's diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 985907872fa..8dce586eec7 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -283,7 +283,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; @@ -395,11 +395,12 @@ void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); VkResult result; assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + bool needs_slm = cs_prog_data->base.total_shared > 0; config_l3(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 5235d399ce5..5f480edf809 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -250,7 +250,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), .SampleMask = 0xff); - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); #if 0 /* From gen7_vs_state.c */ @@ -277,18 +277,18 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .KernelStartPointer = pipeline->vs_vec4, .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + .PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base), .DispatchGRFStartRegisterforURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + vs_prog_data->base.base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vs_prog_data->base.urb_read_length, .VertexURBEntryReadOffset = 0, .MaximumNumberofThreads = device->info.max_vs_threads - 1, .StatisticsEnable = true, .VSFunctionEnable = true); - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false); @@ -338,7 +338,7 @@ genX(graphics_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), .MaximumNumberofThreads = device->info.max_wm_threads - 1); } else { - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1) anv_finishme("two-sided color needs sbe swizzling setup"); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 8d8775fb01d..0d27c27f5b7 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -505,7 +505,7 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; @@ -558,11 +558,12 @@ void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); VkResult result; assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - bool needs_slm = pipeline->cs_prog_data.base.total_shared > 0; + bool needs_slm = cs_prog_data->base.total_shared > 0; config_l3(cmd_buffer, needs_slm); if (cmd_buffer->state.current_pipeline != GPGPU) { diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 4a5e8674189..5ce1307f090 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -354,6 +354,7 @@ genX(graphics_pipeline_create)( .MaximumPointWidth = 255.875, .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1); + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), .StatisticsEnable = true, .LineEndCapAntialiasingRegionWidth = _05pixels, @@ -363,15 +364,15 @@ genX(graphics_pipeline_create)( .PointRasterizationRule = RASTRULE_UPPER_RIGHT, .BarycentricInterpolationMode = pipeline->ps_ksp0 == NO_KERNEL ? - 0 : pipeline->wm_prog_data.barycentric_interp_modes); + 0 : wm_prog_data->barycentric_interp_modes); - const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; - offset = 1; - length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; - - if (pipeline->gs_kernel == NO_KERNEL) + if (pipeline->gs_kernel == NO_KERNEL) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false); - else + } else { + const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .SingleProgramFlow = false, .KernelStartPointer = pipeline->gs_kernel, @@ -412,11 +413,12 @@ genX(graphics_pipeline_create)( .VertexURBEntryOutputReadOffset = offset, .VertexURBEntryOutputLength = length); + } - const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); /* Skip the VUE header and position slots */ offset = 1; - length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + length = (vs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 : pipeline->vs_vec4; @@ -435,7 +437,7 @@ genX(graphics_pipeline_create)( .VectorMaskEnable = false, .SamplerCount = 0, .BindingTableEntryCount = - vue_prog_data->base.binding_table.size_bytes / 4, + vs_prog_data->base.base.binding_table.size_bytes / 4, .ThreadDispatchPriority = false, .FloatingPointMode = IEEE754, .IllegalOpcodeExceptionEnable = false, @@ -443,11 +445,11 @@ genX(graphics_pipeline_create)( .SoftwareExceptionEnable = false, .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX], - .PerThreadScratchSpace = scratch_space(&vue_prog_data->base), + .PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base), .DispatchGRFStartRegisterForURBData = - vue_prog_data->base.dispatch_grf_start_reg, - .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + vs_prog_data->base.base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vs_prog_data->base.urb_read_length, .VertexURBEntryReadOffset = 0, .MaximumNumberofThreads = device->info.max_vs_threads - 1, @@ -461,8 +463,6 @@ genX(graphics_pipeline_create)( .UserClipDistanceClipTestEnableBitmask = 0, .UserClipDistanceCullTestEnableBitmask = 0); - const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; - const int num_thread_bias = GEN_GEN == 8 ? 2 : 1; if (pipeline->ps_ksp0 == NO_KERNEL) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS)); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 88cc13b580a..2606a66f2a7 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -319,11 +319,11 @@ void genX(CmdDraw)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); genX(cmd_buffer_flush_state)(cmd_buffer); - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), @@ -346,11 +346,11 @@ void genX(CmdDrawIndexed)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); genX(cmd_buffer_flush_state)(cmd_buffer); - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), @@ -398,13 +398,13 @@ void genX(CmdDrawIndirect)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; genX(cmd_buffer_flush_state)(cmd_buffer); - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); @@ -429,14 +429,14 @@ void genX(CmdDrawIndexedIndirect)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; genX(cmd_buffer_flush_state)(cmd_buffer); /* TODO: We need to stomp base vertex to 0 somehow */ - if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex || - cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance) + if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12); emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); @@ -460,7 +460,7 @@ void genX(CmdDispatch)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline); if (prog_data->uses_num_work_groups) { struct anv_state state = @@ -507,7 +507,7 @@ void genX(CmdDispatchIndirect)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline); struct anv_bo *bo = buffer->bo; uint32_t bo_offset = buffer->offset + offset; struct anv_batch *batch = &cmd_buffer->batch; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 1605661f971..cc8841ea8a0 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -82,7 +82,7 @@ genX(compute_pipeline_create)( pipeline->use_repclear = false; - const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data; + const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 28b2a032c46..cd138dfae61 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -52,6 +52,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, const VkPipelineVertexInputStateCreateInfo *info, const struct anv_graphics_pipeline_create_info *extra) { + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + uint32_t elements; if (extra && extra->disable_vs) { /* If the VS is disabled, just assume the user knows what they're @@ -63,7 +65,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, elements |= (1 << info->pVertexAttributeDescriptions[i].location); } else { /* Pull inputs_read out of the VS prog data */ - uint64_t inputs_read = pipeline->vs_prog_data.inputs_read; + uint64_t inputs_read = vs_prog_data->inputs_read; assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); elements = inputs_read >> VERT_ATTRIB_GENERIC0; } @@ -72,16 +74,16 @@ emit_vertex_input(struct anv_pipeline *pipeline, /* On BDW+, we only need to allocate space for base ids. Setting up * the actual vertex and instance id is a separate packet. */ - const bool needs_svgs_elem = pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance; + const bool needs_svgs_elem = vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance; #else /* On Haswell and prior, vertex and instance id are created by using the * ComponentControl fields, so we need an element for any of them. */ - const bool needs_svgs_elem = pipeline->vs_prog_data.uses_vertexid || - pipeline->vs_prog_data.uses_instanceid || - pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance; + const bool needs_svgs_elem = vs_prog_data->uses_vertexid || + vs_prog_data->uses_instanceid || + vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance; #endif uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem; @@ -148,8 +150,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, * This means, that if we have BaseInstance, we need BaseVertex as * well. Just do all or nothing. */ - uint32_t base_ctrl = (pipeline->vs_prog_data.uses_basevertex || - pipeline->vs_prog_data.uses_baseinstance) ? + uint32_t base_ctrl = (vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance) ? VFCOMP_STORE_SRC : VFCOMP_STORE_0; struct GENX(VERTEX_ELEMENT_STATE) element = { @@ -171,10 +173,10 @@ emit_vertex_input(struct anv_pipeline *pipeline, #if GEN_GEN >= 8 anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), - .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDEnable = vs_prog_data->uses_vertexid, .VertexIDComponentNumber = 2, .VertexIDElementOffset = id_slot, - .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDEnable = vs_prog_data->uses_instanceid, .InstanceIDComponentNumber = 3, .InstanceIDElementOffset = id_slot); #endif @@ -222,17 +224,21 @@ emit_urb_setup(struct anv_pipeline *pipeline) static void emit_3dstate_sbe(struct anv_pipeline *pipeline) { + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); const struct brw_vue_map *fs_input_map; + if (pipeline->gs_kernel == NO_KERNEL) - fs_input_map = &pipeline->vs_prog_data.base.vue_map; + fs_input_map = &vs_prog_data->base.vue_map; else - fs_input_map = &pipeline->gs_prog_data.base.vue_map; + fs_input_map = &gs_prog_data->base.vue_map; struct GENX(3DSTATE_SBE) sbe = { GENX(3DSTATE_SBE_header), .AttributeSwizzleEnable = true, .PointSpriteTextureCoordinateOrigin = UPPERLEFT, - .NumberofSFOutputAttributes = pipeline->wm_prog_data.num_varying_inputs, + .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs, #if GEN_GEN >= 9 .Attribute0ActiveComponentFormat = ACF_XYZW, @@ -283,7 +289,7 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) int max_source_attr = 0; for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { - int input_index = pipeline->wm_prog_data.urb_setup[attr]; + int input_index = wm_prog_data->urb_setup[attr]; if (input_index < 0) continue;