From 2d3182b429552651f54650fcc9ea53d41fabe6de Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 13 Dec 2019 22:05:11 -0800 Subject: [PATCH] turnip: Add support for descriptor arrays. I had a bigger rework I was working on, but this is simple and gets tests passing. Fixes 36 failures in dEQP-VK.binding_model.shader_access.primary_cmd_buf.sampler_mutable.fragment.* (now all passing) Reviewed-by: Jonathan Marek Tested-by: Marge Bot Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.c | 151 +++++++++++++++-------- src/freedreno/vulkan/tu_descriptor_set.c | 15 +++ src/freedreno/vulkan/tu_pipeline.c | 54 +++++--- src/freedreno/vulkan/tu_private.h | 5 +- src/freedreno/vulkan/tu_shader.c | 104 +++++++++------- 5 files changed, 217 insertions(+), 112 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index f267f6ffeb2..0c67ea983db 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -2490,7 +2490,8 @@ struct tu_draw_state_group static struct tu_sampler* sampler_ptr(struct tu_descriptor_state *descriptors_state, - const struct tu_descriptor_map *map, unsigned i) + const struct tu_descriptor_map *map, unsigned i, + unsigned array_index) { assert(descriptors_state->valid & (1 << map->set[i])); @@ -2504,7 +2505,10 @@ sampler_ptr(struct tu_descriptor_state *descriptors_state, case VK_DESCRIPTOR_TYPE_SAMPLER: return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4]; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS]; + return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS + + array_index * + (A6XX_TEX_CONST_DWORDS + + sizeof(struct tu_sampler) / 4)]; default: unreachable("unimplemented descriptor type"); break; @@ -2516,7 +2520,7 @@ write_tex_const(struct tu_cmd_buffer *cmd, uint32_t *dst, struct tu_descriptor_state *descriptors_state, const struct tu_descriptor_map *map, - unsigned i) + unsigned i, unsigned array_index) { assert(descriptors_state->valid & (1 << map->set[i])); @@ -2528,11 +2532,19 @@ write_tex_const(struct tu_cmd_buffer *cmd, switch (layout->type) { case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - memcpy(dst, &set->mapped_ptr[layout->offset / 4], A6XX_TEX_CONST_DWORDS*4); + memcpy(dst, &set->mapped_ptr[layout->offset / 4 + + array_index * A6XX_TEX_CONST_DWORDS], + A6XX_TEX_CONST_DWORDS * 4); + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + memcpy(dst, &set->mapped_ptr[layout->offset / 4 + + array_index * + (A6XX_TEX_CONST_DWORDS + + sizeof(struct tu_sampler) / 4)], + A6XX_TEX_CONST_DWORDS * 4); break; default: unreachable("unimplemented descriptor type"); @@ -2541,7 +2553,8 @@ write_tex_const(struct tu_cmd_buffer *cmd, if (layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) { const struct tu_tiling_config *tiling = &cmd->state.tiling_config; - uint32_t a = cmd->state.subpass->input_attachments[map->value[i]].attachment; + uint32_t a = cmd->state.subpass->input_attachments[map->value[i] + + array_index].attachment; assert(cmd->state.pass->attachments[a].needs_gmem); dst[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK); @@ -2561,7 +2574,7 @@ write_tex_const(struct tu_cmd_buffer *cmd, static uint64_t buffer_ptr(struct tu_descriptor_state *descriptors_state, const struct tu_descriptor_map *map, - unsigned i) + unsigned i, unsigned array_index) { assert(descriptors_state->valid & (1 << map->set[i])); @@ -2574,11 +2587,12 @@ buffer_ptr(struct tu_descriptor_state *descriptors_state, switch (layout->type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset]; + return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset + + array_index]; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - return (uint64_t) set->mapped_ptr[layout->offset / 4 + 1] << 32 | - set->mapped_ptr[layout->offset / 4]; + return (uint64_t) set->mapped_ptr[layout->offset / 4 + array_index * 2 + 1] << 32 | + set->mapped_ptr[layout->offset / 4 + array_index * 2]; default: unreachable("unimplemented descriptor type"); break; @@ -2663,7 +2677,22 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline, continue; } - uint64_t va = buffer_ptr(descriptors_state, &link->ubo_map, i - 1); + /* Look through the UBO map to find our UBO index, and get the VA for + * that UBO. + */ + uint64_t va = 0; + uint32_t ubo_idx = i - 1; + uint32_t ubo_map_base = 0; + for (int j = 0; j < link->ubo_map.num; j++) { + if (ubo_idx >= ubo_map_base && + ubo_idx < ubo_map_base + link->ubo_map.array_size[j]) { + va = buffer_ptr(descriptors_state, &link->ubo_map, j, + ubo_idx - ubo_map_base); + break; + } + ubo_map_base += link->ubo_map.array_size[j]; + } + assert(va); tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3); tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) | @@ -2684,9 +2713,8 @@ tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline, const struct tu_program_descriptor_linkage *link = &pipeline->program.link[type]; - uint32_t num = MIN2(link->ubo_map.num, link->const_state.num_ubos); + uint32_t num = MIN2(link->ubo_map.num_desc, link->const_state.num_ubos); uint32_t anum = align(num, 2); - uint32_t i; if (!num) return; @@ -2700,10 +2728,15 @@ tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline, tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - for (i = 0; i < num; i++) - tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i)); + unsigned emitted = 0; + for (unsigned i = 0; emitted < num && i < link->ubo_map.num; i++) { + for (unsigned j = 0; emitted < num && j < link->ubo_map.array_size[i]; j++) { + tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i, j)); + emitted++; + } + } - for (; i < anum; i++) { + for (; emitted < anum; emitted++) { tu_cs_emit(cs, 0xffffffff); tu_cs_emit(cs, 0xffffffff); } @@ -2738,33 +2771,45 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd, &pipeline->program.link[type]; VkResult result; - if (link->texture_map.num == 0 && link->sampler_map.num == 0) { + if (link->texture_map.num_desc == 0 && link->sampler_map.num_desc == 0) { *entry = (struct tu_cs_entry) {}; return VK_SUCCESS; } /* allocate and fill texture state */ struct ts_cs_memory tex_const; - result = tu_cs_alloc(device, draw_state, link->texture_map.num, A6XX_TEX_CONST_DWORDS, &tex_const); + result = tu_cs_alloc(device, draw_state, link->texture_map.num_desc, + A6XX_TEX_CONST_DWORDS, &tex_const); if (result != VK_SUCCESS) return result; + int tex_index = 0; for (unsigned i = 0; i < link->texture_map.num; i++) { - write_tex_const(cmd, - &tex_const.map[A6XX_TEX_CONST_DWORDS*i], - descriptors_state, &link->texture_map, i); + for (int j = 0; j < link->texture_map.array_size[i]; j++) { + write_tex_const(cmd, + &tex_const.map[A6XX_TEX_CONST_DWORDS * tex_index++], + descriptors_state, &link->texture_map, i, j); + } } /* allocate and fill sampler state */ - struct ts_cs_memory tex_samp; - result = tu_cs_alloc(device, draw_state, link->sampler_map.num, A6XX_TEX_SAMP_DWORDS, &tex_samp); - if (result != VK_SUCCESS) - return result; + struct ts_cs_memory tex_samp = { 0 }; + if (link->sampler_map.num_desc) { + result = tu_cs_alloc(device, draw_state, link->sampler_map.num_desc, + A6XX_TEX_SAMP_DWORDS, &tex_samp); + if (result != VK_SUCCESS) + return result; - for (unsigned i = 0; i < link->sampler_map.num; i++) { - struct tu_sampler *sampler = sampler_ptr(descriptors_state, &link->sampler_map, i); - memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS*i], sampler->state, sizeof(sampler->state)); - *needs_border |= sampler->needs_border; + int sampler_index = 0; + for (unsigned i = 0; i < link->sampler_map.num; i++) { + for (int j = 0; j < link->sampler_map.array_size[i]; j++) { + struct tu_sampler *sampler = sampler_ptr(descriptors_state, + &link->sampler_map, i, j); + memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS * sampler_index++], + sampler->state, sizeof(sampler->state)); + *needs_border |= sampler->needs_border; + } + } } unsigned tex_samp_reg, tex_const_reg, tex_count_reg; @@ -2798,17 +2843,19 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd, if (result != VK_SUCCESS) return result; - /* output sampler state: */ - tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3); - tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(sb) | - CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num)); - tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */ + if (link->sampler_map.num_desc) { + /* output sampler state: */ + tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3); + tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(sb) | + CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num_desc)); + tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */ - tu_cs_emit_pkt4(&cs, tex_samp_reg, 2); - tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */ + tu_cs_emit_pkt4(&cs, tex_samp_reg, 2); + tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */ + } /* emit texture state: */ tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3); @@ -2816,14 +2863,14 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd, CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(sb) | - CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num)); + CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num_desc)); tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */ tu_cs_emit_pkt4(&cs, tex_const_reg, 2); tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */ tu_cs_emit_pkt4(&cs, tex_count_reg, 1); - tu_cs_emit(&cs, link->texture_map.num); + tu_cs_emit(&cs, link->texture_map.num_desc); *entry = tu_cs_end_sub_stream(draw_state, &cs); return VK_SUCCESS; @@ -2860,7 +2907,8 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd, if (idx & IBO_SSBO) { idx &= ~IBO_SSBO; - uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx); + uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx, + 0 /* XXX */); /* We don't expose robustBufferAccess, so leave the size unlimited. */ uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4; @@ -2992,21 +3040,26 @@ tu6_emit_border_color(struct tu_cmd_buffer *cmd, struct ts_cs_memory ptr; VkResult result = tu_cs_alloc(cmd->device, &cmd->sub_cs, - vs_sampler->num + fs_sampler->num, 128 / 4, + vs_sampler->num_desc + fs_sampler->num_desc, + 128 / 4, &ptr); if (result != VK_SUCCESS) return result; for (unsigned i = 0; i < vs_sampler->num; i++) { - struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i); - memcpy(ptr.map, &border_color[sampler->border], 128); - ptr.map += 128 / 4; + for (unsigned j = 0; j < vs_sampler->array_size[i]; j++) { + struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i, j); + memcpy(ptr.map, &border_color[sampler->border], 128); + ptr.map += 128 / 4; + } } for (unsigned i = 0; i < fs_sampler->num; i++) { - struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i); - memcpy(ptr.map, &border_color[sampler->border], 128); - ptr.map += 128 / 4; + for (unsigned j = 0; j < fs_sampler->array_size[i]; j++) { + struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i, j); + memcpy(ptr.map, &border_color[sampler->border], 128); + ptr.map += 128 / 4; + } } tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2); diff --git a/src/freedreno/vulkan/tu_descriptor_set.c b/src/freedreno/vulkan/tu_descriptor_set.c index 70fa3cfe3d2..26f49001d09 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.c +++ b/src/freedreno/vulkan/tu_descriptor_set.c @@ -21,6 +21,21 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ + +/** + * @file + * + * The texture and sampler descriptors are laid out in a single global space + * across all shader stages, for both simplicity of implementation and because + * that seems to be how things have to be structured for border color + * handling. + * + * Each shader stage will declare its texture/sampler count based on the last + * descriptor set it uses. At draw emit time (though it really should be + * CmdBind time), we upload the descriptor sets used by each shader stage to + * their stage. + */ + #include "tu_private.h" #include diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index ff523c984a4..f4f050ab8d9 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -43,6 +43,7 @@ struct tu_pipeline_builder { struct tu_device *device; struct tu_pipeline_cache *cache; + struct tu_pipeline_layout *layout; const VkAllocationCallbacks *alloc; const VkGraphicsPipelineCreateInfo *create_info; @@ -358,7 +359,8 @@ tu6_blend_op(VkBlendOp op) } static void -tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs) +tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader, + const struct ir3_shader_variant *vs) { uint32_t sp_vs_ctrl = A6XX_SP_VS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | @@ -368,8 +370,8 @@ tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs) if (vs->need_pixlod) sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_PIXLODENABLE; - uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(vs->num_samp) | - A6XX_SP_VS_CONFIG_NSAMP(vs->num_samp); + uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(shader->texture_map.num_desc) | + A6XX_SP_VS_CONFIG_NSAMP(shader->sampler_map.num_desc); if (vs->instrlen) sp_vs_config |= A6XX_SP_VS_CONFIG_ENABLED; @@ -386,7 +388,8 @@ tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs) } static void -tu6_emit_hs_config(struct tu_cs *cs, const struct ir3_shader_variant *hs) +tu6_emit_hs_config(struct tu_cs *cs, struct tu_shader *shader, + const struct ir3_shader_variant *hs) { uint32_t sp_hs_config = 0; if (hs->instrlen) @@ -404,7 +407,8 @@ tu6_emit_hs_config(struct tu_cs *cs, const struct ir3_shader_variant *hs) } static void -tu6_emit_ds_config(struct tu_cs *cs, const struct ir3_shader_variant *ds) +tu6_emit_ds_config(struct tu_cs *cs, struct tu_shader *shader, + const struct ir3_shader_variant *ds) { uint32_t sp_ds_config = 0; if (ds->instrlen) @@ -419,7 +423,8 @@ tu6_emit_ds_config(struct tu_cs *cs, const struct ir3_shader_variant *ds) } static void -tu6_emit_gs_config(struct tu_cs *cs, const struct ir3_shader_variant *gs) +tu6_emit_gs_config(struct tu_cs *cs, struct tu_shader *shader, + const struct ir3_shader_variant *gs) { uint32_t sp_gs_config = 0; if (gs->instrlen) @@ -437,7 +442,8 @@ tu6_emit_gs_config(struct tu_cs *cs, const struct ir3_shader_variant *gs) } static void -tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs) +tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader, + const struct ir3_shader_variant *fs) { uint32_t sp_fs_ctrl = A6XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | 0x1000000 | @@ -449,8 +455,8 @@ tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs) if (fs->need_pixlod) sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE; - uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(fs->num_samp) | - A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp) | + uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) | + A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) | A6XX_SP_FS_CONFIG_NIBO(fs->image_mapping.num_ibo); if (fs->instrlen) sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED; @@ -477,7 +483,8 @@ tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs) } static void -tu6_emit_cs_config(struct tu_cs *cs, const struct ir3_shader_variant *v) +tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, + const struct ir3_shader_variant *v) { tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1); tu_cs_emit(cs, 0xff); @@ -490,8 +497,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct ir3_shader_variant *v) tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CONFIG, 2); tu_cs_emit(cs, A6XX_SP_CS_CONFIG_ENABLED | A6XX_SP_CS_CONFIG_NIBO(v->image_mapping.num_ibo) | - A6XX_SP_CS_CONFIG_NTEX(v->num_samp) | - A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); + A6XX_SP_CS_CONFIG_NTEX(shader->texture_map.num_desc) | + A6XX_SP_CS_CONFIG_NSAMP(shader->sampler_map.num_desc)); tu_cs_emit(cs, v->instrlen); tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CTRL_REG0, 1); @@ -1036,11 +1043,11 @@ tu6_emit_program(struct tu_cs *cs, fs = &dummy_variant; } - tu6_emit_vs_config(cs, vs); - tu6_emit_hs_config(cs, hs); - tu6_emit_ds_config(cs, ds); - tu6_emit_gs_config(cs, gs); - tu6_emit_fs_config(cs, fs); + tu6_emit_vs_config(cs, builder->shaders[MESA_SHADER_VERTEX], vs); + tu6_emit_hs_config(cs, builder->shaders[MESA_SHADER_TESS_CTRL], hs); + tu6_emit_ds_config(cs, builder->shaders[MESA_SHADER_TESS_EVAL], ds); + tu6_emit_gs_config(cs, builder->shaders[MESA_SHADER_GEOMETRY], gs); + tu6_emit_fs_config(cs, builder->shaders[MESA_SHADER_FRAGMENT], fs); tu6_emit_vs_system_values(cs, vs); tu6_emit_vpc(cs, vs, fs, binning_pass); @@ -1535,7 +1542,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder) continue; struct tu_shader *shader = - tu_shader_create(builder->device, stage, stage_info, builder->alloc); + tu_shader_create(builder->device, stage, stage_info, builder->layout, + builder->alloc); if (!shader) return VK_ERROR_OUT_OF_HOST_MEMORY; @@ -1910,11 +1918,14 @@ tu_pipeline_builder_init_graphics( const VkGraphicsPipelineCreateInfo *create_info, const VkAllocationCallbacks *alloc) { + TU_FROM_HANDLE(tu_pipeline_layout, layout, create_info->layout); + *builder = (struct tu_pipeline_builder) { .device = dev, .cache = cache, .create_info = create_info, .alloc = alloc, + .layout = layout, }; builder->rasterizer_discard = @@ -2003,7 +2014,7 @@ tu6_emit_compute_program(struct tu_cs *cs, { const struct ir3_shader_variant *v = &shader->variants[0]; - tu6_emit_cs_config(cs, v); + tu6_emit_cs_config(cs, shader, v); /* The compute program is the only one in the pipeline, so 0 offset. */ tu6_emit_shader_object(cs, MESA_SHADER_COMPUTE, v, binary_bo, 0); @@ -2044,6 +2055,7 @@ tu_compute_pipeline_create(VkDevice device, VkPipeline *pPipeline) { TU_FROM_HANDLE(tu_device, dev, device); + TU_FROM_HANDLE(tu_pipeline_layout, layout, pCreateInfo->layout); const VkPipelineShaderStageCreateInfo *stage_info = &pCreateInfo->stage; VkResult result; @@ -2053,11 +2065,13 @@ tu_compute_pipeline_create(VkDevice device, if (result != VK_SUCCESS) return result; + pipeline->layout = layout; + struct tu_shader_compile_options options; tu_shader_compile_options_init(&options, NULL); struct tu_shader *shader = - tu_shader_create(dev, MESA_SHADER_COMPUTE, stage_info, pAllocator); + tu_shader_create(dev, MESA_SHADER_COMPUTE, stage_info, layout, pAllocator); if (!shader) { result = VK_ERROR_OUT_OF_HOST_MEMORY; goto fail; diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 7541e729fee..4f78f7163e5 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -1050,10 +1050,12 @@ struct tu_shader_compile_options struct tu_descriptor_map { /* TODO: avoid fixed size array/justify the size */ - unsigned num; + unsigned num; /* number of array entries */ + unsigned num_desc; /* Number of descriptors (sum of array_size[]) */ int set[64]; int binding[64]; int value[64]; + int array_size[64]; }; struct tu_shader @@ -1080,6 +1082,7 @@ struct tu_shader * tu_shader_create(struct tu_device *dev, gl_shader_stage stage, const VkPipelineShaderStageCreateInfo *stage_info, + struct tu_pipeline_layout *layout, const VkAllocationCallbacks *alloc); void diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 9a904e61d07..1fc9da9a779 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -109,26 +109,35 @@ tu_sort_variables_by_location(struct exec_list *variables) } static unsigned -map_add(struct tu_descriptor_map *map, int set, int binding, int value) +map_add(struct tu_descriptor_map *map, int set, int binding, int value, + int array_size) { - unsigned index; - for (index = 0; index < map->num; index++) { - if (set == map->set[index] && binding == map->binding[index]) - break; + unsigned index = 0; + for (unsigned i = 0; i < map->num; i++) { + if (set == map->set[i] && binding == map->binding[i]) { + assert(value == map->value[i]); + assert(array_size == map->array_size[i]); + return index; + } + index += map->array_size[i]; } - assert(index < ARRAY_SIZE(map->set)); + assert(index == map->num_desc); + + map->set[map->num] = set; + map->binding[map->num] = binding; + map->value[map->num] = value; + map->array_size[map->num] = array_size; + map->num++; + map->num_desc += array_size; - map->set[index] = set; - map->binding[index] = binding; - map->value[index] = value; - map->num = MAX2(map->num, index + 1); return index; } static void lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx, - struct tu_shader *shader) + struct tu_shader *shader, + const struct tu_pipeline_layout *layout) { nir_ssa_def *index = NULL; unsigned base_index = 0; @@ -184,39 +193,39 @@ lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx, nir_tex_instr_remove_src(instr, src_idx); } - if (array_elements > 1) - tu_finishme("texture/sampler array"); - - if (is_sampler) { - instr->sampler_index = map_add(&shader->sampler_map, - deref->var->data.descriptor_set, - deref->var->data.binding, - 0); - instr->sampler_index += base_index; - } else { - instr->texture_index = map_add(&shader->texture_map, - deref->var->data.descriptor_set, - deref->var->data.binding, - deref->var->data.index); - instr->texture_index += base_index; - instr->texture_array_size = array_elements; - } + uint32_t set = deref->var->data.descriptor_set; + uint32_t binding = deref->var->data.binding; + struct tu_descriptor_set_layout *set_layout = layout->set[set].layout; + struct tu_descriptor_set_binding_layout *binding_layout = + &set_layout->binding[binding]; + + int desc_index = map_add(is_sampler ? + &shader->sampler_map : &shader->texture_map, + deref->var->data.descriptor_set, + deref->var->data.binding, + deref->var->data.index, + binding_layout->array_size) + base_index; + if (is_sampler) + instr->sampler_index = desc_index; + else + instr->texture_index = desc_index; } static bool -lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader) +lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader, + const struct tu_pipeline_layout *layout) { int texture_idx = nir_tex_instr_src_index(instr, nir_tex_src_texture_deref); if (texture_idx >= 0) - lower_tex_src_to_offset(b, instr, texture_idx, shader); + lower_tex_src_to_offset(b, instr, texture_idx, shader, layout); int sampler_idx = nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref); if (sampler_idx >= 0) - lower_tex_src_to_offset(b, instr, sampler_idx, shader); + lower_tex_src_to_offset(b, instr, sampler_idx, shader, layout); if (texture_idx < 0 && sampler_idx < 0) return false; @@ -226,7 +235,8 @@ lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader) static bool lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, - struct tu_shader *shader) + struct tu_shader *shader, + const struct tu_pipeline_layout *layout) { /* TODO: remove this when layered rendering is implemented */ if (instr->intrinsic == nir_intrinsic_load_layer_id) { @@ -260,23 +270,30 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, return false; nir_const_value *const_val = nir_src_as_const_value(instr->src[0]); - if (!const_val || const_val->u32 != 0) - tu_finishme("non-zero vulkan_resource_index array index"); unsigned set = nir_intrinsic_desc_set(instr); unsigned binding = nir_intrinsic_binding(instr); + struct tu_descriptor_set_layout *set_layout = layout->set[set].layout; + struct tu_descriptor_set_binding_layout *binding_layout = + &set_layout->binding[binding]; unsigned index = 0; switch (nir_intrinsic_desc_type(instr)) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + if (!const_val || const_val->u32 != 0) + tu_finishme("non-zero vulkan_resource_index array index"); /* skip index 0 which is used for push constants */ - index = map_add(&shader->ubo_map, set, binding, 0) + 1; + index = map_add(&shader->ubo_map, set, binding, 0, + binding_layout->array_size) + 1; break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - index = map_add(&shader->ssbo_map, set, binding, 0); + if (!const_val) + tu_finishme("non-constant vulkan_resource_index array index"); + index = map_add(&shader->ssbo_map, set, binding, 0, + binding_layout->array_size); break; default: tu_finishme("unsupported desc_type for vulkan_resource_index"); @@ -291,7 +308,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, } static bool -lower_impl(nir_function_impl *impl, struct tu_shader *shader) +lower_impl(nir_function_impl *impl, struct tu_shader *shader, + const struct tu_pipeline_layout *layout) { nir_builder b; nir_builder_init(&b, impl); @@ -302,10 +320,10 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader) b.cursor = nir_before_instr(instr); switch (instr->type) { case nir_instr_type_tex: - progress |= lower_sampler(&b, nir_instr_as_tex(instr), shader); + progress |= lower_sampler(&b, nir_instr_as_tex(instr), shader, layout); break; case nir_instr_type_intrinsic: - progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader); + progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout); break; default: break; @@ -317,13 +335,14 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader) } static bool -tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader) +tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader, + const struct tu_pipeline_layout *layout) { bool progress = false; nir_foreach_function(function, shader) { if (function->impl) - progress |= lower_impl(function->impl, tu_shader); + progress |= lower_impl(function->impl, tu_shader, layout); } return progress; @@ -333,6 +352,7 @@ struct tu_shader * tu_shader_create(struct tu_device *dev, gl_shader_stage stage, const VkPipelineShaderStageCreateInfo *stage_info, + struct tu_pipeline_layout *layout, const VkAllocationCallbacks *alloc) { const struct tu_shader_module *module = @@ -426,7 +446,7 @@ tu_shader_create(struct tu_device *dev, if (stage == MESA_SHADER_FRAGMENT) NIR_PASS_V(nir, nir_lower_input_attachments, true); - NIR_PASS_V(nir, tu_lower_io, shader); + NIR_PASS_V(nir, tu_lower_io, shader, layout); NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0); -- 2.30.2