From: Connor Abbott Date: Wed, 18 Mar 2020 12:12:31 +0000 (+0100) Subject: tu: ir3: Emit push constants directly X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=d3b7681df28e89311c4149708311de546e0a58cc tu: ir3: Emit push constants directly Carve out some space at the beginning for push constants, and push them directly, rather than remapping them to a UBO and then relying on the UBO pushing code. Remapping to a UBO is easy now, where there's a single table of UBOs, but with the bindless model it'll be a lot harder. I haven't removed all the code to move the remaining UBOs over by 1, though, because it's going to all get rewritten with bindless anyways. Part-of: --- diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index a171cbe7a3c..64599669f02 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -235,7 +235,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader) * first. */ const uint32_t max_upload = 16 * 1024; - uint32_t offset = 0; + uint32_t offset = shader->const_state.num_reserved_user_consts * 16; for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) { uint32_t range_size = state->range[i].end - state->range[i].start; diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 9089789b34c..280dc1c784b 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -75,7 +75,7 @@ enum ir3_driver_param { /** * Describes the layout of shader consts. This includes: - * + Driver lowered UBO ranges + * + User consts + driver lowered UBO ranges * + SSBO sizes * + Image sizes/dimensions * + Driver params (ie. 
IR3_DP_*) @@ -114,6 +114,7 @@ enum ir3_driver_param { */ struct ir3_const_state { unsigned num_ubos; + unsigned num_reserved_user_consts; unsigned num_driver_params; /* scalar */ struct { diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 0583be32ecd..0bf8f56f8bf 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -2676,6 +2676,21 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline, &pipeline->program.link[type]; const struct ir3_ubo_analysis_state *state = &link->ubo_state; + if (link->push_consts.count > 0) { + unsigned num_units = link->push_consts.count; + unsigned offset = link->push_consts.lo; + tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_units * 4); + tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(offset) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) | + CP_LOAD_STATE6_0_NUM_UNIT(num_units)); + tu_cs_emit(cs, 0); + tu_cs_emit(cs, 0); + for (unsigned i = 0; i < num_units * 4; i++) + tu_cs_emit(cs, push_constants[i + offset * 4]); + } + for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) { if (state->range[i].start < state->range[i].end) { uint32_t size = state->range[i].end - state->range[i].start; @@ -2694,21 +2709,6 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline, debug_assert((size % 16) == 0); debug_assert((offset % 16) == 0); - if (i == 0) { - /* push constants */ - tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + (size / 4)); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) | - CP_LOAD_STATE6_0_NUM_UNIT(size / 16)); - tu_cs_emit(cs, 0); - tu_cs_emit(cs, 0); - for (unsigned i = 0; i < size / 4; i++) - tu_cs_emit(cs, push_constants[i + offset 
/ 4]); - continue; - } - /* Look through the UBO map to find our UBO index, and get the VA for * that UBO. */ diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index adf848d5697..5d36dfcaf3f 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -1948,6 +1948,7 @@ tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link, link->ubo_state = v->shader->ubo_state; link->const_state = v->shader->const_state; link->constlen = v->constlen; + link->push_consts = shader->push_consts; link->texture_map = shader->texture_map; link->sampler_map = shader->sampler_map; link->ubo_map = shader->ubo_map; diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index aecc294b79f..3c50b2ec019 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -1129,10 +1129,17 @@ struct tu_descriptor_map int array_size[128]; }; +struct tu_push_constant_range +{ + uint32_t lo; + uint32_t count; +}; + struct tu_shader { struct ir3_shader ir3_shader; + struct tu_push_constant_range push_consts; struct tu_descriptor_map texture_map; struct tu_descriptor_map sampler_map; struct tu_descriptor_map ubo_map; @@ -1181,6 +1188,7 @@ struct tu_program_descriptor_linkage uint32_t constlen; + struct tu_push_constant_range push_consts; struct tu_descriptor_map texture_map; struct tu_descriptor_map sampler_map; struct tu_descriptor_map ubo_map; diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index a8a5303d6ca..deb6d895feb 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -210,17 +210,16 @@ static void lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr, struct tu_shader *shader) { - /* note: ir3 wants load_ubo, not load_uniform */ - assert(nir_intrinsic_base(instr) == 0); - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo); - - 
nir_intrinsic_set_align(load, 4, 0); - + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform); load->num_components = instr->num_components; - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - load->src[1] = instr->src[0]; + uint32_t base = nir_intrinsic_base(instr); + assert(base % 4 == 0); + assert(base >= shader->push_consts.lo * 16); + base -= shader->push_consts.lo * 16; + nir_intrinsic_set_base(load, base / 4); + load->src[0] = + nir_src_for_ssa(nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2))); nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, instr->dest.ssa.bit_size, instr->dest.ssa.name); @@ -343,6 +342,55 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, } } +/* Figure out the range of push constants that we're actually going to push to + * the shader, and tell the backend to reserve this range when pushing UBO + * constants. + */ + +static void +gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader) +{ + uint32_t min = UINT32_MAX, max = 0; + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_push_constant) + continue; + + uint32_t base = nir_intrinsic_base(intrin); + uint32_t range = nir_intrinsic_range(intrin); + min = MIN2(min, base); + max = MAX2(max, base + range); + break; + } + } + } + + if (min >= max) { + tu_shader->push_consts.lo = 0; + tu_shader->push_consts.count = 0; + tu_shader->ir3_shader.const_state.num_reserved_user_consts = 0; + return; + } + + /* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords), + * however there's an alignment requirement of 4 on OFFSET. Expand the + * range and change units accordingly. 
+ */ + tu_shader->push_consts.lo = (min / 16) / 4 * 4; + tu_shader->push_consts.count = + align(max, 16) / 16 - tu_shader->push_consts.lo; + tu_shader->ir3_shader.const_state.num_reserved_user_consts = + align(tu_shader->push_consts.count, 4); +} + static bool lower_impl(nir_function_impl *impl, struct tu_shader *shader, const struct tu_pipeline_layout *layout) @@ -376,6 +424,8 @@ tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader, { bool progress = false; + gather_push_constants(shader, tu_shader); + nir_foreach_function(function, shader) { if (function->impl) progress |= lower_impl(function->impl, tu_shader, layout);