From: Rob Clark
Date: Mon, 15 Jun 2020 19:14:04 +0000 (-0700)
Subject: freedreno/ir3: move const_state back to variant
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=640ff0e847f8b4e8799499b0c6c31b194ab5e468;p=mesa.git

freedreno/ir3: move const_state back to variant

For the shader-cache, we do not want to keep anything important in
`ir3_shader`.  And to let shader variants have lower const size limits
(needed to properly handle cross-stage limits), variants also need to
be able to own their const_state.

But binning pass shaders still need to align with their draw pass
counterpart, so that the same const emit can be used for both passes.

Signed-off-by: Rob Clark
Part-of:
---

diff --git a/src/freedreno/ir3/ir3_assembler.c b/src/freedreno/ir3/ir3_assembler.c
index e81eda6e9ae..6dee9475da5 100644
--- a/src/freedreno/ir3/ir3_assembler.c
+++ b/src/freedreno/ir3/ir3_assembler.c
@@ -37,12 +37,12 @@ ir3_parse_asm(struct ir3_compiler *c, struct ir3_kernel_info *info, FILE *in)
 	struct ir3_shader *shader = rzalloc_size(NULL, sizeof(*shader));
 	shader->compiler = c;
 	shader->type = MESA_SHADER_COMPUTE;
-	shader->const_state = rzalloc_size(shader, sizeof(*shader->const_state));
 	mtx_init(&shader->variants_lock, mtx_plain);
 
 	struct ir3_shader_variant *v = rzalloc_size(shader, sizeof(*v));
 	v->type = MESA_SHADER_COMPUTE;
 	v->shader = shader;
+	v->const_state = rzalloc_size(v, sizeof(*v->const_state));
 
 	shader->variants = v;
 	shader->variant_count = 1;
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index de76cb70e55..a73210e5cf2 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -249,18 +249,12 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s)
 
 	ir3_optimize_loop(s);
 
-	/* do ubo load and idiv lowering after first opt loop to get a chance to
-	 * propagate constants for divide by immed power-of-two and constant ubo
-	 * block/offsets:
-	 *
-	 * NOTE that UBO analysis pass should only be done once, before variants
+	/* do idiv lowering after first opt loop to get a chance to propagate
+	 * constants for divide by immed power-of-two:
 	 */
-	const bool ubo_progress = OPT(s, ir3_nir_analyze_ubo_ranges, shader);
 	const bool idiv_progress = OPT(s, nir_lower_idiv, nir_lower_idiv_fast);
-	/* UBO offset lowering has to come after we've decided what will be left as load_ubo */
-	OPT_V(s, ir3_nir_lower_io_offsets, shader->compiler->gpu_id);
 
-	if (ubo_progress || idiv_progress)
+	if (idiv_progress)
 		ir3_optimize_loop(s);
 
 	OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
@@ -272,12 +266,6 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s)
 	}
 
 	nir_sweep(s);
-
-	/* The first time thru, when not creating variant, do the one-time
-	 * const_state layout setup.  This should be done after ubo range
-	 * analysis.
-	 */
-	ir3_setup_const_state(shader, s, shader->const_state);
 }
 
 void
@@ -356,6 +344,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
 		progress |= OPT(s, nir_lower_tex, &tex_options);
 	}
 
+	progress |= OPT(s, ir3_nir_analyze_ubo_ranges, so);
+
+	/* UBO offset lowering has to come after we've decided what will
+	 * be left as load_ubo
+	 */
+	OPT_V(s, ir3_nir_lower_io_offsets, so->shader->compiler->gpu_id);
+
 	if (progress)
 		ir3_optimize_loop(s);
 
@@ -382,6 +377,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
 	}
 
 	nir_sweep(s);
+
+	/* Binning pass variants re-use the const_state of the corresponding
+	 * draw pass shader, so that the same const emit can be re-used for
+	 * both passes:
+	 */
+	if (!so->binning_pass)
+		ir3_setup_const_state(s, so, ir3_const_state(so));
 }
 
 static void
@@ -460,23 +462,23 @@ ir3_nir_scan_driver_consts(nir_shader *shader,
 	}
 }
 
-/* Sets up the non-variant-dependent constant state for the ir3_shader.  Note
+/* Sets up the variant-dependent constant state for the ir3_shader.  Note
  * that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the
  * maximum number of driver params that would eventually be used, to leave
  * space for this function to allocate the driver params.
  */
 void
-ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
-		struct ir3_const_state *const_state)
+ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
+		struct ir3_const_state *const_state)
 {
-	struct ir3_compiler *compiler = shader->compiler;
+	struct ir3_compiler *compiler = v->shader->compiler;
 
 	memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
 
 	ir3_nir_scan_driver_consts(nir, const_state);
 
 	if ((compiler->gpu_id < 500) &&
-			(shader->stream_output.num_outputs > 0)) {
+			(v->shader->stream_output.num_outputs > 0)) {
 		const_state->num_driver_params =
 			MAX2(const_state->num_driver_params, IR3_DP_VTXCNT_MAX + 1);
 	}
@@ -511,14 +513,14 @@ ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
 		const_state->offsets.driver_param = constoff;
 	constoff += const_state->num_driver_params / 4;
 
-	if ((shader->type == MESA_SHADER_VERTEX) &&
+	if ((v->type == MESA_SHADER_VERTEX) &&
 			(compiler->gpu_id < 500) &&
-			shader->stream_output.num_outputs > 0) {
+			v->shader->stream_output.num_outputs > 0) {
 		const_state->offsets.tfbo = constoff;
 		constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
 	}
 
-	switch (shader->type) {
+	switch (v->type) {
 	case MESA_SHADER_VERTEX:
 		const_state->offsets.primitive_param = constoff;
 		constoff += 1;
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index 67c56f931d7..0ad9b35f5fe 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -55,10 +55,9 @@ const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler
 void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s);
 void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
-void ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
+void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
 		struct ir3_const_state *const_state);
-
-bool ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader);
+bool ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v);
 
 nir_ssa_def *
 ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift);
 
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index 0ae11a3512a..d14e5499445 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -302,10 +302,11 @@ instr_is_load_ubo(nir_instr *instr)
 }
 
 bool
-ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
+ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
 {
-	struct ir3_const_state *const_state = shader->const_state;
+	struct ir3_const_state *const_state = ir3_const_state(v);
 	struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
+	struct ir3_compiler *compiler = v->shader->compiler;
 
 	memset(state, 0, sizeof(*state));
 	for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
@@ -318,7 +319,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 			nir_foreach_instr (instr, block) {
 				if (instr_is_load_ubo(instr))
 					gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr),
-							state, shader->compiler->const_upload_unit);
+							state, compiler->const_upload_unit);
 			}
 		}
 	}
@@ -340,11 +341,11 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 	 * be driver params but this pass usually eliminates them.
 	 */
 	struct ir3_const_state worst_case_const_state = { };
-	ir3_setup_const_state(shader, nir, &worst_case_const_state);
-	const uint32_t max_upload = (shader->compiler->max_const -
+	ir3_setup_const_state(nir, v, &worst_case_const_state);
+	const uint32_t max_upload = (compiler->max_const -
 			worst_case_const_state.offsets.immediate) * 16;
 
-	uint32_t offset = shader->num_reserved_user_consts * 16;
+	uint32_t offset = v->shader->num_reserved_user_consts * 16;
 	state->num_enabled = ARRAY_SIZE(state->range);
 	for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
 		if (state->range[i].start >= state->range[i].end) {
@@ -375,7 +376,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 			if (instr_is_load_ubo(instr))
 				lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr),
 						&builder, state, &num_ubos,
-						shader->compiler->const_upload_unit);
+						compiler->const_upload_unit);
 		}
 	}
 
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index 66f72c98dc1..bb5dfa6343f 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -205,6 +205,9 @@ create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
 		v->mergedregs = false;
 	}
 
+	if (!v->binning_pass)
+		v->const_state = rzalloc_size(v, sizeof(*v->const_state));
+
 	ret = ir3_compile_shader_nir(shader->compiler, v);
 	if (ret) {
 		debug_error("compile failed!");
@@ -345,7 +348,6 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
 	if (stream_output)
 		memcpy(&shader->stream_output, stream_output, sizeof(shader->stream_output));
 	shader->num_reserved_user_consts = reserved_user_consts;
-	shader->const_state = rzalloc_size(shader, sizeof(*shader->const_state));
 
 	if (nir->info.stage == MESA_SHADER_GEOMETRY)
 		NIR_PASS_V(nir, ir3_nir_lower_gs);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index ff131cc1838..41ca394837b 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -468,6 +468,8 @@ struct ir3_shader_variant {
 	 */
 	unsigned constlen;
 
+	struct ir3_const_state *const_state;
+
 	/* About Linkage:
 	 *   + Let the frag shader determine the position/compmask for the
 	 *     varyings, since it is the place where we know if the varying
@@ -621,7 +623,6 @@ struct ir3_shader {
 	struct ir3_compiler *compiler;
 
-	struct ir3_const_state *const_state;
 	unsigned num_reserved_user_consts;
 
 	struct nir_shader *nir;
@@ -641,10 +642,17 @@ struct ir3_shader {
 	struct ir3_shader_key key_mask;
 };
 
+/**
+ * In order to use the same cmdstream, in particular constlen setup and const
+ * emit, for both binning and draw pass (a6xx+), the binning pass re-uses its
+ * corresponding draw pass shader's const_state.
+ */
 static inline struct ir3_const_state *
 ir3_const_state(const struct ir3_shader_variant *v)
 {
-	return v->shader->const_state;
+	if (v->binning_pass)
+		return v->nonbinning->const_state;
+	return v->const_state;
 }
 
 void * ir3_shader_assemble(struct ir3_shader_variant *v);
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index e6e7daf1bfe..c452d7e8f05 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -322,9 +322,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
            bool layered_clear)
 {
    struct ir3_const_state dummy_const_state = {};
-   struct ir3_shader dummy_shader = {
-      .const_state = &dummy_const_state,
-   };
+   struct ir3_shader dummy_shader = {};
 
    struct ir3_shader_variant vs = {
       .type = MESA_SHADER_VERTEX,
@@ -347,6 +345,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
          .regid = regid(1, 0),
       },
       .shader = &dummy_shader,
+      .const_state = &dummy_const_state,
   };
   if (layered_clear) {
      vs = (struct ir3_shader_variant) {
@@ -354,6 +353,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
         .instrlen = 1,
         .info.max_reg = 0,
         .shader = &dummy_shader,
+        .const_state = &dummy_const_state,
      };
   }
 
@@ -383,6 +383,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
         .cmd = 4,
      },
      .shader = &dummy_shader,
+     .const_state = &dummy_const_state,
   };
 
   struct ir3_shader_variant gs_shader = {
@@ -410,6 +411,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
         .regid = regid(1, 0),
      },
      .shader = &dummy_shader,
+     .const_state = &dummy_const_state,
   }, *gs = layered_clear ? &gs_shader : NULL;
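
For readers outside the freedreno tree, the following stand-alone sketch illustrates
the ownership model this patch establishes (simplified types and names such as
shader_variant and get_const_state are placeholders, not mesa code): each draw pass
variant owns a const_state, the binning variant only points back at its non-binning
counterpart, and every lookup goes through one accessor so both passes see the same
const layout.

#include <assert.h>
#include <stdlib.h>

/* Simplified stand-ins for ir3_const_state / ir3_shader_variant. */
struct const_state {
	unsigned num_driver_params;
};

struct shader_variant {
	int binning_pass;                  /* true for the binning variant */
	struct shader_variant *nonbinning; /* draw pass counterpart, if binning */
	struct const_state *const_state;   /* owned only by non-binning variants */
};

/* Mirrors the ir3_const_state() accessor added above: a binning variant
 * resolves to its draw pass counterpart's const_state, so both passes see
 * the same const layout and the same const emit cmdstream can be re-used. */
static struct const_state *
get_const_state(const struct shader_variant *v)
{
	if (v->binning_pass)
		return v->nonbinning->const_state;
	return v->const_state;
}

int main(void)
{
	struct shader_variant draw = {
		.const_state = calloc(1, sizeof(struct const_state)),
	};
	struct shader_variant binning = {
		.binning_pass = 1,
		.nonbinning = &draw,
	};

	/* Both variants resolve to the single layout owned by the draw variant. */
	assert(get_const_state(&binning) == get_const_state(&draw));

	free(draw.const_state);
	return 0;
}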