From f07e25bc6d84fd5b71dd7f84e0d36166ff6aaf79 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 17 Jul 2020 14:08:43 -0700 Subject: [PATCH] freedreno/ir3: Clean up instrlen setup. We were calculating it with the gpu_id check in two places. Do it once, and use ir3_compiler for the gpu_id dependency. Part-of: --- src/freedreno/ir3/ir3.c | 17 +++++++---------- src/freedreno/ir3/ir3_compiler.c | 2 ++ src/freedreno/ir3/ir3_compiler.h | 5 +++++ src/freedreno/ir3/ir3_shader.c | 10 ++-------- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index b170093ce43..124b41f9a3a 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -921,6 +921,7 @@ void * ir3_assemble(struct ir3_shader_variant *v) uint32_t *ptr, *dwords; struct ir3_info *info = &v->info; struct ir3 *shader = v->ir; + const struct ir3_compiler *compiler = v->shader->compiler; memset(info, 0, sizeof(*info)); info->data = v; @@ -928,21 +929,17 @@ void * ir3_assemble(struct ir3_shader_variant *v) info->max_half_reg = -1; info->max_const = -1; + uint32_t instr_count = 0; foreach_block (block, &shader->block_list) { foreach_instr (instr, &block->instr_list) { - info->sizedwords += 2; + instr_count++; } } - /* need an integer number of instruction "groups" (sets of 16 - * instructions on a4xx or sets of 4 instructions on a3xx), - * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits) - */ - if (v->shader->compiler->gpu_id >= 400) { - info->sizedwords = align(info->sizedwords, 16 * 2); - } else { - info->sizedwords = align(info->sizedwords, 4 * 2); - } + v->instrlen = DIV_ROUND_UP(instr_count, compiler->instr_align); + + /* Pad out with NOPs to instrlen. 
*/ + info->sizedwords = v->instrlen * compiler->instr_align * sizeof(instr_t) / 4; ptr = dwords = rzalloc_size(v, 4 * info->sizedwords); diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 342282ca7db..4bc246ba164 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -115,6 +115,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id) compiler->unminify_coords = false; compiler->txf_ms_with_isaml = false; compiler->array_index_add_half = true; + compiler->instr_align = 16; compiler->const_upload_unit = 4; } else { /* no special handling for "flat" */ @@ -123,6 +124,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id) compiler->unminify_coords = true; compiler->txf_ms_with_isaml = true; compiler->array_index_add_half = false; + compiler->instr_align = 4; compiler->const_upload_unit = 8; } diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 663e0c531e0..5d7d140e416 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -96,6 +96,11 @@ struct ir3_compiler { /* The maximum number of constants, in vec4's, for compute shaders. */ uint16_t max_const_compute; + /* Number of instructions that the shader's base address and length + * (instrlen divides instruction count by this) must be aligned to. 
+ */ + uint32_t instr_align; + /* on a3xx, the unit of indirect const load is higher than later gens (in * vec4 units): */ diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 55d62aedcd4..f91c4508c6a 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -124,19 +124,13 @@ fixup_regfootprint(struct ir3_shader_variant *v) */ void * ir3_shader_assemble(struct ir3_shader_variant *v) { - unsigned gpu_id = v->shader->compiler->gpu_id; + const struct ir3_compiler *compiler = v->shader->compiler; void *bin; bin = ir3_assemble(v); if (!bin) return NULL; - if (gpu_id >= 400) { - v->instrlen = v->info.sizedwords / (2 * 16); - } else { - v->instrlen = v->info.sizedwords / (2 * 4); - } - /* NOTE: if relative addressing is used, we set constlen in * the compiler (to worst-case value) since we don't know in * the assembler what the max addr reg value can be: @@ -147,7 +141,7 @@ void * ir3_shader_assemble(struct ir3_shader_variant *v) * uploads are in units of 4 dwords. Round it up here to make calculations * regarding the shared constlen simpler. */ - if (gpu_id >= 400) + if (compiler->gpu_id >= 400) v->constlen = align(v->constlen, 4); fixup_regfootprint(v); -- 2.30.2