From c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 25 May 2015 10:30:54 -0400 Subject: [PATCH] freedreno/ir3: move inputs/outputs to shader These belong in the shader, rather than the block. This is mostly churn and nothing too interesting, but it is split out from the rest of the ir3_block reshuffling to cut down the noise in the later patch. Signed-off-by: Rob Clark --- .../drivers/freedreno/freedreno_util.h | 4 +- src/gallium/drivers/freedreno/ir3/ir3.c | 46 ++----- src/gallium/drivers/freedreno/ir3/ir3.h | 31 ++--- .../drivers/freedreno/ir3/ir3_cmdline.c | 16 +-- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 127 +++++++++--------- src/gallium/drivers/freedreno/ir3/ir3_cp.c | 13 +- src/gallium/drivers/freedreno/ir3/ir3_depth.c | 19 +-- src/gallium/drivers/freedreno/ir3/ir3_group.c | 36 ++--- .../drivers/freedreno/ir3/ir3_legalize.c | 10 +- src/gallium/drivers/freedreno/ir3/ir3_print.c | 6 +- src/gallium/drivers/freedreno/ir3/ir3_ra.c | 22 +-- src/gallium/drivers/freedreno/ir3/ir3_sched.c | 6 +- 12 files changed, 160 insertions(+), 176 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 630c6013ab9..deb0e602ce2 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -62,8 +62,8 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define FD_DBG_NOBYPASS 0x0040 #define FD_DBG_FRAGHALF 0x0080 #define FD_DBG_NOBIN 0x0100 -#define FD_DBG_OPTMSGS 0x0400 -#define FD_DBG_GLSL120 0x1000 +#define FD_DBG_OPTMSGS 0x0200 +#define FD_DBG_GLSL120 0x0400 extern int fd_mesa_debug; extern bool fd_binning_enabled; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 92c92e5001f..7515b79b0c9 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -66,12 +66,20 @@ void * ir3_alloc(struct ir3 *shader, int sz) 
return ptr; } -struct ir3 * ir3_create(struct ir3_compiler *compiler) +struct ir3 * ir3_create(struct ir3_compiler *compiler, + unsigned nin, unsigned nout) { - struct ir3 *shader = - calloc(1, sizeof(struct ir3)); + struct ir3 *shader = calloc(1, sizeof(struct ir3)); + grow_heap(shader); + shader->compiler = compiler; + shader->ninputs = nin; + shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin); + + shader->noutputs = nout; + shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); + return shader; } @@ -601,39 +609,11 @@ static void insert_instr(struct ir3_block *block, array_insert(shader->baryfs, instr); } -struct ir3_block * ir3_block_create(struct ir3 *shader, - unsigned ntmp, unsigned nin, unsigned nout) +struct ir3_block * ir3_block_create(struct ir3 *shader) { - struct ir3_block *block; - unsigned size; - char *ptr; - - size = sizeof(*block); - size += sizeof(block->temporaries[0]) * ntmp; - size += sizeof(block->inputs[0]) * nin; - size += sizeof(block->outputs[0]) * nout; - - ptr = ir3_alloc(shader, size); - - block = (void *)ptr; - ptr += sizeof(*block); - - block->temporaries = (void *)ptr; - block->ntemporaries = ntmp; - ptr += sizeof(block->temporaries[0]) * ntmp; - - block->inputs = (void *)ptr; - block->ninputs = nin; - ptr += sizeof(block->inputs[0]) * nin; - - block->outputs = (void *)ptr; - block->noutputs = nout; - ptr += sizeof(block->outputs[0]) * nout; - + struct ir3_block *block = ir3_alloc(shader, sizeof(*block)); block->shader = shader; - list_inithead(&block->instr_list); - return block; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 93a6ab5da7c..38912aa3bd4 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -333,6 +333,10 @@ struct ir3_heap_chunk; struct ir3 { struct ir3_compiler *compiler; + unsigned ninputs, noutputs; + struct ir3_instruction **inputs; + struct ir3_instruction **outputs; + /* Track 
bary.f (and ldlv) instructions.. this is needed in * scheduling to ensure that all varying fetches happen before * any potential kill instructions. The hw gets grumpy if all @@ -365,24 +369,19 @@ struct ir3 { struct ir3_block { struct ir3 *shader; - unsigned ntemporaries, ninputs, noutputs; - /* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */ - struct ir3_instruction **temporaries; - struct ir3_instruction **inputs; - struct ir3_instruction **outputs; /* only a single address register: */ struct ir3_instruction *address; struct list_head instr_list; }; -struct ir3 * ir3_create(struct ir3_compiler *compiler); +struct ir3 * ir3_create(struct ir3_compiler *compiler, + unsigned nin, unsigned nout); void ir3_destroy(struct ir3 *shader); void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, uint32_t gpu_id); void * ir3_alloc(struct ir3 *shader, int sz); -struct ir3_block * ir3_block_create(struct ir3 *shader, - unsigned ntmp, unsigned nin, unsigned nout); +struct ir3_block * ir3_block_create(struct ir3 *shader); struct ir3_instruction * ir3_instr_create(struct ir3_block *block, int category, opc_t opc); @@ -780,32 +779,28 @@ static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr void ir3_print(struct ir3 *ir); void ir3_print_instr(struct ir3_instruction *instr); -/* flatten if/else: */ -int ir3_block_flatten(struct ir3_block *block); - /* depth calculation: */ int ir3_delayslots(struct ir3_instruction *assigner, struct ir3_instruction *consumer, unsigned n); void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list); -void ir3_block_depth(struct ir3_block *block); +void ir3_depth(struct ir3 *ir); /* copy-propagate: */ -void ir3_block_cp(struct ir3_block *block); +void ir3_cp(struct ir3 *ir); /* group neighbors and insert mov's to resolve conflicts: */ -void ir3_block_group(struct ir3_block *block); +void ir3_group(struct ir3 *ir); /* scheduling: */ -int ir3_block_sched(struct ir3_block *block); 
+int ir3_sched(struct ir3 *ir); /* register assignment: */ struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(void *memctx); -int ir3_block_ra(struct ir3_block *block, enum shader_t type, +int ir3_ra(struct ir3 *ir3, enum shader_t type, bool frag_coord, bool frag_face); /* legalize: */ -void ir3_block_legalize(struct ir3_block *block, - bool *has_samp, int *max_bary); +void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary); /* ************************************************************************* */ /* instruction helpers */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 3fa886131f0..ad9d2719d59 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -66,34 +66,34 @@ static void dump_info(struct ir3_shader_variant *so, const char *str) // TODO make gpu_id configurable on cmdline bin = ir3_shader_assemble(so, 320); if (fd_mesa_debug & FD_DBG_DISASM) { - struct ir3_block *block = so->ir->block; + struct ir3 *ir = so->ir; struct ir3_register *reg; uint8_t regid; unsigned i; debug_printf("; %s: %s\n", type, str); - for (i = 0; i < block->ninputs; i++) { - if (!block->inputs[i]) { + for (i = 0; i < ir->ninputs; i++) { + if (!ir->inputs[i]) { debug_printf("; in%d unused\n", i); continue; } - reg = block->inputs[i]->regs[0]; + reg = ir->inputs[i]->regs[0]; regid = reg->num; debug_printf("@in(%sr%d.%c)\tin%d\n", (reg->flags & IR3_REG_HALF) ? "h" : "", (regid >> 2), "xyzw"[regid & 0x3], i); } - for (i = 0; i < block->noutputs; i++) { - if (!block->outputs[i]) { + for (i = 0; i < ir->noutputs; i++) { + if (!ir->outputs[i]) { debug_printf("; out%d unused\n", i); continue; } /* kill shows up as a virtual output.. skip it! 
*/ - if (is_kill(block->outputs[i])) + if (is_kill(ir->outputs[i])) continue; - reg = block->outputs[i]->regs[0]; + reg = ir->outputs[i]->regs[0]; regid = reg->num; debug_printf("@out(%sr%d.%c)\tout%d\n", (reg->flags & IR3_REG_HALF) ? "h" : "", diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 39f4527c22b..f62a5ec2b26 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -51,6 +51,8 @@ static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val); struct ir3_compile { + struct ir3_compiler *compiler; + const struct tgsi_token *tokens; struct nir_shader *s; @@ -170,7 +172,8 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens) /* TODO nir doesn't lower everything for us yet, but ideally it would: */ static const struct tgsi_token * -lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so) +lower_tgsi(struct ir3_compile *ctx, const struct tgsi_token *tokens, + struct ir3_shader_variant *so) { struct tgsi_shader_info info; struct tgsi_lowering_config lconfig = { @@ -192,7 +195,7 @@ lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so) break; } - if (so->ir->compiler->gpu_id >= 400) { + if (ctx->compiler->gpu_id >= 400) { /* a4xx seems to have *no* sam.p */ lconfig.lower_TXP = ~0; /* lower all txp */ } else { @@ -204,13 +207,14 @@ lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so) } static struct ir3_compile * -compile_init(struct ir3_shader_variant *so, +compile_init(struct ir3_compiler *compiler, + struct ir3_shader_variant *so, const struct tgsi_token *tokens) { struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile); const struct tgsi_token *lowered_tokens; - if (so->ir->compiler->gpu_id >= 400) { + if (compiler->gpu_id >= 400) { /* need special handling for "flat" */ ctx->flat_bypass = true; ctx->levels_add_one = 
false; @@ -230,6 +234,7 @@ compile_init(struct ir3_shader_variant *so, break; } + ctx->compiler = compiler; ctx->ir = so->ir; ctx->so = so; ctx->next_inloc = 8; @@ -240,7 +245,7 @@ compile_init(struct ir3_shader_variant *so, ctx->addr_ht = _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); - lowered_tokens = lower_tgsi(tokens, so); + lowered_tokens = lower_tgsi(ctx, tokens, so); if (!lowered_tokens) lowered_tokens = tokens; ctx->s = to_nir(lowered_tokens); @@ -454,7 +459,7 @@ create_collect(struct ir3_block *block, struct ir3_instruction **arr, return NULL; collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz); - ir3_reg_create(collect, 0, 0); + ir3_reg_create(collect, 0, 0); /* dst */ for (unsigned i = 0; i < arrsz; i++) ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i]; @@ -1134,8 +1139,8 @@ static void add_sysval_input(struct ir3_compile *ctx, unsigned name, so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT; so->total_in++; - ctx->block->ninputs = MAX2(ctx->block->ninputs, r + 1); - ctx->block->inputs[r] = instr; + ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1); + ctx->ir->inputs[r] = instr; } static void @@ -1174,17 +1179,18 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_input: for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; - dst[i] = b->inputs[n]; + dst[i] = ctx->ir->inputs[n]; } break; case nir_intrinsic_load_input_indirect: src = get_src(ctx, &intr->src[0]); struct ir3_instruction *collect = - create_collect(b, b->inputs, b->ninputs); + create_collect(b, ctx->ir->inputs, ctx->ir->ninputs); struct ir3_instruction *addr = get_addr(ctx, src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; - dst[i] = create_indirect_load(ctx, b->ninputs, n, addr, collect); + dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, + n, addr, collect); } break; case nir_intrinsic_load_var: @@ -1197,7 +1203,7 @@ emit_intrinisic(struct 
ir3_compile *ctx, nir_intrinsic_instr *intr) src = get_src(ctx, &intr->src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; - b->outputs[n] = src[i]; + ctx->ir->outputs[n] = src[i]; } break; case nir_intrinsic_load_base_vertex: @@ -1707,7 +1713,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) instr = create_input(ctx->block, NULL, idx); } - ctx->block->inputs[idx] = instr; + ctx->ir->inputs[idx] = instr; } if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) { @@ -1774,7 +1780,7 @@ setup_output(struct ir3_compile *ctx, nir_variable *out) for (int i = 0; i < ncomp; i++) { unsigned idx = (n * 4) + i; - ctx->block->outputs[idx] = create_immed(ctx->block, fui(0.0)); + ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0)); } } @@ -1794,12 +1800,14 @@ emit_instructions(struct ir3_compile *ctx) ninputs += 8; } - ctx->block = ir3_block_create(ctx->ir, 0, ninputs, noutputs); + ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs); + ctx->block = ir3_block_create(ctx->ir); + ctx->ir->block = ctx->block; if (ctx->so->type == SHADER_FRAGMENT) { - ctx->block->noutputs -= ARRAY_SIZE(ctx->kill); + ctx->ir->noutputs -= ARRAY_SIZE(ctx->kill); } else if (ctx->so->type == SHADER_VERTEX) { - ctx->block->ninputs -= 8; + ctx->ir->ninputs -= 8; } /* for fragment shader, we have a single input register (usually @@ -1849,12 +1857,12 @@ static void fixup_frag_inputs(struct ir3_compile *ctx) { struct ir3_shader_variant *so = ctx->so; - struct ir3_block *block = ctx->block; + struct ir3 *ir = ctx->ir; struct ir3_instruction **inputs; struct ir3_instruction *instr; int n, regid = 0; - block->ninputs = 0; + ir->ninputs = 0; n = 4; /* always have frag_pos */ n += COND(so->frag_face, 4); @@ -1866,15 +1874,15 @@ fixup_frag_inputs(struct ir3_compile *ctx) /* this ultimately gets assigned to hr0.x so doesn't conflict * with frag_coord/frag_pos.. 
*/ - inputs[block->ninputs++] = ctx->frag_face; + inputs[ir->ninputs++] = ctx->frag_face; ctx->frag_face->regs[0]->num = 0; /* remaining channels not used, but let's avoid confusing * other parts that expect inputs to come in groups of vec4 */ - inputs[block->ninputs++] = NULL; - inputs[block->ninputs++] = NULL; - inputs[block->ninputs++] = NULL; + inputs[ir->ninputs++] = NULL; + inputs[ir->ninputs++] = NULL; + inputs[ir->ninputs++] = NULL; } /* since we don't know where to set the regid for frag_coord, @@ -1888,28 +1896,28 @@ fixup_frag_inputs(struct ir3_compile *ctx) ctx->frag_coord[2]->regs[0]->num = regid++; ctx->frag_coord[3]->regs[0]->num = regid++; - inputs[block->ninputs++] = ctx->frag_coord[0]; - inputs[block->ninputs++] = ctx->frag_coord[1]; - inputs[block->ninputs++] = ctx->frag_coord[2]; - inputs[block->ninputs++] = ctx->frag_coord[3]; + inputs[ir->ninputs++] = ctx->frag_coord[0]; + inputs[ir->ninputs++] = ctx->frag_coord[1]; + inputs[ir->ninputs++] = ctx->frag_coord[2]; + inputs[ir->ninputs++] = ctx->frag_coord[3]; } /* we always have frag_pos: */ so->pos_regid = regid; /* r0.x */ - instr = create_input(block, NULL, block->ninputs); + instr = create_input(ctx->block, NULL, ir->ninputs); instr->regs[0]->num = regid++; - inputs[block->ninputs++] = instr; + inputs[ir->ninputs++] = instr; ctx->frag_pos->regs[1]->instr = instr; /* r0.y */ - instr = create_input(block, NULL, block->ninputs); + instr = create_input(ctx->block, NULL, ir->ninputs); instr->regs[0]->num = regid++; - inputs[block->ninputs++] = instr; + inputs[ir->ninputs++] = instr; ctx->frag_pos->regs[2]->instr = instr; - block->inputs = inputs; + ir->inputs = inputs; } int @@ -1919,18 +1927,14 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, struct ir3_shader_key key) { struct ir3_compile *ctx; - struct ir3_block *block; + struct ir3 *ir; struct ir3_instruction **inputs; unsigned i, j, actual_in; int ret = 0, max_bary; assert(!so->ir); - so->ir = ir3_create(compiler); - - assert(so->ir); 
- - ctx = compile_init(so, tokens); + ctx = compile_init(compiler, so, tokens); if (!ctx) { DBG("INIT failed!"); ret = -1; @@ -1945,11 +1949,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, goto out; } - block = ctx->block; - so->ir->block = block; + ir = so->ir = ctx->ir; /* keep track of the inputs from TGSI perspective.. */ - inputs = block->inputs; + inputs = ir->inputs; /* but fixup actual inputs for frag shader: */ if (so->type == SHADER_FRAGMENT) @@ -1966,24 +1969,24 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, (name == TGSI_SEMANTIC_PSIZE))) { if (i != j) { so->outputs[j] = so->outputs[i]; - block->outputs[(j*4)+0] = block->outputs[(i*4)+0]; - block->outputs[(j*4)+1] = block->outputs[(i*4)+1]; - block->outputs[(j*4)+2] = block->outputs[(i*4)+2]; - block->outputs[(j*4)+3] = block->outputs[(i*4)+3]; + ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0]; + ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1]; + ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2]; + ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3]; } j++; } } so->outputs_count = j; - block->noutputs = j * 4; + ir->noutputs = j * 4; } /* if we want half-precision outputs, mark the output registers * as half: */ if (key.half_precision) { - for (i = 0; i < block->noutputs; i++) { - struct ir3_instruction *out = block->outputs[i]; + for (i = 0; i < ir->noutputs; i++) { + struct ir3_instruction *out = ir->outputs[i]; if (!out) continue; out->regs[0]->flags |= IR3_REG_HALF; @@ -2004,36 +2007,34 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, */ if (so->type == SHADER_FRAGMENT) { for (i = 0; i < ctx->kill_count; i++) - block->outputs[block->noutputs++] = ctx->kill[i]; + ir->outputs[ir->noutputs++] = ctx->kill[i]; } if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("BEFORE CP:\n"); - ir3_print(so->ir); + ir3_print(ir); } - ir3_block_depth(block); - - ir3_block_cp(block); + ir3_cp(ir); if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("BEFORE GROUPING:\n"); - ir3_print(so->ir); + ir3_print(ir); } /* Group 
left/right neighbors, inserting mov's where needed to * solve conflicts: */ - ir3_block_group(block); + ir3_group(ir); - ir3_block_depth(block); + ir3_depth(ir); if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER DEPTH:\n"); - ir3_print(so->ir); + ir3_print(ir); } - ret = ir3_block_sched(block); + ret = ir3_sched(ir); if (ret) { DBG("SCHED failed!"); goto out; @@ -2041,10 +2042,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER SCHED:\n"); - ir3_print(so->ir); + ir3_print(ir); } - ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face); + ret = ir3_ra(ir, so->type, so->frag_coord, so->frag_face); if (ret) { DBG("RA failed!"); goto out; @@ -2052,14 +2053,14 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, if (fd_mesa_debug & FD_DBG_OPTMSGS) { printf("AFTER RA:\n"); - ir3_print(so->ir); + ir3_print(ir); } - ir3_block_legalize(block, &so->has_samp, &max_bary); + ir3_legalize(ir, &so->has_samp, &max_bary); /* fixup input/outputs: */ for (i = 0; i < so->outputs_count; i++) { - so->outputs[i].regid = block->outputs[i*4]->regs[0]->num; + so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num; /* preserve hack for depth output.. 
tgsi writes depth to .z, * but what we give the hw is the scalar register: */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 8c057166f32..a477bd4b237 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -387,16 +387,17 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags) return instr; } -void ir3_block_cp(struct ir3_block *block) +void +ir3_cp(struct ir3 *ir) { - ir3_clear_mark(block->shader); + ir3_clear_mark(ir->block->shader); - for (unsigned i = 0; i < block->noutputs; i++) { - if (block->outputs[i]) { + for (unsigned i = 0; i < ir->noutputs; i++) { + if (ir->outputs[i]) { struct ir3_instruction *out = - instr_cp(block->outputs[i], NULL); + instr_cp(ir->outputs[i], NULL); - block->outputs[i] = out; + ir->outputs[i] = out; } } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 601e14a1c85..6fc8b1762ff 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -144,22 +144,23 @@ remove_unused_by_block(struct ir3_block *block) } } -void ir3_block_depth(struct ir3_block *block) +void +ir3_depth(struct ir3 *ir) { unsigned i; - ir3_clear_mark(block->shader); - for (i = 0; i < block->noutputs; i++) - if (block->outputs[i]) - ir3_instr_depth(block->outputs[i]); + ir3_clear_mark(ir->block->shader); + for (i = 0; i < ir->noutputs; i++) + if (ir->outputs[i]) + ir3_instr_depth(ir->outputs[i]); /* mark un-used instructions: */ - remove_unused_by_block(block); + remove_unused_by_block(ir->block); /* cleanup unused inputs: */ - for (i = 0; i < block->ninputs; i++) { - struct ir3_instruction *in = block->inputs[i]; + for (i = 0; i < ir->ninputs; i++) { + struct ir3_instruction *in = ir->inputs[i]; if (in && (in->depth == DEPTH_UNUSED)) - block->inputs[i] = NULL; + ir->inputs[i] = NULL; } } diff --git 
a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c index d744477aad1..85d0948fa97 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_group.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -99,7 +99,8 @@ static struct ir3_instruction *instr_get(void *arr, int idx) { return ssa(((struct ir3_instruction *)arr)->regs[idx+1]); } -static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) +static void +instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) { ((struct ir3_instruction *)arr)->regs[idx+1]->instr = ir3_MOV(instr->block, instr, TYPE_F32); @@ -107,7 +108,8 @@ static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) static struct group_ops instr_ops = { instr_get, instr_insert_mov }; -static void group_n(struct group_ops *ops, void *arr, unsigned n) +static void +group_n(struct group_ops *ops, void *arr, unsigned n) { unsigned i, j; @@ -170,7 +172,8 @@ restart: } } -static void instr_find_neighbors(struct ir3_instruction *instr) +static void +instr_find_neighbors(struct ir3_instruction *instr) { struct ir3_instruction *src; @@ -189,7 +192,8 @@ static void instr_find_neighbors(struct ir3_instruction *instr) * we need to insert dummy/padding instruction for grouping, and * then take it back out again before anyone notices. 
*/ -static void pad_and_group_input(struct ir3_instruction **input, unsigned n) +static void +pad_and_group_input(struct ir3_instruction **input, unsigned n) { int i, mask = 0; struct ir3_block *block = NULL; @@ -214,7 +218,8 @@ static void pad_and_group_input(struct ir3_instruction **input, unsigned n) } } -static void block_find_neighbors(struct ir3_block *block) +static void +find_neighbors(struct ir3 *ir) { unsigned i; @@ -232,22 +237,23 @@ static void block_find_neighbors(struct ir3_block *block) * This logic won't quite cut it if we don't align smaller * on vec4 boundaries */ - for (i = 0; i < block->ninputs; i += 4) - pad_and_group_input(&block->inputs[i], 4); - for (i = 0; i < block->noutputs; i += 4) - group_n(&arr_ops_out, &block->outputs[i], 4); + for (i = 0; i < ir->ninputs; i += 4) + pad_and_group_input(&ir->inputs[i], 4); + for (i = 0; i < ir->noutputs; i += 4) + group_n(&arr_ops_out, &ir->outputs[i], 4); - for (i = 0; i < block->noutputs; i++) { - if (block->outputs[i]) { - struct ir3_instruction *instr = block->outputs[i]; + for (i = 0; i < ir->noutputs; i++) { + if (ir->outputs[i]) { + struct ir3_instruction *instr = ir->outputs[i]; instr_find_neighbors(instr); } } } -void ir3_block_group(struct ir3_block *block) +void +ir3_group(struct ir3 *ir) { - ir3_clear_mark(block->shader); - block_find_neighbors(block); + ir3_clear_mark(ir->block->shader); + find_neighbors(ir); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index be0b5ce442c..34055f4c612 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -26,7 +26,6 @@ * Rob Clark */ -#include "pipe/p_shader_tokens.h" #include "util/u_math.h" #include "freedreno_util.h" @@ -48,7 +47,8 @@ struct ir3_legalize_ctx { int max_bary; }; -static void legalize(struct ir3_legalize_ctx *ctx) +static void +legalize(struct ir3_legalize_ctx *ctx) { struct ir3_block *block = ctx->block; 
struct ir3_instruction *last_input = NULL; @@ -220,11 +220,11 @@ static void legalize(struct ir3_legalize_ctx *ctx) ->flags |= IR3_INSTR_SS | IR3_INSTR_SY; } -void ir3_block_legalize(struct ir3_block *block, - bool *has_samp, int *max_bary) +void +ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary) { struct ir3_legalize_ctx ctx = { - .block = block, + .block = ir->block, .max_bary = -1, }; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c index 986a5bc7be0..965c834b8aa 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_print.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c @@ -198,10 +198,10 @@ ir3_print(struct ir3 *ir) print_block(block, 0); - for (unsigned i = 0; i < block->noutputs; i++) { - if (!block->outputs[i]) + for (unsigned i = 0; i < ir->noutputs; i++) { + if (!ir->outputs[i]) continue; printf("out%d: ", i); - print_instr(block->outputs[i], 0); + print_instr(ir->outputs[i], 0); } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 39ce9c5d4ce..394c63f646d 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -527,13 +527,13 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) static void ra_add_interference(struct ir3_ra_ctx *ctx) { - struct ir3_block *block = ctx->ir->block; + struct ir3 *ir = ctx->ir; ra_block_compute_live_ranges(ctx, ctx->ir->block); /* need to fix things up to keep outputs live: */ - for (unsigned i = 0; i < block->noutputs; i++) { - struct ir3_instruction *instr = block->outputs[i]; + for (unsigned i = 0; i < ir->noutputs; i++) { + struct ir3_instruction *instr = ir->outputs[i]; struct ir3_instruction *defn; int cls, sz, off; @@ -682,10 +682,10 @@ ra_alloc(struct ir3_ra_ctx *ctx) * constraints/unknowns about setup for some of these regs: */ if (ctx->type == SHADER_FRAGMENT) { - struct ir3_block *block = ctx->ir->block; + struct 
ir3 *ir = ctx->ir; unsigned i = 0, j; - if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) { - struct ir3_instruction *instr = block->inputs[i]; + if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) { + struct ir3_instruction *instr = ir->inputs[i]; unsigned cls = size_to_class(1, true); unsigned name = ctx->class_base[cls] + instr->name; unsigned reg = ctx->set->gpr_to_ra_reg[cls][0]; @@ -695,8 +695,8 @@ ra_alloc(struct ir3_ra_ctx *ctx) i += 4; } - for (j = 0; i < block->ninputs; i++) { - struct ir3_instruction *instr = block->inputs[i]; + for (j = 0; i < ir->ninputs; i++) { + struct ir3_instruction *instr = ir->inputs[i]; if (instr) { struct ir3_instruction *defn; int cls, sz, off; @@ -725,14 +725,14 @@ ra_alloc(struct ir3_ra_ctx *ctx) return 0; } -int ir3_block_ra(struct ir3_block *block, enum shader_t type, +int ir3_ra(struct ir3 *ir, enum shader_t type, bool frag_coord, bool frag_face) { struct ir3_ra_ctx ctx = { - .ir = block->shader, + .ir = ir, .type = type, .frag_face = frag_face, - .set = block->shader->compiler->set, + .set = ir->compiler->set, }; int ret; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index 1d166d879df..0d404a83583 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -424,11 +424,11 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) } } -int ir3_block_sched(struct ir3_block *block) +int ir3_sched(struct ir3 *ir) { struct ir3_sched_ctx ctx = {0}; - ir3_clear_mark(block->shader); - sched_block(&ctx, block); + ir3_clear_mark(ir->block->shader); + sched_block(&ctx, ir->block); if (ctx.error) return -1; return 0; -- 2.30.2