diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 8c7d9a33f3a..c389f750bd5 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -24,8 +24,6 @@
  * Rob Clark
  */
 
-#include "util/u_math.h"
-
 #include "ir3_compiler.h"
 #include "ir3_context.h"
 #include "ir3_image.h"
@@ -73,19 +71,31 @@ ir3_context_init(struct ir3_compiler *compiler,
 	 * creating duplicate variants..
 	 */
 
-	if (ir3_key_lowers_nir(&so->key)) {
-		nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
-		ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
-	} else {
-		/* fast-path for shader key that lowers nothing in NIR: */
-		ctx->s = nir_shader_clone(ctx, so->shader->nir);
-	}
+	ctx->s = nir_shader_clone(ctx, so->shader->nir);
+	if (ir3_key_lowers_nir(&so->key))
+		ir3_optimize_nir(so->shader, ctx->s, &so->key);
 
 	/* this needs to be the last pass run, so do this here instead of
 	 * in ir3_optimize_nir():
 	 */
 	NIR_PASS_V(ctx->s, nir_lower_bool_to_int32);
 	NIR_PASS_V(ctx->s, nir_lower_locals_to_regs);
+
+	/* We want to lower nir_op_imul as late as possible, to also catch
+	 * those generated by earlier passes (e.g., nir_lower_locals_to_regs).
+	 * However, we want a final swing of a few passes to have a chance
+	 * at optimizing the result.
+	 */
+	bool progress = false;
+	NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
+	if (progress) {
+		NIR_PASS_V(ctx->s, nir_opt_algebraic);
+		NIR_PASS_V(ctx->s, nir_opt_copy_prop_vars);
+		NIR_PASS_V(ctx->s, nir_opt_dead_write_vars);
+		NIR_PASS_V(ctx->s, nir_opt_dce);
+		NIR_PASS_V(ctx->s, nir_opt_constant_folding);
+	}
+
 	NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);
 
 	if (ir3_shader_debug & IR3_DBG_DISASM) {
@@ -101,73 +111,8 @@ ir3_context_init(struct ir3_compiler *compiler,
 		nir_print_shader(ctx->s, stderr);
 	}
 
-	ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);
-
-	so->num_uniforms = ctx->s->num_uniforms;
-	so->num_ubos = ctx->s->info.num_ubos;
 	ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
 
-	/* Layout of constant registers, each section aligned to vec4.  Note
-	 * that pointer size (ubo, etc) changes depending on generation.
-	 *
-	 *    user consts
-	 *    UBO addresses
-	 *    SSBO sizes
-	 *    if (vertex shader) {
-	 *        driver params (IR3_DP_*)
-	 *        if (stream_output.num_outputs > 0)
-	 *           stream-out addresses
-	 *    }
-	 *    immediates
-	 *
-	 * Immediates go last mostly because they are inserted in the CP pass
-	 * after the nir -> ir3 frontend.
-	 *
-	 * Note UBO size in bytes should be aligned to vec4
-	 */
-	debug_assert((ctx->so->shader->ubo_state.size % 16) == 0);
-	unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4);
-	unsigned ptrsz = ir3_pointer_size(ctx->compiler);
-
-	memset(&so->constbase, ~0, sizeof(so->constbase));
-
-	if (so->num_ubos > 0) {
-		so->constbase.ubo = constoff;
-		constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
-	}
-
-	if (so->const_layout.ssbo_size.count > 0) {
-		unsigned cnt = so->const_layout.ssbo_size.count;
-		so->constbase.ssbo_sizes = constoff;
-		constoff += align(cnt, 4) / 4;
-	}
-
-	if (so->const_layout.image_dims.count > 0) {
-		unsigned cnt = so->const_layout.image_dims.count;
-		so->constbase.image_dims = constoff;
-		constoff += align(cnt, 4) / 4;
-	}
-
-	unsigned num_driver_params = 0;
-	if (so->type == MESA_SHADER_VERTEX) {
-		num_driver_params = IR3_DP_VS_COUNT;
-	} else if (so->type == MESA_SHADER_COMPUTE) {
-		num_driver_params = IR3_DP_CS_COUNT;
-	}
-
-	so->constbase.driver_param = constoff;
-	constoff += align(num_driver_params, 4) / 4;
-
-	if ((so->type == MESA_SHADER_VERTEX) &&
-			(compiler->gpu_id < 500) &&
-			so->shader->stream_output.num_outputs > 0) {
-		so->constbase.tfbo = constoff;
-		constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
-	}
-
-	so->constbase.immediate = constoff;
 
 	return ctx;
 }
@@ -237,7 +182,7 @@ ir3_get_src(struct ir3_context *ctx, nir_src *src)
 		for (unsigned i = 0; i < num_components; i++) {
 			unsigned n = src->reg.base_offset * reg->num_components + i;
 			compile_assert(ctx, n < arr->length);
-			value[i] = ir3_create_array_load(ctx, arr, n, addr);
+			value[i] = ir3_create_array_load(ctx, arr, n, addr, reg->bit_size);
 		}
 
 		return value;
@@ -541,20 +486,28 @@ ir3_get_array(struct ir3_context *ctx, nir_register *reg)
 /* relative (indirect) if address!=NULL */
 struct ir3_instruction *
 ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
-		struct ir3_instruction *address)
+		struct ir3_instruction *address, unsigned bitsize)
 {
 	struct ir3_block *block = ctx->block;
 	struct ir3_instruction *mov;
 	struct ir3_register *src;
+	unsigned flags = 0;
 
 	mov = ir3_instr_create(block, OPC_MOV);
-	mov->cat1.src_type = TYPE_U32;
-	mov->cat1.dst_type = TYPE_U32;
+	if (bitsize < 32) {
+		mov->cat1.src_type = TYPE_U16;
+		mov->cat1.dst_type = TYPE_U16;
+		flags |= IR3_REG_HALF;
+	} else {
+		mov->cat1.src_type = TYPE_U32;
+		mov->cat1.dst_type = TYPE_U32;
+	}
+
 	mov->barrier_class = IR3_BARRIER_ARRAY_R;
 	mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
-	ir3_reg_create(mov, 0, 0);
+	ir3_reg_create(mov, 0, flags);
 	src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
-			COND(address, IR3_REG_RELATIV));
+			COND(address, IR3_REG_RELATIV) | flags);
 	src->instr = arr->last_write;
 	src->size = arr->length;
 	src->array.id = arr->id;
@@ -577,8 +530,11 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
 
 	/* if not relative store, don't create an extra mov, since that
 	 * ends up being difficult for cp to remove.
+	 *
+	 * Also, don't skip the mov if the src is meta (like fanout/split),
+	 * since that creates a situation that RA can't really handle properly.
 	 */
-	if (!address) {
+	if (!address && !is_meta(src)) {
 		dst = src->regs[0];
 
 		src->barrier_class |= IR3_BARRIER_ARRAY_W;
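The second hunk above follows a common NIR idiom: run a lowering pass late, and only if it reports progress, run a small cleanup swing over the result. A minimal sketch of the idiom, assuming mesa's NIR headers and the ir3_nir_lower_imul pass declared in ir3_nir.h (the wrapper function name is illustrative, not part of the tree):

#include "compiler/nir/nir.h"
#include "ir3_nir.h"

static void
lower_imul_late(nir_shader *s)
{
	bool progress = false;

	/* Lower 32-bit nir_op_imul only now, so that multiplies introduced
	 * by earlier passes (e.g., array indexing from
	 * nir_lower_locals_to_regs) get caught as well.
	 */
	NIR_PASS(progress, s, ir3_nir_lower_imul);

	/* Pay for a cleanup swing only when something was actually lowered: */
	if (progress) {
		NIR_PASS_V(s, nir_opt_algebraic);
		NIR_PASS_V(s, nir_opt_dce);
		NIR_PASS_V(s, nir_opt_constant_folding);
	}
}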
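For the widened ir3_create_array_load() signature, callers forward the NIR register's bit_size so that sub-32-bit array elements come back as half registers, exactly as the ir3_get_src() hunk does. A hedged usage sketch (load_array_component is an illustrative wrapper; ir3_get_array() and the new signature come from the hunks above):

#include "ir3_context.h"

static struct ir3_instruction *
load_array_component(struct ir3_context *ctx, nir_register *reg,
		unsigned comp, struct ir3_instruction *addr)
{
	struct ir3_array *arr = ir3_get_array(ctx, reg);

	/* bit_size < 32 makes the mov use TYPE_U16 and tags both of its
	 * registers with IR3_REG_HALF, per ir3_create_array_load() above.
	 */
	return ir3_create_array_load(ctx, arr, comp, addr, reg->bit_size);
}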