X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fir3%2Fir3.c;h=23f29664e82c093baba5f51c3802cbaa0a5dda2a;hb=38a4b861459b02401d3ff71670218506e7acf019;hp=f209585dd9a0bdb07010952ff8a5056146801b43;hpb=c7c432738a68d543dc1b73eae895ec4dfacd92b2;p=mesa.git diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index f209585dd9a..23f29664e82 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -45,17 +45,12 @@ void * ir3_alloc(struct ir3 *shader, int sz) return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */ } -struct ir3 * ir3_create(struct ir3_compiler *compiler, - unsigned nin, unsigned nout) +struct ir3 * ir3_create(struct ir3_compiler *compiler, gl_shader_stage type) { - struct ir3 *shader = rzalloc(compiler, struct ir3); + struct ir3 *shader = rzalloc(NULL, struct ir3); shader->compiler = compiler; - shader->ninputs = nin; - shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin); - - shader->noutputs = nout; - shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); + shader->type = type; list_inithead(&shader->block_list); list_inithead(&shader->array_list); @@ -103,28 +98,28 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, if (reg->flags & IR3_REG_RELATIV) { components = reg->size; val.idummy10 = reg->array.offset; - max = (reg->array.offset + repeat + components - 1) >> 2; + max = (reg->array.offset + repeat + components - 1); } else { components = util_last_bit(reg->wrmask); val.comp = reg->num & 0x3; val.num = reg->num >> 2; - max = (reg->num + repeat + components - 1) >> 2; + max = (reg->num + repeat + components - 1); } if (reg->flags & IR3_REG_CONST) { - info->max_const = MAX2(info->max_const, max); + info->max_const = MAX2(info->max_const, max >> 2); } else if (val.num == 63) { /* ignore writes to dummy register r63.x */ - } else if (max < 48) { + } else if (max < regid(48, 0)) { if (reg->flags & IR3_REG_HALF) { if (info->gpu_id >= 600) { /* starting w/ a6xx, half regs conflict with full regs: */ - info->max_reg = MAX2(info->max_reg, (max+1)/2); + info->max_reg = MAX2(info->max_reg, max >> 3); } else { - info->max_half_reg = MAX2(info->max_half_reg, max); + info->max_half_reg = MAX2(info->max_half_reg, max >> 2); } } else { - info->max_reg = MAX2(info->max_reg, max); + info->max_reg = MAX2(info->max_reg, max >> 2); } } } @@ -146,9 +141,10 @@ static int emit_cat0(struct ir3_instruction *instr, void *ptr, } cat0->repeat = instr->repeat; cat0->ss = !!(instr->flags & IR3_INSTR_SS); - cat0->inv = instr->cat0.inv; - cat0->comp = instr->cat0.comp; + cat0->inv0 = instr->cat0.inv; + cat0->comp0 = instr->cat0.comp; cat0->opc = instr->opc; + cat0->opc_hi = instr->opc >= 16; cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); cat0->sync = !!(instr->flags & IR3_INSTR_SY); cat0->opc_cat = 0; @@ -234,7 +230,8 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr, } else if (src1->flags & IR3_REG_CONST) { iassert(src1->num < (1 << 12)); cat2->c1.src1 = reg(src1, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | + absneg); cat2->c1.src1_c = 1; } else { iassert(src1->num < (1 << 11)); @@ -260,7 +257,8 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr, } else if (src2->flags & IR3_REG_CONST) { iassert(src2->num < (1 << 12)); cat2->c2.src2 = reg(src2, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | + absneg); cat2->c2.src2_c = 1; } else { iassert(src2->num < (1 << 11)); @@ -344,7 +342,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr, } else if (src1->flags & IR3_REG_CONST) { iassert(src1->num < (1 << 12)); cat3->c1.src1 = reg(src1, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg); cat3->c1.src1_c = 1; } else { iassert(src1->num < (1 << 11)); @@ -369,7 +367,7 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr, } else if (src3->flags & IR3_REG_CONST) { iassert(src3->num < (1 << 12)); cat3->c2.src3 = reg(src3, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg); cat3->c2.src3_c = 1; } else { iassert(src3->num < (1 << 11)); @@ -451,27 +449,23 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr, * than tex/sampler idx, we use the first src reg in the ir to hold * samp_tex hvec2: */ - struct ir3_register *src1 = instr->regs[2]; - struct ir3_register *src2 = instr->regs[3]; + struct ir3_register *src1; + struct ir3_register *src2; instr_cat5_t *cat5 = ptr; - switch (instr->opc) { - case OPC_DSX: - case OPC_DSXPP_1: - case OPC_DSY: - case OPC_DSYPP_1: - iassert((instr->flags & IR3_INSTR_S2EN) == 0); - src1 = instr->regs[1]; - src2 = instr->regs[2]; - break; - default: + iassert((instr->regs_count == 1) || + (instr->regs_count == 2) || + (instr->regs_count == 3) || + (instr->regs_count == 4)); + + if (instr->flags & IR3_INSTR_S2EN) { src1 = instr->regs[2]; - src2 = instr->regs[3]; - break; + src2 = instr->regs_count > 3 ? instr->regs[3] : NULL; + } else { + src1 = instr->regs_count > 1 ? instr->regs[1] : NULL; + src2 = instr->regs_count > 2 ? instr->regs[2] : NULL; } - iassert_type(dst, type_size(instr->cat5.type) == 32) - assume(src1 || !src2); if (src1) { @@ -479,20 +473,44 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr, cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); } + if (src2) { + iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); + cat5->src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); + } + + if (instr->flags & IR3_INSTR_B) { + cat5->s2en_bindless.base_hi = instr->cat5.tex_base >> 1; + cat5->base_lo = instr->cat5.tex_base & 1; + } + if (instr->flags & IR3_INSTR_S2EN) { struct ir3_register *samp_tex = instr->regs[1]; - if (src2) { - iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); - cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); - } iassert(samp_tex->flags & IR3_REG_HALF); - cat5->s2en.src3 = reg(samp_tex, info, instr->repeat, IR3_REG_HALF); + cat5->s2en_bindless.src3 = reg(samp_tex, info, instr->repeat, + (instr->flags & IR3_INSTR_B) ? 0 : IR3_REG_HALF); + if (instr->flags & IR3_INSTR_B) { + if (instr->flags & IR3_INSTR_A1EN) { + cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_A1_UNIFORM; + } else { + cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_UNIFORM; + } + } else { + /* TODO: This should probably be CAT5_UNIFORM, at least on a6xx, + * as this is what the blob does and it is presumably faster, but + * first we should confirm it is actually nonuniform and figure + * out when the whole descriptor mode mechanism was introduced. + */ + cat5->s2en_bindless.desc_mode = CAT5_NONUNIFORM; + } iassert(!(instr->cat5.samp | instr->cat5.tex)); - } else { - if (src2) { - iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); - cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); + } else if (instr->flags & IR3_INSTR_B) { + cat5->s2en_bindless.src3 = instr->cat5.samp; + if (instr->flags & IR3_INSTR_A1EN) { + cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_A1_IMM; + } else { + cat5->s2en_bindless.desc_mode = CAT5_BINDLESS_IMM; } + } else { cat5->norm.samp = instr->cat5.samp; cat5->norm.tex = instr->cat5.tex; } @@ -503,7 +521,7 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr, cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); cat5->is_a = !!(instr->flags & IR3_INSTR_A); cat5->is_s = !!(instr->flags & IR3_INSTR_S); - cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); + cat5->is_s2en_bindless = !!(instr->flags & (IR3_INSTR_S2EN | IR3_INSTR_B)); cat5->is_o = !!(instr->flags & IR3_INSTR_O); cat5->is_p = !!(instr->flags & IR3_INSTR_P); cat5->opc = instr->opc; @@ -517,13 +535,11 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr, static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr, struct ir3_info *info) { - struct ir3_register *src1, *src2; + struct ir3_register *src1, *src2, *ssbo; instr_cat6_a6xx_t *cat6 = ptr; - bool has_dest = (instr->opc == OPC_LDIB); - - /* first reg should be SSBO binding point: */ - iassert(instr->regs[1]->flags & IR3_REG_IMMED); + bool has_dest = (instr->opc == OPC_LDIB || instr->opc == OPC_LDC); + ssbo = instr->regs[1]; src1 = instr->regs[2]; if (has_dest) { @@ -536,7 +552,7 @@ static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr, } cat6->type = instr->cat6.type; - cat6->d = instr->cat6.d - 1; + cat6->d = instr->cat6.d - (instr->opc == OPC_LDC ? 0 : 1); cat6->typed = instr->cat6.typed; cat6->type_size = instr->cat6.iim_val - 1; cat6->opc = instr->opc; @@ -546,7 +562,21 @@ static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr, cat6->src1 = reg(src1, info, instr->repeat, 0); cat6->src2 = reg(src2, info, instr->repeat, 0); - cat6->ssbo = instr->regs[1]->iim_val; + cat6->ssbo = reg(ssbo, info, instr->repeat, IR3_REG_IMMED); + + if (instr->flags & IR3_INSTR_B) { + if (ssbo->flags & IR3_REG_IMMED) { + cat6->desc_mode = CAT6_BINDLESS_IMM; + } else { + cat6->desc_mode = CAT6_BINDLESS_UNIFORM; + } + cat6->base = instr->cat6.base; + } else { + if (ssbo->flags & IR3_REG_IMMED) + cat6->desc_mode = CAT6_IMM; + else + cat6->desc_mode = CAT6_UNIFORM; + } switch (instr->opc) { case OPC_ATOMIC_ADD: @@ -561,31 +591,29 @@ static int emit_cat6_a6xx(struct ir3_instruction *instr, void *ptr, case OPC_ATOMIC_OR: case OPC_ATOMIC_XOR: cat6->pad1 = 0x1; - cat6->pad2 = 0xc; - cat6->pad3 = 0x0; - cat6->pad4 = 0x3; + cat6->pad3 = 0xc; + cat6->pad5 = 0x3; break; case OPC_STIB: cat6->pad1 = 0x0; - cat6->pad2 = 0xc; - cat6->pad3 = 0x0; - cat6->pad4 = 0x2; + cat6->pad3 = 0xc; + cat6->pad5 = 0x2; break; case OPC_LDIB: cat6->pad1 = 0x1; - cat6->pad2 = 0xc; - cat6->pad3 = 0x0; - cat6->pad4 = 0x2; + cat6->pad3 = 0xc; + cat6->pad5 = 0x2; break; case OPC_LDC: cat6->pad1 = 0x0; - cat6->pad2 = 0x8; - cat6->pad3 = 0x0; - cat6->pad4 = 0x2; + cat6->pad3 = 0x8; + cat6->pad5 = 0x2; break; default: iassert(0); } + cat6->pad2 = 0x0; + cat6->pad4 = 0x0; return 0; } @@ -789,18 +817,30 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, return 0; } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) || - (instr->opc == OPC_LDL)) { + (instr->opc == OPC_LDL) || (instr->opc == OPC_LDLW)) { + struct ir3_register *src3 = instr->regs[3]; instr_cat6a_t *cat6a = ptr; cat6->src_off = true; - cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); - cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED); - if (src2) { - cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED); + if (instr->opc == OPC_LDG) { + /* For LDG src1 can not be immediate, so src1_imm is redundant and + * instead used to signal whether (when true) 'off' is a 32 bit + * register or an immediate offset. + */ + cat6a->src1 = reg(src1, info, instr->repeat, 0); + cat6a->src1_im = !(src3->flags & IR3_REG_IMMED); + cat6a->off = reg(src3, info, instr->repeat, IR3_REG_IMMED); + } else { + cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); + cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED); + cat6a->off = reg(src3, info, instr->repeat, IR3_REG_IMMED); + iassert(src3->flags & IR3_REG_IMMED); } - cat6a->off = instr->cat6.src_offset; + + /* Num components */ + cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + cat6a->src2_im = true; } else { instr_cat6b_t *cat6b = ptr; @@ -815,11 +855,22 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr, } if (instr->cat6.dst_offset || (instr->opc == OPC_STG) || - (instr->opc == OPC_STL)) { + (instr->opc == OPC_STL) || (instr->opc == OPC_STLW)) { instr_cat6c_t *cat6c = ptr; cat6->dst_off = true; cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat6c->off = instr->cat6.dst_offset; + + if (instr->flags & IR3_INSTR_G) { + struct ir3_register *src3 = instr->regs[4]; + cat6c->off = reg(src3, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + if (src3->flags & IR3_REG_IMMED) { + /* Immediate offsets are in bytes... */ + cat6->g = false; + cat6c->off *= 4; + } + } else { + cat6c->off = instr->cat6.dst_offset; + } } else { instr_cat6d_t *cat6d = ptr; cat6->dst_off = false; @@ -858,16 +909,14 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, { uint32_t *ptr, *dwords; + memset(info, 0, sizeof(*info)); info->gpu_id = gpu_id; info->max_reg = -1; info->max_half_reg = -1; info->max_const = -1; - info->instrs_count = 0; - info->sizedwords = 0; - info->ss = info->sy = 0; - list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + foreach_block (block, &shader->block_list) { + foreach_instr (instr, &block->instr_list) { info->sizedwords += 2; } } @@ -884,19 +933,43 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, ptr = dwords = calloc(4, info->sizedwords); - list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + foreach_block (block, &shader->block_list) { + unsigned sfu_delay = 0; + + foreach_instr (instr, &block->instr_list) { int ret = emit[opc_cat(instr->opc)](instr, dwords, info); if (ret) goto fail; + + if ((instr->opc == OPC_BARY_F) && (instr->regs[0]->flags & IR3_REG_EI)) + info->last_baryf = info->instrs_count; + info->instrs_count += 1 + instr->repeat + instr->nop; + info->nops_count += instr->nop; + if (instr->opc == OPC_NOP) + info->nops_count += 1 + instr->repeat; + if (instr->opc == OPC_MOV) { + if (instr->cat1.src_type == instr->cat1.dst_type) { + info->mov_count += 1 + instr->repeat; + } else { + info->cov_count += 1 + instr->repeat; + } + } dwords += 2; - if (instr->flags & IR3_INSTR_SS) + if (instr->flags & IR3_INSTR_SS) { info->ss++; + info->sstall += sfu_delay; + } if (instr->flags & IR3_INSTR_SY) info->sy++; + + if (is_sfu(instr)) { + sfu_delay = 10; + } else if (sfu_delay > 0) { + sfu_delay--; + } } } @@ -942,6 +1015,7 @@ struct ir3_block * ir3_block_create(struct ir3 *shader) block->shader = shader; list_inithead(&block->node); list_inithead(&block->instr_list); + block->predecessors = _mesa_pointer_set_create(block); return block; } @@ -1037,37 +1111,70 @@ ir3_instr_set_address(struct ir3_instruction *instr, { if (instr->address != addr) { struct ir3 *ir = instr->block->shader; + + debug_assert(!instr->address); + debug_assert(instr->block == addr->block); + instr->address = addr; - array_insert(ir, ir->indirects, instr); + debug_assert(reg_num(addr->regs[0]) == REG_A0); + unsigned comp = reg_comp(addr->regs[0]); + if (comp == 0) { + array_insert(ir, ir->a0_users, instr); + } else { + debug_assert(comp == 1); + array_insert(ir, ir->a1_users, instr); + } } } void ir3_block_clear_mark(struct ir3_block *block) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) + foreach_instr (instr, &block->instr_list) instr->flags &= ~IR3_INSTR_MARK; } void ir3_clear_mark(struct ir3 *ir) { - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + foreach_block (block, &ir->block_list) { ir3_block_clear_mark(block); } } -/* note: this will destroy instr->depth, don't do it until after sched! */ unsigned ir3_count_instructions(struct ir3 *ir) { - unsigned cnt = 0; - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + unsigned cnt = 1; + foreach_block (block, &ir->block_list) { + block->start_ip = cnt; + foreach_instr (instr, &block->instr_list) { + instr->ip = cnt++; + } + block->end_ip = cnt; + } + return cnt; +} + +/* When counting instructions for RA, we insert extra fake instructions at the + * beginning of each block, where values become live, and at the end where + * values die. This prevents problems where values live-in at the beginning or + * live-out at the end of a block from being treated as if they were + * live-in/live-out at the first/last instruction, which would be incorrect. + * In ir3_legalize these ip's are assumed to be actual ip's of the final + * program, so it would be incorrect to use this everywhere. + */ + +unsigned +ir3_count_instructions_ra(struct ir3 *ir) +{ + unsigned cnt = 1; + foreach_block (block, &ir->block_list) { + block->start_ip = cnt++; + foreach_instr (instr, &block->instr_list) { instr->ip = cnt++; } - block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip; - block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip; + block->end_ip = cnt++; } return cnt; } @@ -1075,8 +1182,98 @@ ir3_count_instructions(struct ir3 *ir) struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id) { - list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) + foreach_array (arr, &ir->array_list) if (arr->id == id) return arr; return NULL; } + +void +ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps) +{ + /* We could do this in a single pass if we can assume instructions + * are always sorted. Which currently might not always be true. + * (In particular after ir3_group pass, but maybe other places.) + */ + foreach_block (block, &ir->block_list) + foreach_instr (instr, &block->instr_list) + instr->uses = NULL; + + foreach_block (block, &ir->block_list) { + foreach_instr (instr, &block->instr_list) { + foreach_ssa_src_n (src, n, instr) { + if (__is_false_dep(instr, n) && !falsedeps) + continue; + if (!src->uses) + src->uses = _mesa_pointer_set_create(mem_ctx); + _mesa_set_add(src->uses, instr); + } + } + } +} + +/** + * Set the destination type of an instruction, for example if a + * conversion is folded in, handling the special cases where the + * instruction's dest type or opcode needs to be fixed up. + */ +void +ir3_set_dst_type(struct ir3_instruction *instr, bool half) +{ + if (half) { + instr->regs[0]->flags |= IR3_REG_HALF; + } else { + instr->regs[0]->flags &= ~IR3_REG_HALF; + } + + switch (opc_cat(instr->opc)) { + case 1: /* move instructions */ + if (half) { + instr->cat1.dst_type = half_type(instr->cat1.dst_type); + } else { + instr->cat1.dst_type = full_type(instr->cat1.dst_type); + } + break; + case 4: + if (half) { + instr->opc = cat4_half_opc(instr->opc); + } else { + instr->opc = cat4_full_opc(instr->opc); + } + break; + case 5: + if (half) { + instr->cat5.type = half_type(instr->cat5.type); + } else { + instr->cat5.type = full_type(instr->cat5.type); + } + break; + } +} + +/** + * One-time fixup for instruction src-types. Other than cov's that + * are folded, an instruction's src type does not change. + */ +void +ir3_fixup_src_type(struct ir3_instruction *instr) +{ + bool half = !!(instr->regs[1]->flags & IR3_REG_HALF); + + switch (opc_cat(instr->opc)) { + case 1: /* move instructions */ + if (half) { + instr->cat1.src_type = half_type(instr->cat1.src_type); + } else { + instr->cat1.src_type = full_type(instr->cat1.src_type); + } + break; + case 3: + if (half) { + instr->opc = cat3_half_opc(instr->opc); + } else { + instr->opc = cat3_full_opc(instr->opc); + } + break; + } +}