X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnv50%2Fnv50_tgsi_to_nc.c;h=15aa40cddd10ecb8dacac450b64a6f4ce783ef1e;hb=42ba8d141fe07fcfa6f39623d8226919bf27be9f;hp=6fd749b35f8dd79513be4643579983bb537a610d;hpb=6b14a3eb191ab798e524f2413180256fbcc2b33e;p=mesa.git diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index 6fd749b35f8..15aa40cddd1 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -20,8 +20,6 @@ * SOFTWARE. */ -/* #define NV50_TGSI2NC_DEBUG */ - #include #include "nv50_context.h" @@ -31,7 +29,6 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" -#include "util/u_simple_list.h" #include "tgsi/tgsi_dump.h" #define BLD_MAX_TEMPS 64 @@ -39,7 +36,7 @@ #define BLD_MAX_PREDS 4 #define BLD_MAX_IMMDS 128 -#define BLD_MAX_COND_NESTING 4 +#define BLD_MAX_COND_NESTING 8 #define BLD_MAX_LOOP_NESTING 4 #define BLD_MAX_CALL_NESTING 2 @@ -70,14 +67,14 @@ bld_vals_del_val(struct bld_value_stack *stk, struct nv_value *val) { unsigned i; - for (i = stk->size - 1; i >= 0; --i) - if (stk->body[i] == val) + for (i = stk->size; i > 0; --i) + if (stk->body[i - 1] == val) break; - if (i < 0) + if (!i) return FALSE; - if (i != stk->size - 1) - stk->body[i] = stk->body[stk->size - 1]; + if (i != stk->size) + stk->body[i - 1] = stk->body[stk->size - 1]; --stk->size; /* XXX: old size in REALLOC */ return TRUE; @@ -125,7 +122,7 @@ struct bld_context { struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */ struct bld_value_stack ovs[PIPE_MAX_SHADER_OUTPUTS][4]; - uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 31) / 32]; + uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 7) / 8]; struct nv_value *frgcrd[4]; struct nv_value *sysval[4]; @@ -214,6 +211,7 @@ static INLINE void bld_warn_uninitialized(struct bld_context *bld, int kind, struct bld_value_stack *stk, struct nv_basic_block *b) { +#if NV50_DEBUG & NV50_DEBUG_PROG_IR long i = (stk - &bld->tvs[0][0]) / 4; long c = (stk - &bld->tvs[0][0]) & 3; @@ -222,6 +220,7 @@ bld_warn_uninitialized(struct bld_context *bld, int kind, debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n", i, (int)('x' + c), kind ? "may be" : "is", b->id); +#endif } static INLINE struct nv_value * @@ -272,6 +271,12 @@ fetch_by_bb(struct bld_value_stack *stack, fetch_by_bb(stack, vals, n, b->in[i]); } +static INLINE boolean +nvbb_is_terminated(struct nv_basic_block *bb) +{ + return bb->exit && bb->exit->is_terminator; +} + static INLINE struct nv_value * bld_load_imm_u32(struct bld_context *bld, uint32_t u); @@ -288,7 +293,8 @@ bld_phi(struct bld_context *bld, struct nv_basic_block *b, struct bld_value_stack *stack) { struct nv_basic_block *in; - struct nv_value *vals[16], *val; + struct nv_value *vals[16] = { 0 }; + struct nv_value *val; struct nv_instruction *phi; int i, j, n; @@ -474,6 +480,7 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) stk = (struct bld_value_stack *)phi->target; phi->target = NULL; + /* start with s == 1, src[0] is from outside the loop */ for (s = 1, n = 0; n < bb->num_in; ++n) { if (bb->in_kind[n] != CFG_EDGE_BACK) continue; @@ -485,8 +492,11 @@ bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb) for (i = 0; i < 4; ++i) if (phi->src[i] && phi->src[i]->value == val) break; - if (i == 4) + if (i == 4) { + /* skip values we do not want to replace */ + for (; phi->src[s] && phi->src[s]->value != phi->def[0]; ++s); nv_reference(bld->pc, &phi->src[s++], val); + } } bld->pc->current_block = save; @@ -646,7 +656,10 @@ bld_pow(struct bld_context *bld, struct nv_value *x, struct nv_value *e) static INLINE struct nv_value * bld_load_imm_f32(struct bld_context *bld, float f) { - return bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f)); + struct nv_value *imm = bld_insn_1(bld, NV_OP_MOV, bld_imm_f32(bld, f)); + + SET_TYPE(imm, NV_TYPE_F32); + return imm; } static INLINE struct nv_value * @@ -660,6 +673,7 @@ bld_get_address(struct bld_context *bld, int id, struct nv_value *indirect) { int i; struct nv_instruction *nvi; + struct nv_value *val; for (i = 0; i < 4; ++i) { if (!bld->saved_addr[i][0]) @@ -672,7 +686,13 @@ bld_get_address(struct bld_context *bld, int id, struct nv_value *indirect) } i &= 3; - bld->saved_addr[i][0] = bld_load_imm_u32(bld, id); + val = bld_imm_u32(bld, id); + if (indirect) + val = bld_insn_2(bld, NV_OP_ADD, indirect, val); + else + val = bld_insn_1(bld, NV_OP_MOV, val); + + bld->saved_addr[i][0] = val; bld->saved_addr[i][0]->reg.file = NV_FILE_ADDR; bld->saved_addr[i][0]->reg.type = NV_TYPE_U16; bld->saved_addr[i][1] = indirect; @@ -695,19 +715,15 @@ bld_predicate(struct bld_context *bld, struct nv_value *src, boolean bool_only) while (nvi->opcode == NV_OP_ABS || nvi->opcode == NV_OP_NEG || nvi->opcode == NV_OP_CVT) { s0i = nvi->src[0]->value->insn; - if (!s0i || - s0i->opcode == NV_OP_LDA || - s0i->opcode == NV_OP_MOV || - s0i->opcode == NV_OP_PHI) + if (!s0i || !nv50_op_can_write_flags(s0i->opcode)) break; nvi = s0i; assert(!nvi->flags_src); } } - if (nvi->opcode == NV_OP_LDA || - nvi->opcode == NV_OP_MOV || - nvi->opcode == NV_OP_PHI || nvi->bb != bld->pc->current_block) { + if (!nv50_op_can_write_flags(nvi->opcode) || + nvi->bb != bld->pc->current_block) { nvi = new_instruction(bld->pc, NV_OP_CVT); nv_reference(bld->pc, &nvi->src[0], src); } @@ -944,6 +960,8 @@ emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, switch (reg->Register.File) { case TGSI_FILE_OUTPUT: + if (!value->insn && (bld->ti->output_file == NV_FILE_OUT)) + value = bld_insn_1(bld, NV_OP_MOV, value); value = bld_insn_1(bld, NV_OP_MOV, value); value->reg.file = bld->ti->output_file; @@ -956,9 +974,9 @@ emit_store(struct bld_context *bld, const struct tgsi_full_instruction *inst, break; case TGSI_FILE_TEMPORARY: assert(reg->Register.Index < BLD_MAX_TEMPS); - value->reg.file = NV_FILE_GPR; - if (value->insn->bb != bld->pc->current_block) + if (!value->insn || (value->insn->bb != bld->pc->current_block)) value = bld_insn_1(bld, NV_OP_MOV, value); + value->reg.file = NV_FILE_GPR; if (bld->ti->store_to_memory) bld_lmem_store(bld, ptr, reg->Register.Index * 4 + chan, value); @@ -1072,7 +1090,7 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, const struct tgsi_full_src_register *src = &insn->Src[s]; struct nv_value *res; struct nv_value *ptr = NULL; - unsigned idx, swz, dim_idx, ind_idx, ind_swz; + unsigned idx, swz, dim_idx, ind_idx, ind_swz, sgn; ubyte type = infer_src_type(insn->Instruction.Opcode); idx = src->Register.Index; @@ -1092,9 +1110,8 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, switch (src->Register.File) { case TGSI_FILE_CONSTANT: - dim_idx = src->Dimension.Index ? src->Dimension.Index + 2 : 1; - assert(dim_idx < 14); - assert(dim_idx == 1); /* for now */ + dim_idx = src->Dimension.Index; + assert(dim_idx < 15); res = new_value(bld->pc, NV_FILE_MEM_C(dim_idx), type); SET_TYPE(res, type); @@ -1120,7 +1137,7 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_INPUT: res = bld_saved_input(bld, idx, swz); if (res && (insn->Instruction.Opcode != TGSI_OPCODE_TXP)) - return res; + break; res = new_value(bld->pc, bld->ti->input_file, type); res->reg.id = bld->ti->input_map[idx][swz]; @@ -1146,6 +1163,13 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, case TGSI_FILE_PREDICATE: res = bld_fetch_global(bld, &bld->pvs[idx][swz]); break; + case TGSI_FILE_SYSTEM_VALUE: + res = new_value(bld->pc, bld->ti->input_file, NV_TYPE_U32); + res->reg.id = bld->ti->sysval_map[idx]; + res = bld_insn_1(bld, NV_OP_LDA, res); + res = bld_insn_1(bld, NV_OP_CVT, res); + res->reg.type = NV_TYPE_F32; + break; default: NOUVEAU_ERR("illegal/unhandled src reg file: %d\n", src->Register.File); abort(); @@ -1154,10 +1178,15 @@ emit_fetch(struct bld_context *bld, const struct tgsi_full_instruction *insn, if (!res) return bld_undef(bld, NV_FILE_GPR); + sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); + if (insn->Instruction.Opcode != TGSI_OPCODE_MOV) res->reg.as_type = type; + else + if (sgn != TGSI_UTIL_SIGN_KEEP) /* apparently "MOV A, -B" assumes float */ + res->reg.as_type = NV_TYPE_F32; - switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { + switch (sgn) { case TGSI_UTIL_SIGN_KEEP: break; case TGSI_UTIL_SIGN_CLEAR: @@ -1183,7 +1212,8 @@ static void bld_lit(struct bld_context *bld, struct nv_value *dst0[4], const struct tgsi_full_instruction *insn) { - struct nv_value *val0, *zero; + struct nv_value *val0 = NULL; + struct nv_value *zero = NULL; unsigned mask = insn->Dst[0].Register.WriteMask; if (mask & ((1 << 0) | (1 << 3))) @@ -1254,10 +1284,14 @@ get_tex_dim(const struct tgsi_full_instruction *insn, int *dim, int *arg) static void load_proj_tex_coords(struct bld_context *bld, - struct nv_value *t[4], int dim, + struct nv_value *t[4], int dim, int arg, const struct tgsi_full_instruction *insn) { - int c, mask = 0; + int c, mask; + + mask = (1 << dim) - 1; + if (arg != dim) + mask |= 4; /* depth comparison value */ t[3] = emit_fetch(bld, insn, 0, 3); @@ -1269,17 +1303,19 @@ load_proj_tex_coords(struct bld_context *bld, t[3] = bld_insn_1(bld, NV_OP_RCP, t[3]); - for (c = 0; c < dim; ++c) { + for (c = 0; c < 4; ++c) { + if (!(mask & (1 << c))) + continue; t[c] = emit_fetch(bld, insn, 0, c); - if (t[c]->insn->opcode == NV_OP_LINTERP || - t[c]->insn->opcode == NV_OP_PINTERP) { - t[c] = bld_duplicate_insn(bld, t[c]->insn); - t[c]->insn->opcode = NV_OP_PINTERP; - nv_reference(bld->pc, &t[c]->insn->src[1], t[3]); - } else { - mask |= 1 << c; - } + if (t[c]->insn->opcode != NV_OP_LINTERP && + t[c]->insn->opcode != NV_OP_PINTERP) + continue; + t[c] = bld_duplicate_insn(bld, t[c]->insn); + t[c]->insn->opcode = NV_OP_PINTERP; + nv_reference(bld->pc, &t[c]->insn->src[1], t[3]); + + mask &= ~(1 << c); } for (c = 0; mask; ++c, mask >>= 1) { @@ -1446,16 +1482,19 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], uint opcode = translate_opcode(insn->Instruction.Opcode); int arg, dim, c; const int tic = insn->Src[1].Register.Index; - const int tsc = 0; + const int tsc = tic; const int cube = (insn->Texture.Texture == TGSI_TEXTURE_CUBE) ? 1 : 0; get_tex_dim(insn, &dim, &arg); if (!cube && insn->Instruction.Opcode == TGSI_OPCODE_TXP) - load_proj_tex_coords(bld, t, dim, insn); - else + load_proj_tex_coords(bld, t, dim, arg, insn); + else { for (c = 0; c < dim; ++c) t[c] = emit_fetch(bld, insn, 0, c); + if (arg != dim) + t[dim] = emit_fetch(bld, insn, 0, 2); + } if (cube) { assert(dim >= 3); @@ -1470,9 +1509,6 @@ bld_tex(struct bld_context *bld, struct nv_value *dst0[4], t[c] = bld_insn_2(bld, NV_OP_MUL, t[c], s[0]); } - if (arg != dim) - t[dim] = emit_fetch(bld, insn, 0, 2); - if (opcode == NV_OP_TXB || opcode == NV_OP_TXL) { t[arg++] = emit_fetch(bld, insn, 0, 3); @@ -1516,15 +1552,17 @@ static void bld_instruction(struct bld_context *bld, const struct tgsi_full_instruction *insn) { + struct nv50_program *prog = bld->ti->p; + const struct tgsi_full_dst_register *dreg = &insn->Dst[0]; struct nv_value *src0; struct nv_value *src1; struct nv_value *src2; - struct nv_value *dst0[4]; + struct nv_value *dst0[4] = { 0 }; struct nv_value *temp; int c; uint opcode = translate_opcode(insn->Instruction.Opcode); -#ifdef NV50_TGSI2NC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR debug_printf("bld_instruction:"); tgsi_dump_instruction(insn, 1); #endif @@ -1616,6 +1654,23 @@ bld_instruction(struct bld_context *bld, if (insn->Dst[0].Register.WriteMask & 8) dst0[3] = emit_fetch(bld, insn, 1, 3); break; + case TGSI_OPCODE_EXP: + src0 = emit_fetch(bld, insn, 0, 0); + temp = bld_insn_1(bld, NV_OP_FLOOR, src0); + + if (insn->Dst[0].Register.WriteMask & 2) + dst0[1] = bld_insn_2(bld, NV_OP_SUB, src0, temp); + if (insn->Dst[0].Register.WriteMask & 1) { + temp = bld_insn_1(bld, NV_OP_PREEX2, temp); + dst0[0] = bld_insn_1(bld, NV_OP_EX2, temp); + } + if (insn->Dst[0].Register.WriteMask & 4) { + temp = bld_insn_1(bld, NV_OP_PREEX2, src0); + dst0[2] = bld_insn_1(bld, NV_OP_EX2, temp); + } + if (insn->Dst[0].Register.WriteMask & 8) + dst0[3] = bld_imm_f32(bld, 1.0f); + break; case TGSI_OPCODE_EX2: src0 = emit_fetch(bld, insn, 0, 0); temp = bld_insn_1(bld, NV_OP_PREEX2, src0); @@ -1643,6 +1698,8 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *b = new_basic_block(bld->pc); + assert(bld->cond_lvl < BLD_MAX_COND_NESTING); + nvbb_attach_block(bld->pc->current_block, b, CFG_EDGE_FORWARD); bld->join_bb[bld->cond_lvl] = bld->pc->current_block; @@ -1676,6 +1733,9 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *b = new_basic_block(bld->pc); + if (!nvbb_is_terminated(bld->pc->current_block)) + bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, b, FALSE); + --bld->cond_lvl; nvbb_attach_block(bld->pc->current_block, b, bld->out_kind); nvbb_attach_block(bld->cond_bb[bld->cond_lvl], b, CFG_EDGE_FORWARD); @@ -1695,6 +1755,8 @@ bld_instruction(struct bld_context *bld, struct nv_basic_block *bl = new_basic_block(bld->pc); struct nv_basic_block *bb = new_basic_block(bld->pc); + assert(bld->loop_lvl < BLD_MAX_LOOP_NESTING); + bld->loop_bb[bld->loop_lvl] = bl; bld->brkt_bb[bld->loop_lvl] = bb; @@ -1743,7 +1805,8 @@ bld_instruction(struct bld_context *bld, { struct nv_basic_block *bb = bld->loop_bb[bld->loop_lvl - 1]; - bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); + if (!nvbb_is_terminated(bld->pc->current_block)) + bld_flow(bld, NV_OP_BRA, NV_CC_TR, NULL, bb, FALSE); nvbb_attach_block(bld->pc->current_block, bb, CFG_EDGE_BACK); @@ -1794,6 +1857,24 @@ bld_instruction(struct bld_context *bld, FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) dst0[c] = temp; break; + case TGSI_OPCODE_LOG: + src0 = emit_fetch(bld, insn, 0, 0); + src0 = bld_insn_1(bld, NV_OP_ABS, src0); + temp = bld_insn_1(bld, NV_OP_LG2, src0); + dst0[2] = temp; + if (insn->Dst[0].Register.WriteMask & 3) { + temp = bld_insn_1(bld, NV_OP_FLOOR, temp); + dst0[0] = temp; + } + if (insn->Dst[0].Register.WriteMask & 2) { + temp = bld_insn_1(bld, NV_OP_PREEX2, temp); + temp = bld_insn_1(bld, NV_OP_EX2, temp); + temp = bld_insn_1(bld, NV_OP_RCP, temp); + dst0[1] = bld_insn_2(bld, NV_OP_MUL, src0, temp); + } + if (insn->Dst[0].Register.WriteMask & 8) + dst0[3] = bld_imm_f32(bld, 1.0f); + break; case TGSI_OPCODE_RCP: case TGSI_OPCODE_LG2: src0 = emit_fetch(bld, insn, 0, 0); @@ -1862,6 +1943,7 @@ bld_instruction(struct bld_context *bld, dst0[c] = bld_insn_2(bld, NV_OP_XOR, temp, temp); dst0[c]->insn->cc = NV_CC_EQ; nv_reference(bld->pc, &dst0[c]->insn->flags_src, src1); + dst0[c] = bld_insn_2(bld, NV_OP_SELECT, dst0[c], temp); } break; case TGSI_OPCODE_SUB: @@ -1910,6 +1992,31 @@ bld_instruction(struct bld_context *bld, FOR_EACH_DST0_ENABLED_CHANNEL(c, insn) emit_store(bld, insn, c, dst0[c]); + + if (prog->type == PIPE_SHADER_VERTEX && prog->vp.clpd_nr && + dreg->Register.File == TGSI_FILE_OUTPUT && !dreg->Register.Indirect && + prog->out[dreg->Register.Index].sn == TGSI_SEMANTIC_POSITION) { + + int p; + for (p = 0; p < prog->vp.clpd_nr; p++) { + struct nv_value *clipd = NULL; + + for (c = 0; c < 4; c++) { + temp = new_value(bld->pc, NV_FILE_MEM_C(15), NV_TYPE_F32); + temp->reg.id = p * 4 + c; + temp = bld_insn_1(bld, NV_OP_LDA, temp); + + clipd = clipd ? + bld_insn_3(bld, NV_OP_MAD, dst0[c], temp, clipd) : + bld_insn_2(bld, NV_OP_MUL, dst0[c], temp); + } + + temp = bld_insn_1(bld, NV_OP_MOV, clipd); + temp->reg.file = NV_FILE_OUT; + temp->reg.id = bld->ti->p->vp.clpd + p; + temp->insn->fixed = 1; + } + } } static INLINE void