static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
- if (nvi->opcode == NV_OP_EXPORT)
+ if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)
return TRUE;
if (nvi->fixed ||
}
static void
-nv_pc_pass_pre_emission(struct nv_pc *pc, struct nv_basic_block *b)
+nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
+ struct nv_pc *pc = (struct nv_pc *)priv;
struct nv_basic_block *in;
struct nv_instruction *nvi, *next;
int j;
uint size, n32 = 0;
- b->priv = 0;
-
for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
if (j >= 0) {
in = pc->bb_list[j];
assert(!b->entry || (b->exit && b->exit->is_long));
pc->bin_size += b->bin_size *= 4;
-
- /* descend CFG */
-
- if (!b->out[0])
- return;
- if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in)
- return;
-
- for (j = 0; j < 2; ++j)
- if (b->out[j] && b->out[j] != b)
- nv_pc_pass_pre_emission(pc, b->out[j]);
}
int
debug_printf("preparing %u blocks for emission\n", pc->num_blocks);
pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
-
pc->num_blocks = 0;
- nv_pc_pass_pre_emission(pc, pc->root);
+
+ nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);
return 0;
}
return;
assert(src0 && src1);
+ if (src1->value->reg.file == NV_FILE_IMM) {
+ /* should only be present from folding a constant MUL part of a MAD */
+ assert(nvi->opcode == NV_OP_ADD);
+ return;
+ }
+
if (is_cmem_load(src0->value->insn)) {
if (!is_cmem_load(src1->value->insn)) {
nvi->src[0] = src1;
- nvi->src[1] = src0;
- /* debug_printf("swapping cmem load to 1\n"); */
+ nvi->src[1] = src0;
+ /* debug_printf("swapping cmem load to 1\n"); */
}
} else
if (is_smem_load(src1->value->insn)) {
if (!is_smem_load(src0->value->insn)) {
nvi->src[0] = src1;
- nvi->src[1] = src0;
- /* debug_printf("swapping smem load to 0\n"); */
+ nvi->src[1] = src0;
+ /* debug_printf("swapping smem load to 0\n"); */
}
}
if (nvi->def[0]->refc > 1)
continue;
- /* cannot MOV immediate to $oX */
- if (nvi->src[0]->value->reg.file == NV_FILE_IMM)
+ /* cannot write to $oX when using immediate */
+ for (j = 0; j < 4 && nvi->src[j]; ++j)
+ if (nvi->src[j]->value->reg.file == NV_FILE_IMM)
+ break;
+ if (j < 4)
continue;
nvi->def[0] = sti->def[0];
if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
- debug_printf("folded immediate %i\n", ld->def[0]->n);
continue;
}
return (src->reg.file == NV_FILE_IMM) ? src : NULL;
}
+/**
+ * Apply source modifiers to a raw 32-bit immediate, in place.
+ *
+ * NV_MOD_ABS is applied before NV_MOD_NEG, so both together yield -|x|.
+ * For NV_TYPE_F32 only the sign bit is touched; for every other type the
+ * value is negated with two's complement arithmetic.
+ *
+ * @param val   bit pattern of the immediate, updated in place
+ * @param type  type used to interpret *val (NV_TYPE_F32 or other)
+ * @param mod   bitmask of NV_MOD_ABS / NV_MOD_NEG
+ */
+static void
+modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
+{
+   if (mod & NV_MOD_ABS) {
+      if (type == NV_TYPE_F32) {
+         *val &= 0x7fffffff;
+      } else if ((*val) & 0x80000000) {
+         /* unsigned mask: (1 << 31) on a signed int is undefined behaviour */
+         *val = ~(*val) + 1;
+      }
+   }
+   if (mod & NV_MOD_NEG) {
+      if (type == NV_TYPE_F32)
+         *val ^= 0x80000000;
+      else
+         *val = ~(*val) + 1;
+   }
+}
+
+/**
+ * Map a source modifier mask to the single-source opcode that has the same
+ * effect: no modifier -> MOV, exactly NEG -> NEG, exactly ABS -> ABS.
+ * Any other mask (e.g. a combination of modifiers) returns NV_OP_NOP, which
+ * callers use as "cannot be expressed as one opcode".
+ */
+static INLINE uint
+modifiers_opcode(ubyte mod)
+{
+   if (mod == 0)
+      return NV_OP_MOV;
+   if (mod == NV_MOD_NEG)
+      return NV_OP_NEG;
+   if (mod == NV_MOD_ABS)
+      return NV_OP_ABS;
+
+   return NV_OP_NOP;
+}
+
+/**
+ * Fold an instruction whose first two sources are both immediates.
+ *
+ * MUL, ADD and SUB are evaluated here and the instruction is rewritten into
+ * a MOV of a freshly created immediate holding the result. For MAD only the
+ * multiplication is folded, and only when the third source is in a GPR; the
+ * instruction then becomes ADD(src[2], product). Instructions with any other
+ * opcode, without a destination, or with an unsupported result type are
+ * left untouched.
+ *
+ * @param pc    program context passed to new_value() / nv_reference()
+ * @param nvi   instruction to fold; modified in place
+ * @param src0  immediate value found for nvi->src[0]
+ * @param src1  immediate value found for nvi->src[1]
+ */
+static void
+constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
+                    struct nv_value *src0, struct nv_value *src1)
+{
+   struct nv_value *val;
+   union {
+      float f32;
+      uint32_t u32;
+      int32_t s32;
+   } u0, u1, u;
+   ubyte type;
+
+   if (!nvi->def[0])
+      return;
+   type = nvi->def[0]->reg.type;
+
+   u.u32 = 0;
+   u0.u32 = src0->reg.imm.u32;
+   u1.u32 = src1->reg.imm.u32;
+
+   /* each operand gets the modifiers of its own source slot */
+   modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
+   modifiers_apply(&u1.u32, type, nvi->src[1]->mod);
+
+   switch (nvi->opcode) {
+   case NV_OP_MAD:
+      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
+         return;
+      /* fall through */
+   case NV_OP_MUL:
+      switch (type) {
+      case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
+      case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
+      case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
+      default:
+         assert(0);
+         return; /* unknown type: do not fold to a bogus 0 */
+      }
+      break;
+   case NV_OP_ADD:
+      switch (type) {
+      case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
+      case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
+      case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
+      default:
+         assert(0);
+         return; /* unknown type: do not fold to a bogus 0 */
+      }
+      break;
+   case NV_OP_SUB:
+      switch (type) {
+      case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
+      case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
+      case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
+      default:
+         assert(0);
+         return; /* unknown type: do not fold to a bogus 0 */
+      }
+      break;
+   default:
+      return;
+   }
+
+   nvi->opcode = NV_OP_MOV;
+
+   val = new_value(pc, NV_FILE_IMM, type);
+
+   val->reg.imm.u32 = u.u32;
+
+   /* drop the second operand and point the first at the folded result */
+   nv_reference(pc, &nvi->src[1], NULL);
+   nv_reference(pc, &nvi->src[0], val);
+
+   if (nvi->src[2]) { /* from MAD */
+      /* a * b + c  ->  c + (a * b): immediate goes into slot 1 */
+      nvi->src[1] = nvi->src[0];
+      nvi->src[0] = nvi->src[2];
+      nvi->src[2] = NULL;
+      nvi->opcode = NV_OP_ADD;
+   }
+}
+
static void
constant_operand(struct nv_pc *pc,
struct nv_instruction *nvi, struct nv_value *val, int s)
{
+ union {
+ float f32;
+ uint32_t u32;
+ int32_t s32;
+ } u;
int t = s ? 0 : 1;
+ uint op;
ubyte type;
if (!nvi->def[0])
return;
type = nvi->def[0]->reg.type;
+ u.u32 = val->reg.imm.u32;
+ modifiers_apply(&u.u32, type, nvi->src[s]->mod);
+
switch (nvi->opcode) {
case NV_OP_MUL:
- if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 1.0f) ||
- (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 1)) {
- nvi->opcode = NV_OP_MOV;
+ if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
+ (NV_TYPE_ISINT(type) && u.u32 == 1)) {
+ if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
+ break;
+ nvi->opcode = op;
nv_reference(pc, &nvi->src[s], NULL);
- if (!s) {
- nvi->src[0] = nvi->src[1];
- nvi->src[1] = NULL;
- }
+ nvi->src[0] = nvi->src[t];
+ nvi->src[1] = NULL;
} else
- if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 2.0f) ||
- (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 2)) {
+ if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
+ (NV_TYPE_ISINT(type) && u.u32 == 2)) {
nvi->opcode = NV_OP_ADD;
nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
+ nvi->src[s]->mod = nvi->src[t]->mod;
} else
- if (type == NV_TYPE_F32 && val->reg.imm.f32 == -1.0f) {
- nvi->opcode = NV_OP_NEG;
+ if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
+ if (nvi->src[t]->mod & NV_MOD_NEG)
+ nvi->opcode = NV_OP_MOV;
+ else
+ nvi->opcode = NV_OP_NEG;
nv_reference(pc, &nvi->src[s], NULL);
nvi->src[0] = nvi->src[t];
nvi->src[1] = NULL;
} else
- if (type == NV_TYPE_F32 && val->reg.imm.f32 == -2.0f) {
+ if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
nvi->opcode = NV_OP_ADD;
- assert(!nvi->src[s]->mod);
nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
- nvi->src[t]->mod ^= NV_MOD_NEG;
- nvi->src[s]->mod |= NV_MOD_NEG;
+ nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
} else
- if (val->reg.imm.u32 == 0) {
+ if (u.u32 == 0) {
nvi->opcode = NV_OP_MOV;
nv_reference(pc, &nvi->src[t], NULL);
if (s) {
}
break;
case NV_OP_ADD:
- if (val->reg.imm.u32 == 0) {
- nvi->opcode = NV_OP_MOV;
+ if (u.u32 == 0) {
+ if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
+ break;
+ nvi->opcode = op;
nv_reference(pc, &nvi->src[s], NULL);
nvi->src[0] = nvi->src[t];
nvi->src[1] = NULL;
}
break;
+ case NV_OP_RCP:
+ u.f32 = 1.0f / u.f32;
+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ nvi->opcode = NV_OP_MOV;
+ assert(s == 0);
+ nv_reference(pc, &nvi->src[0], val);
+ break;
+ case NV_OP_RSQ:
+ u.f32 = 1.0f / sqrtf(u.f32);
+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ nvi->opcode = NV_OP_MOV;
+ assert(s == 0);
+ nv_reference(pc, &nvi->src[0], val);
+ break;
default:
break;
}
next = nvi->next;
- if ((src = find_immediate(nvi->src[0])) != NULL)
- constant_operand(ctx->pc, nvi, src, 0);
- else
- if ((src = find_immediate(nvi->src[1])) != NULL)
- constant_operand(ctx->pc, nvi, src, 1);
+ src0 = find_immediate(nvi->src[0]);
+ src1 = find_immediate(nvi->src[1]);
+
+ if (src0 && src1)
+ constant_expression(ctx->pc, nvi, src0, src1);
+ else {
+ if (src0)
+ constant_operand(ctx->pc, nvi, src0, 0);
+ else
+ if (src1)
+ constant_operand(ctx->pc, nvi, src1, 1);
+ }
/* try to combine MUL, ADD into MAD */
if (nvi->opcode != NV_OP_ADD)
break;
if (it) {
-#if 1
- nvcg_replace_value(ctx->pc, ld->def[0], it->value);
-#else
- ld->opcode = NV_OP_MOV;
- nv_reference(ctx->pc, &ld->src[0], it->value);
-#endif
+ if (ld->def[0]->reg.id >= 0)
+ it->value = ld->def[0];
+ else
+ nvcg_replace_value(ctx->pc, ld->def[0], it->value);
} else {
if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
continue;
int j;
struct nv_instruction *nvi, *next;
- for (nvi = b->entry; nvi; nvi = next) {
+ for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
next = nvi->next;
if (inst_cullable(nvi)) {
if (ik->src[4] || ir->src[4])
continue; /* don't mess with address registers */
+ if (ik->flags_src || ir->flags_src ||
+ ik->flags_def || ir->flags_def)
+ continue; /* and also not with flags, for now */
+
for (s = 0; s < 3; ++s) {
struct nv_value *a, *b;