struct ir3_register *src1 = instr->regs[1];
struct ir3_register *src2 = instr->regs[2];
instr_cat2_t *cat2 = ptr;
+ unsigned absneg = ir3_cat2_absneg(instr->opc);
iassert((instr->regs_count == 2) || (instr->regs_count == 3));
if (src1->flags & IR3_REG_RELATIV) {
iassert(src1->num < (1 << 10));
cat2->rel1.src1 = reg(src1, info, instr->repeat,
- IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
- IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
+ IR3_REG_HALF | absneg);
cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST);
cat2->rel1.src1_rel = 1;
} else if (src1->flags & IR3_REG_CONST) {
iassert(src1->num < (1 << 12));
cat2->c1.src1 = reg(src1, info, instr->repeat,
- IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS |
- IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
cat2->c1.src1_c = 1;
} else {
iassert(src1->num < (1 << 11));
cat2->src1 = reg(src1, info, instr->repeat,
- IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS |
- IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
+ absneg);
}
cat2->src1_im = !!(src1->flags & IR3_REG_IMMED);
- cat2->src1_neg = !!(src1->flags & IR3_REG_NEGATE);
- cat2->src1_abs = !!(src1->flags & IR3_REG_ABS);
+ cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
+ cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS));
cat2->src1_r = !!(src1->flags & IR3_REG_R);
if (src2) {
if (src2->flags & IR3_REG_RELATIV) {
iassert(src2->num < (1 << 10));
cat2->rel2.src2 = reg(src2, info, instr->repeat,
- IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
- IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
+ IR3_REG_HALF | absneg);
cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST);
cat2->rel2.src2_rel = 1;
} else if (src2->flags & IR3_REG_CONST) {
iassert(src2->num < (1 << 12));
cat2->c2.src2 = reg(src2, info, instr->repeat,
- IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS |
- IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
cat2->c2.src2_c = 1;
} else {
iassert(src2->num < (1 << 11));
cat2->src2 = reg(src2, info, instr->repeat,
- IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS |
- IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
+ absneg);
}
cat2->src2_im = !!(src2->flags & IR3_REG_IMMED);
- cat2->src2_neg = !!(src2->flags & IR3_REG_NEGATE);
- cat2->src2_abs = !!(src2->flags & IR3_REG_ABS);
+ cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
+ cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS));
cat2->src2_r = !!(src2->flags & IR3_REG_R);
}
struct ir3_register *src1 = instr->regs[1];
struct ir3_register *src2 = instr->regs[2];
struct ir3_register *src3 = instr->regs[3];
+ unsigned absneg = ir3_cat3_absneg(instr->opc);
instr_cat3_t *cat3 = ptr;
uint32_t src_flags = 0;
if (src1->flags & IR3_REG_RELATIV) {
iassert(src1->num < (1 << 10));
cat3->rel1.src1 = reg(src1, info, instr->repeat,
- IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
- IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
+ IR3_REG_HALF | absneg);
cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST);
cat3->rel1.src1_rel = 1;
} else if (src1->flags & IR3_REG_CONST) {
iassert(src1->num < (1 << 12));
cat3->c1.src1 = reg(src1, info, instr->repeat,
- IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R |
- IR3_REG_HALF);
+ IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
cat3->c1.src1_c = 1;
} else {
iassert(src1->num < (1 << 11));
cat3->src1 = reg(src1, info, instr->repeat,
- IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_R | IR3_REG_HALF | absneg);
}
- cat3->src1_neg = !!(src1->flags & IR3_REG_NEGATE);
+ cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
cat3->src1_r = !!(src1->flags & IR3_REG_R);
cat3->src2 = reg(src2, info, instr->repeat,
- IR3_REG_CONST | IR3_REG_NEGATE |
- IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg);
cat3->src2_c = !!(src2->flags & IR3_REG_CONST);
- cat3->src2_neg = !!(src2->flags & IR3_REG_NEGATE);
+ cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
cat3->src2_r = !!(src2->flags & IR3_REG_R);
if (src3->flags & IR3_REG_RELATIV) {
iassert(src3->num < (1 << 10));
cat3->rel2.src3 = reg(src3, info, instr->repeat,
- IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
- IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
+ IR3_REG_HALF | absneg);
cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST);
cat3->rel2.src3_rel = 1;
} else if (src3->flags & IR3_REG_CONST) {
iassert(src3->num < (1 << 12));
cat3->c2.src3 = reg(src3, info, instr->repeat,
- IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R |
- IR3_REG_HALF);
+ IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
cat3->c2.src3_c = 1;
} else {
iassert(src3->num < (1 << 11));
cat3->src3 = reg(src3, info, instr->repeat,
- IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_R | IR3_REG_HALF | absneg);
}
- cat3->src3_neg = !!(src3->flags & IR3_REG_NEGATE);
+ cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
cat3->src3_r = !!(src3->flags & IR3_REG_R);
cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
if (src->flags & IR3_REG_RELATIV) {
iassert(src->num < (1 << 10));
cat4->rel.src = reg(src, info, instr->repeat,
- IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
- IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF);
+ IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
+ IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
cat4->rel.src_c = !!(src->flags & IR3_REG_CONST);
cat4->rel.src_rel = 1;
} else if (src->flags & IR3_REG_CONST) {
iassert(src->num < (1 << 12));
cat4->c.src = reg(src, info, instr->repeat,
- IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS |
+ IR3_REG_CONST | IR3_REG_FNEG | IR3_REG_FABS |
IR3_REG_R | IR3_REG_HALF);
cat4->c.src_c = 1;
} else {
iassert(src->num < (1 << 11));
cat4->src = reg(src, info, instr->repeat,
- IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS |
+ IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS |
IR3_REG_R | IR3_REG_HALF);
}
cat4->src_im = !!(src->flags & IR3_REG_IMMED);
- cat4->src_neg = !!(src->flags & IR3_REG_NEGATE);
- cat4->src_abs = !!(src->flags & IR3_REG_ABS);
+ cat4->src_neg = !!(src->flags & IR3_REG_FNEG);
+ cat4->src_abs = !!(src->flags & IR3_REG_FABS);
cat4->src_r = !!(src->flags & IR3_REG_R);
cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
IR3_REG_HALF = 0x004,
IR3_REG_RELATIV= 0x008,
IR3_REG_R = 0x010,
- IR3_REG_NEGATE = 0x020,
- IR3_REG_ABS = 0x040,
- IR3_REG_EVEN = 0x080,
- IR3_REG_POS_INF= 0x100,
+ /* Most instructions, it seems, can do float abs/neg but not
+ * integer. The CP pass needs to know what is intended (int or
+ * float) in order to do the right thing. For this reason the
+ * abs/neg flags are split out into float and int variants. In
+ * addition, for .b (bitwise) operations, the negate is actually a
+ * bitwise not, so split that out into a new flag to make it
+ * clearer.
+ */
+ IR3_REG_FNEG = 0x020,
+ IR3_REG_FABS = 0x040,
+ IR3_REG_SNEG = 0x080,
+ IR3_REG_SABS = 0x100,
+ IR3_REG_BNOT = 0x200,
+ IR3_REG_EVEN = 0x400,
+ IR3_REG_POS_INF= 0x800,
/* (ei) flag, end-input? Set on last bary, presumably to signal
* that the shader needs no more input:
*/
- IR3_REG_EI = 0x200,
+ IR3_REG_EI = 0x1000,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
- IR3_REG_SSA = 0x1000, /* 'instr' is ptr to assigning instr */
- IR3_REG_IA = 0x2000, /* meta-input dst is "assigned" */
- IR3_REG_ADDR = 0x4000, /* register is a0.x */
+ IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */
+ IR3_REG_IA = 0x4000, /* meta-input dst is "assigned" */
+ IR3_REG_ADDR = 0x8000, /* register is a0.x */
} flags;
union {
/* normal registers:
return true;
}
+/* some cat2 instructions (ie. those which are not float) can embed an
+ * immediate; returns true for the opcodes listed below, false otherwise:
+ */
+static inline bool ir3_cat2_immed(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ADD_U: /* _U/_S (integer) opcodes */
+ case OPC_ADD_S:
+ case OPC_SUB_U:
+ case OPC_SUB_S:
+ case OPC_CMPS_U:
+ case OPC_CMPS_S:
+ case OPC_MIN_U:
+ case OPC_MIN_S:
+ case OPC_MAX_U:
+ case OPC_MAX_S:
+ case OPC_CMPV_U:
+ case OPC_CMPV_S:
+ case OPC_MUL_U:
+ case OPC_MUL_S:
+ case OPC_MULL_U:
+ case OPC_CLZ_S:
+ case OPC_ABSNEG_S:
+ case OPC_AND_B: /* _B (bitwise) opcodes */
+ case OPC_OR_B:
+ case OPC_NOT_B:
+ case OPC_XOR_B:
+ case OPC_BFREV_B:
+ case OPC_CLZ_B:
+ case OPC_SHL_B:
+ case OPC_SHR_B:
+ case OPC_ASHR_B:
+ case OPC_MGEN_B:
+ case OPC_GETBIT_B:
+ case OPC_CBITS_B:
+ case OPC_BARY_F: /* NOTE(review): the only _F opcode in this list, despite the "not float" wording above -- confirm intended */
+ return true;
+
+ default: /* every opcode not listed above */
+ return false;
+ }
+}
+
+
+/* map cat2 instruction to valid abs/neg flags (a mask of IR3_REG_* bits, 0 when none apply): */
+static inline unsigned ir3_cat2_absneg(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ADD_F: /* _F opcodes: float abs and float neg are both valid */
+ case OPC_MIN_F:
+ case OPC_MAX_F:
+ case OPC_MUL_F:
+ case OPC_SIGN_F:
+ case OPC_CMPS_F:
+ case OPC_ABSNEG_F:
+ case OPC_CMPV_F:
+ case OPC_FLOOR_F:
+ case OPC_CEIL_F:
+ case OPC_RNDNE_F:
+ case OPC_RNDAZ_F:
+ case OPC_TRUNC_F:
+ case OPC_BARY_F:
+ return IR3_REG_FABS | IR3_REG_FNEG;
+
+ case OPC_ADD_U: /* _U/_S opcodes in this group: no abs/neg flag is valid */
+ case OPC_ADD_S:
+ case OPC_SUB_U:
+ case OPC_SUB_S:
+ case OPC_CMPS_U:
+ case OPC_CMPS_S:
+ case OPC_MIN_U:
+ case OPC_MIN_S:
+ case OPC_MAX_U:
+ case OPC_MAX_S:
+ case OPC_CMPV_U:
+ case OPC_CMPV_S:
+ case OPC_MUL_U:
+ case OPC_MUL_S:
+ case OPC_MULL_U:
+ case OPC_CLZ_S:
+ return 0;
+
+ case OPC_ABSNEG_S: /* the only opcode here mapped to the SABS/SNEG flags */
+ return IR3_REG_SABS | IR3_REG_SNEG;
+
+ case OPC_AND_B: /* _B opcodes: only the IR3_REG_BNOT flag is valid */
+ case OPC_OR_B:
+ case OPC_NOT_B:
+ case OPC_XOR_B:
+ case OPC_BFREV_B:
+ case OPC_CLZ_B:
+ case OPC_SHL_B:
+ case OPC_SHR_B:
+ case OPC_ASHR_B:
+ case OPC_MGEN_B:
+ case OPC_GETBIT_B:
+ case OPC_CBITS_B:
+ return IR3_REG_BNOT;
+
+ default: /* opcodes not listed above: no abs/neg flag is valid */
+ return 0;
+ }
+}
+
+/* map cat3 instructions to valid abs/neg flags (either IR3_REG_FNEG or 0; no abs flag is ever returned): */
+static inline unsigned ir3_cat3_absneg(opc_t opc)
+{
+ switch (opc) {
+ case OPC_MAD_F16: /* _F16/_F32 mad and sel: only float neg is valid */
+ case OPC_MAD_F32:
+ case OPC_SEL_F16:
+ case OPC_SEL_F32:
+ return IR3_REG_FNEG;
+
+ case OPC_MAD_U16: /* listed for documentation; falls through to the default return of 0 */
+ case OPC_MADSH_U16:
+ case OPC_MAD_S16:
+ case OPC_MADSH_M16:
+ case OPC_MAD_U24:
+ case OPC_MAD_S24:
+ case OPC_SEL_S16:
+ case OPC_SEL_S32:
+ case OPC_SAD_S16:
+ case OPC_SAD_S32:
+ /* neg *may* work on 3rd src.. */
+
+ case OPC_SEL_B16: /* likewise falls through to the default return of 0 */
+ case OPC_SEL_B32:
+
+ default: /* shared exit for all cases above without their own return */
+ return 0;
+ }
+}
+
#define array_insert(arr, val) do { \
if (arr ## _count == arr ## _sz) { \
arr ## _sz = MAX2(2 * arr ## _sz, 16); \
else
compile_assert(ctx, src->Index < (1 << 6));
+ /* NOTE: abs/neg modifiers in tgsi only apply to float */
if (src->Absolute)
- flags |= IR3_REG_ABS;
+ flags |= IR3_REG_FABS;
if (src->Negate)
- flags |= IR3_REG_NEGATE;
+ flags |= IR3_REG_FNEG;
if (src->Indirect) {
flags |= IR3_REG_RELATIV;
} else {
reg = add_src_reg(ctx, cur, src, src_swiz(src, i));
}
- reg->flags |= flags & ~IR3_REG_NEGATE;
- if (flags & IR3_REG_NEGATE)
- reg->flags ^= IR3_REG_NEGATE;
+ reg->flags |= flags & ~(IR3_REG_FNEG | IR3_REG_SNEG);
+ if (flags & IR3_REG_FNEG)
+ reg->flags ^= IR3_REG_FNEG;
+ if (flags & IR3_REG_SNEG)
+ reg->flags ^= IR3_REG_SNEG;
}
va_end(ap);
}
case TGSI_OPCODE_FSLT:
/* absneg.s dst, (neg)tmp0 */
instr = instr_create(ctx, 2, OPC_ABSNEG_S);
- vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_NEGATE);
+ vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_SNEG);
break;
case TGSI_OPCODE_CMP:
a1 = &inst->Src[1].Register;
/* absneg.s dst, (neg)tmp */
instr = instr_create(ctx, 2, OPC_ABSNEG_S);
- vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_NEGATE);
+ vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_SNEG);
put_dst(ctx, inst, dst);
}
if (type_sint(src_type)) {
/* absneg.f af, (abs)af */
instr = instr_create(ctx, 2, OPC_ABSNEG_F);
- vectorize(ctx, instr, &af_dst, 1, af_src, IR3_REG_ABS);
+ vectorize(ctx, instr, &af_dst, 1, af_src, IR3_REG_FABS);
/* absneg.f bf, (abs)bf */
instr = instr_create(ctx, 2, OPC_ABSNEG_F);
- vectorize(ctx, instr, &bf_dst, 1, bf_src, IR3_REG_ABS);
+ vectorize(ctx, instr, &bf_dst, 1, bf_src, IR3_REG_FABS);
/* absneg.s a, (abs)numerator */
instr = instr_create(ctx, 2, OPC_ABSNEG_S);
- vectorize(ctx, instr, &a_dst, 1, a, IR3_REG_ABS);
+ vectorize(ctx, instr, &a_dst, 1, a, IR3_REG_SABS);
/* absneg.s b, (abs)denominator */
instr = instr_create(ctx, 2, OPC_ABSNEG_S);
- vectorize(ctx, instr, &b_dst, 1, b, IR3_REG_ABS);
+ vectorize(ctx, instr, &b_dst, 1, b, IR3_REG_SABS);
} else {
/* mov.u32u32 a, numerator */
instr = instr_create(ctx, 1, 0);
/* absneg.s b, (neg)q */
instr = instr_create(ctx, 2, OPC_ABSNEG_S);
- vectorize(ctx, instr, &b_dst, 1, q_src, IR3_REG_NEGATE);
+ vectorize(ctx, instr, &b_dst, 1, q_src, IR3_REG_SNEG);
/* sel.b dst, b, r, q */
instr = instr_create(ctx, 3, OPC_SEL_B32);
switch (t->tgsi_opc) {
case TGSI_OPCODE_ABS:
+ src0_flags = IR3_REG_FABS;
+ break;
case TGSI_OPCODE_IABS:
- src0_flags = IR3_REG_ABS;
+ src0_flags = IR3_REG_SABS;
break;
case TGSI_OPCODE_INEG:
- src0_flags = IR3_REG_NEGATE;
+ src0_flags = IR3_REG_SNEG;
break;
case TGSI_OPCODE_SUB:
- src1_flags = IR3_REG_NEGATE;
+ src1_flags = IR3_REG_FNEG;
break;
}