}
}
- assert(0);
+ NOUVEAU_ERR("out of registers\n");
+ abort();
}
static INLINE struct nv50_reg *
}
}
- assert(0);
+ NOUVEAU_ERR("out of registers\n");
+ abort();
return NULL;
}
e->inst[1] |= ((src->hw & 127) << 14);
}
+static void
+set_half_src(struct nv50_pc *pc, struct nv50_reg *src, int lh,
+ struct nv50_program_exec *e, int pos)
+{
+ struct nv50_reg *r = src;
+
+ alloc_reg(pc, r);
+ if (r->type != P_TEMP) {
+ r = temp_temp(pc, e);
+ emit_mov(pc, r, src);
+ }
+
+ if (r->hw > (NV50_SU_MAX_TEMP / 2)) {
+ NOUVEAU_ERR("out of low GPRs\n");
+ abort();
+ }
+
+ e->inst[pos / 32] |= ((src->hw * 2) + lh) << (pos % 32);
+}
+
static void
emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred)
{
emit(pc, e);
}
+static void
+emit_not(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xd0000000;
+ e->inst[1] = 0x0402c000;
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+ set_src_1(pc, src, e);
+
+ emit(pc, e);
+}
+
static void
emit_shift(struct nv50_pc *pc, struct nv50_reg *dst,
struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir)
emit(pc, e);
}
+static void
+emit_shl_imm(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src, int s)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x30000000;
+ e->inst[1] = 0xc4100000;
+ if (s < 0) {
+ e->inst[1] |= 1 << 29;
+ s = -s;
+ }
+ e->inst[1] |= ((s & 0x7f) << 16);
+
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+ set_src_0(pc, src, e);
+
+ emit(pc, e);
+}
+
static void
emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1, struct nv50_reg *src2)
emit(pc, e);
}
+static void
+emit_mad_u16(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1,
+ struct nv50_reg *src2)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x60000000;
+ if (!pc->allow32)
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+
+ set_half_src(pc, src0, lh_0, e, 9);
+ set_half_src(pc, src1, lh_1, e, 16);
+ alloc_reg(pc, src2);
+ if (is_long(e) || (src2->type != P_TEMP) || (src2->hw != dst->hw))
+ set_src_2(pc, src2, e);
+
+ emit(pc, e);
+}
+
+static void
+emit_mul_u16(struct nv50_pc *pc, struct nv50_reg *dst,
+ struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0x40000000;
+ set_long(pc, e);
+ set_dst(pc, dst, e);
+
+ set_half_src(pc, src0, lh_0, e, 9);
+ set_half_src(pc, src1, lh_1, e, 16);
+
+ emit(pc, e);
+}
+
static void
emit_sad(struct nv50_pc *pc, struct nv50_reg *dst,
struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2)
struct nv50_program_exec *e = exec(pc);
e->inst[0] = 0x50000000;
+ if (!pc->allow32)
+ set_long(pc, e);
+ check_swap_src_0_1(pc, &src0, &src1);
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
set_src_1(pc, src1, e);
e->inst[1] |= 0x0c << 24;
else
e->inst[0] |= 0x81 << 8;
+
+ emit(pc, e);
}
static INLINE void
case 0x5:
/* SAD */
m = ~(0x81 << 8);
- q = 0x0c << 24;
+ q = (0x0c << 24) | ((e->inst[0] & (0x7f << 2)) << 12);
+ break;
+ case 0x6:
+ /* MAD u16 */
+ q = (e->inst[0] & (0x7f << 2)) << 12;
break;
case 0x8:
/* INTERP (move centroid, perspective and flat bits) */
case TGSI_OPCODE_IMAX:
case TGSI_OPCODE_IMIN:
case TGSI_OPCODE_ISHR:
+ case TGSI_OPCODE_NOT:
+ case TGSI_OPCODE_UMAD:
case TGSI_OPCODE_UMAX:
case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_UMUL:
case TGSI_OPCODE_USHR:
return NV50_MOD_I32;
default:
emit_mul(pc, dst[c], src[0][c], src[1][c]);
}
break;
+ case TGSI_OPCODE_NOT:
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_not(pc, dst[c], src[0][c]);
+ }
+ break;
case TGSI_OPCODE_POW:
emit_pow(pc, brdc, src[0][0], src[1][0]);
break;
emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]);
}
break;
+ case TGSI_OPCODE_UMAD:
+ {
+ assert(!temp);
+ temp = temp_temp(pc, NULL);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1);
+ emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0,
+ temp);
+ emit_shl_imm(pc, temp, temp, 16);
+ emit_mad_u16(pc, temp, src[0][c], 0, src[1][c], 0,
+ temp);
+ emit_add_b32(pc, dst[c], temp, src[2][c]);
+ }
+ }
+ break;
+ case TGSI_OPCODE_UMUL:
+ {
+ assert(!temp);
+ temp = temp_temp(pc, NULL);
+ for (c = 0; c < 4; c++) {
+ if (!(mask & (1 << c)))
+ continue;
+ emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1);
+ emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0,
+ temp);
+ emit_shl_imm(pc, temp, temp, 16);
+ emit_mad_u16(pc, dst[c], src[0][c], 0, src[1][c], 0,
+ temp);
+ }
+ }
+ break;
case TGSI_OPCODE_XPD:
temp = temp_temp(pc, NULL);
if (mask & (1 << 0)) {