FREE(src);
}
+/* release the hardware resource held by r */
+static void
+release_hw(struct nv50_pc *pc, struct nv50_reg *r)
+{
+ assert(r->type == P_TEMP);
+ if (r->hw == -1)
+ return;
+
+ assert(pc->r_temp[r->hw] == r);
+ pc->r_temp[r->hw] = NULL;
+
+ r->acc = 0;
+ if (r->index == -1)
+ FREE(r);
+}
+
static void
free_temp(struct nv50_pc *pc, struct nv50_reg *r)
{
static INLINE void
set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
{
- assert(imm->hw >= pc->param_nr * 4);
- unsigned val = fui(pc->immd_buf[imm->hw - pc->param_nr * 4]);
+ float f = pc->immd_buf[imm->hw];
+ unsigned val = fui(imm->neg ? -f : f);
set_long(pc, e);
/*XXX: can't be predicated - bits overlap.. catch cases where both
struct nv50_program_exec *e)
{
set_long(pc, e);
-#if 1
- e->inst[1] |= (1 << 22);
-#else
- if (src->type == P_IMMD) {
- e->inst[1] |= (NV50_CB_PMISC << 22);
- } else {
- if (pc->p->type == PIPE_SHADER_VERTEX)
- e->inst[1] |= (NV50_CB_PVP << 22);
- else
- e->inst[1] |= (NV50_CB_PFP << 22);
- }
-#endif
e->param.index = src->hw;
e->param.shift = s;
e->param.mask = m << (s % 32);
+
+ e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
}
static void
check_swap_src_0_1(pc, &src0, &src1);
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
- if (src1->type == P_IMMD && !is_long(e))
+ if (src1->type == P_IMMD && !is_long(e)) {
+ if (src0->neg)
+ e->inst[0] |= 0x00008000;
set_immd(pc, src1, e);
- else
+ } else {
set_src_1(pc, src1, e);
+ if (src0->neg ^ src1->neg) {
+ if (is_long(e))
+ e->inst[1] |= 0x08000000;
+ else
+ e->inst[0] |= 0x00008000;
+ }
+ }
emit(pc, e);
}
e->inst[0] |= 0xb0000000;
- if (!pc->allow32)
+ check_swap_src_0_1(pc, &src0, &src1);
+
+ if (!pc->allow32 || src0->neg || src1->neg) {
set_long(pc, e);
+ e->inst[1] |= (src0->neg << 26) | (src1->neg << 27);
+ }
- check_swap_src_0_1(pc, &src0, &src1);
set_dst(pc, dst, e);
set_src_0(pc, src0, e);
- if (is_long(e) || src1->type == P_CONST || src1->type == P_ATTR)
+ if (src1->type == P_CONST || src1->type == P_ATTR || is_long(e))
set_src_2(pc, src1, e);
else
if (src1->type == P_IMMD)
emit(pc, e);
}
-static void
+static INLINE void
emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0xb0000000;
-
- set_long(pc, e);
- if (check_swap_src_0_1(pc, &src0, &src1))
- e->inst[1] |= 0x04000000;
- else
- e->inst[1] |= 0x08000000;
-
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- set_src_2(pc, src1, e);
-
- emit(pc, e);
+ src1->neg ^= 1;
+ emit_add(pc, dst, src0, src1);
+ src1->neg ^= 1;
}
static void
set_src_1(pc, src1, e);
set_src_2(pc, src2, e);
+ if (src0->neg ^ src1->neg)
+ e->inst[1] |= 0x04000000;
+ if (src2->neg)
+ e->inst[1] |= 0x08000000;
+
emit(pc, e);
}
-static void
+static INLINE void
emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
struct nv50_reg *src1, struct nv50_reg *src2)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] |= 0xe0000000;
- set_long(pc, e);
- e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */
-
- check_swap_src_0_1(pc, &src0, &src1);
- set_dst(pc, dst, e);
- set_src_0(pc, src0, e);
- set_src_1(pc, src1, e);
- set_src_2(pc, src2, e);
-
- emit(pc, e);
+ src2->neg ^= 1;
+ emit_mad(pc, dst, src0, src1, src2);
+ src2->neg ^= 1;
}
static void
emit(pc, e);
}
+#define CVTOP_RN 0x01
+#define CVTOP_FLOOR 0x03
+#define CVTOP_CEIL 0x05
+#define CVTOP_TRUNC 0x07
+#define CVTOP_SAT 0x08
+#define CVTOP_ABS 0x10
+
+#define CVT_F32_F32 0xc4
+#define CVT_F32_S32 0x44
+#define CVT_F32_U32 0x64
+#define CVT_S32_F32 0x8c
+#define CVT_S32_S32 0x0c
+#define CVT_F32_F32_ROP 0xcc
+
+static void
+emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
+ int wp, unsigned cop, unsigned fmt)
+{
+ struct nv50_program_exec *e;
+
+ e = exec(pc);
+ set_long(pc, e);
+
+ e->inst[0] |= 0xa0000000;
+ e->inst[1] |= 0x00004000;
+ e->inst[1] |= (cop << 16);
+ e->inst[1] |= (fmt << 24);
+ set_src_0(pc, src, e);
+
+ if (wp >= 0)
+ set_pred_wr(pc, 1, wp, e);
+
+ if (dst)
+ set_dst(pc, dst, e);
+ else {
+ e->inst[0] |= 0x000001fc;
+ e->inst[1] |= 0x00000008;
+ }
+
+ emit(pc, e);
+}
+
static void
emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
struct nv50_reg *src0, struct nv50_reg *src1)
free_temp(pc, dst);
}
-static void
+static INLINE void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xa0000000; /* cvt */
- set_long(pc, e);
- e->inst[1] |= (6 << 29); /* cvt */
- e->inst[1] |= 0x08000000; /* integer mode */
- e->inst[1] |= 0x04000000; /* 32 bit */
- e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */
- e->inst[1] |= (1 << 14); /* src .f32 */
- set_dst(pc, dst, e);
- set_src_0(pc, src, e);
-
- emit(pc, e);
+ emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32_ROP);
}
static void
free_temp(pc, temp);
}
-static void
+static INLINE void
emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- struct nv50_program_exec *e = exec(pc);
-
- e->inst[0] = 0xa0000000; /* cvt */
- set_long(pc, e);
- e->inst[1] |= (6 << 29); /* cvt */
- e->inst[1] |= 0x04000000; /* 32 bit */
- e->inst[1] |= (1 << 14); /* src .f32 */
- e->inst[1] |= ((1 << 6) << 14); /* .abs */
- set_dst(pc, dst, e);
- set_src_0(pc, src, e);
-
- emit(pc, e);
+ emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);
}
static void
e->inst[1] = 0xc4014788;
set_src_0(pc, src, e);
set_pred_wr(pc, 1, r_pred, e);
+ if (src->neg)
+ e->inst[1] |= 0x20000000;
emit(pc, e);
/* This is probably KILP */
emit(pc, e);
}
+static void
+emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+ struct nv50_reg **src, unsigned unit, unsigned type, boolean proj)
+{
+ struct nv50_reg *temp, *t[4];
+ struct nv50_program_exec *e;
+
+ unsigned c, mode, dim;
+
+ switch (type) {
+ case TGSI_TEXTURE_1D:
+ dim = 1;
+ break;
+ case TGSI_TEXTURE_UNKNOWN:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */
+ case TGSI_TEXTURE_RECT:
+ dim = 2;
+ break;
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT: /* XXX */
+ dim = 3;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ alloc_temp4(pc, t, 0);
+
+ if (proj) {
+ if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
+ mode = pc->interp_mode[src[0]->index];
+
+ t[3]->rhw = src[3]->rhw;
+ emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
+ emit_flop(pc, 0, t[3], t[3]);
+
+ for (c = 0; c < dim; c++) {
+ t[c]->rhw = src[c]->rhw;
+ emit_interp(pc, t[c], t[3],
+ (mode | INTERP_PERSPECTIVE));
+ }
+ } else {
+ emit_flop(pc, 0, t[3], src[3]);
+ for (c = 0; c < dim; c++)
+ emit_mul(pc, t[c], src[c], t[3]);
+
+ /* XXX: for some reason the blob sometimes uses MAD:
+ * emit_mad(pc, t[c], src[0][c], t[3], t[3])
+ * pc->p->exec_tail->inst[1] |= 0x080fc000;
+ */
+ }
+ } else {
+ if (type == TGSI_TEXTURE_CUBE) {
+ temp = temp_temp(pc);
+ emit_minmax(pc, 4, temp, src[0], src[1]);
+ emit_minmax(pc, 4, temp, temp, src[2]);
+ emit_flop(pc, 0, temp, temp);
+ for (c = 0; c < 3; c++)
+ emit_mul(pc, t[c], src[c], temp);
+ } else {
+ for (c = 0; c < dim; c++)
+ emit_mov(pc, t[c], src[c]);
+ }
+ }
+
+ e = exec(pc);
+ set_long(pc, e);
+ e->inst[0] |= 0xf0000000;
+ e->inst[1] |= 0x00000004;
+ set_dst(pc, t[0], e);
+ e->inst[0] |= (unit << 9);
+
+ if (dim == 2)
+ e->inst[0] |= 0x00400000;
+ else
+ if (dim == 3)
+ e->inst[0] |= 0x00800000;
+
+ e->inst[0] |= (mask & 0x3) << 25;
+ e->inst[1] |= (mask & 0xc) << 12;
+
+ emit(pc, e);
+
+#if 1
+ if (mask & 1) emit_mov(pc, dst[0], t[0]);
+ if (mask & 2) emit_mov(pc, dst[1], t[1]);
+ if (mask & 4) emit_mov(pc, dst[2], t[2]);
+ if (mask & 8) emit_mov(pc, dst[3], t[3]);
+
+ free_temp4(pc, t);
+#else
+ /* XXX: if p.e. MUL is used directly after TEX, it would still use
+ * the texture coordinates, not the fetched values: latency ? */
+
+ for (c = 0; c < 4; c++) {
+ if (mask & (1 << c))
+ assimilate_temp(pc, dst[c], t[c]);
+ else
+ free_temp(pc, t[c]);
+ }
+#endif
+}
+
static void
convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
{
e->inst[1] |= q;
}
+static boolean
+negate_supported(const struct tgsi_full_instruction *insn, int i)
+{
+ switch (insn->Instruction.Opcode) {
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_KIL:
+ case TGSI_OPCODE_ADD:
+ case TGSI_OPCODE_SUB:
+ case TGSI_OPCODE_MAD:
+ return TRUE;
+ case TGSI_OPCODE_POW:
+ return (i == 1) ? TRUE : FALSE;
+ default:
+ return FALSE;
+ }
+}
+
static struct nv50_reg *
tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
{
}
static struct nv50_reg *
-tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src)
+tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
+ boolean neg)
{
struct nv50_reg *r = NULL;
struct nv50_reg *temp;
r = temp;
break;
case TGSI_UTIL_SIGN_TOGGLE:
- temp = temp_temp(pc);
- emit_neg(pc, temp, r);
- r = temp;
+ if (neg)
+ r->neg = 1;
+ else {
+ temp = temp_temp(pc);
+ emit_neg(pc, temp, r);
+ r = temp;
+ }
break;
case TGSI_UTIL_SIGN_SET:
temp = temp_temp(pc);
emit_abs(pc, temp, r);
- emit_neg(pc, temp, temp);
+ if (neg)
+ temp->neg = 1;
+ else
+ emit_neg(pc, temp, temp);
r = temp;
break;
default:
unit = fs->SrcRegister.Index;
for (c = 0; c < 4; c++)
- src[i][c] = tgsi_src(pc, c, fs);
+ src[i][c] = tgsi_src(pc, c, fs,
+ negate_supported(inst, i));
}
if (sat) {
}
break;
case TGSI_OPCODE_TEX:
+ emit_tex(pc, dst, mask, src[0], unit,
+ inst->InstructionExtTexture.Texture, FALSE);
+ break;
case TGSI_OPCODE_TXP:
- {
- struct nv50_reg *t[4];
- struct nv50_program_exec *e;
-
- alloc_temp4(pc, t, 0);
- emit_mov(pc, t[0], src[0][0]);
- emit_mov(pc, t[1], src[0][1]);
-
- e = exec(pc);
- e->inst[0] = 0xf6400000;
- e->inst[0] |= (unit << 9);
- set_long(pc, e);
- e->inst[1] |= 0x0000c004;
- set_dst(pc, t[0], e);
- emit(pc, e);
-
- if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]);
- if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]);
- if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]);
- if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]);
-
- free_temp4(pc, t);
- }
+ emit_tex(pc, dst, mask, src[0], unit,
+ inst->InstructionExtTexture.Texture, TRUE);
break;
case TGSI_OPCODE_XPD:
temp = temp_temp(pc);
if (sat) {
for (c = 0; c < 4; c++) {
- struct nv50_program_exec *e;
-
if (!(mask & (1 << c)))
continue;
- e = exec(pc);
-
- e->inst[0] = 0xa0000000; /* cvt */
- set_long(pc, e);
- e->inst[1] |= (6 << 29); /* cvt */
- e->inst[1] |= 0x04000000; /* 32 bit */
- e->inst[1] |= (1 << 14); /* src .f32 */
- e->inst[1] |= ((1 << 5) << 14); /* .sat */
- set_dst(pc, rdst[c], e);
- set_src_0(pc, dst[c], e);
- emit(pc, e);
+ emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT,
+ CVT_F32_F32);
}
} else if (assimilate) {
for (c = 0; c < 4; c++)
continue;
if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
FREE(src[i][c]);
+ else
+ if (src[i][c]->acc == pc->insn_cur)
+ release_hw(pc, src[i][c]);
}
}
}
if (pc->immd_nr) {
- int rid = pc->param_nr * 4;
+ int rid = 0;
pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg));
if (!pc->immd)
static void
free_nv50_pc(struct nv50_pc *pc)
{
- unsigned i;
-
if (pc->immd)
FREE(pc->immd);
if (pc->param)
if (pc->temp)
FREE(pc->temp);
- for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
- /* deallocate fragment program attributes */
- if (pc->r_temp[i] && pc->r_temp[i]->index == -1)
- FREE(pc->r_temp[i]);
- }
-
FREE(pc);
}
static void
nv50_program_upload_data(struct nv50_context *nv50, float *map,
- unsigned start, unsigned count)
+ unsigned start, unsigned count, unsigned cbuf)
{
struct nouveau_channel *chan = nv50->screen->nvws->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
unsigned nr = count > 2047 ? 2047 : count;
BEGIN_RING(chan, tesla, 0x00000f00, 1);
- OUT_RING (chan, (NV50_CB_PMISC << 0) | (start << 8));
+ OUT_RING (chan, (cbuf << 0) | (start << 8));
BEGIN_RING(chan, tesla, 0x40000f04, nr);
OUT_RINGp (chan, map, nr);
{
struct nouveau_winsys *nvws = nv50->screen->nvws;
struct pipe_winsys *ws = nv50->pipe.winsys;
- unsigned nr = p->param_nr + p->immd_nr;
- if (!p->data && nr) {
- struct nouveau_resource *heap = nv50->screen->vp_data_heap;
+ if (!p->data[0] && p->immd_nr) {
+ struct nouveau_resource *heap = nv50->screen->immd_heap[0];
- if (nvws->res_alloc(heap, nr, p, &p->data)) {
- while (heap->next && heap->size < nr) {
+ if (nvws->res_alloc(heap, p->immd_nr, p, &p->data[0])) {
+ while (heap->next && heap->size < p->immd_nr) {
struct nv50_program *evict = heap->next->priv;
- nvws->res_free(&evict->data);
+ nvws->res_free(&evict->data[0]);
}
- if (nvws->res_alloc(heap, nr, p, &p->data))
+ if (nvws->res_alloc(heap, p->immd_nr, p, &p->data[0]))
+ assert(0);
+ }
+
+ /* immediates only need to be uploaded again when freed */
+ nv50_program_upload_data(nv50, p->immd, p->data[0]->start,
+ p->immd_nr, NV50_CB_PMISC);
+ }
+
+ if (!p->data[1] && p->param_nr) {
+ struct nouveau_resource *heap =
+ nv50->screen->parm_heap[p->type];
+
+ if (nvws->res_alloc(heap, p->param_nr, p, &p->data[1])) {
+ while (heap->next && heap->size < p->param_nr) {
+ struct nv50_program *evict = heap->next->priv;
+ nvws->res_free(&evict->data[1]);
+ }
+
+ if (nvws->res_alloc(heap, p->param_nr, p, &p->data[1]))
assert(0);
}
}
if (p->param_nr) {
+ unsigned cbuf = NV50_CB_PVP;
float *map = ws->buffer_map(ws, nv50->constbuf[p->type],
PIPE_BUFFER_USAGE_CPU_READ);
- nv50_program_upload_data(nv50, map, p->data->start,
- p->param_nr);
+ if (p->type == PIPE_SHADER_FRAGMENT)
+ cbuf = NV50_CB_PFP;
+ nv50_program_upload_data(nv50, map, p->data[1]->start,
+ p->param_nr, cbuf);
ws->buffer_unmap(ws, nv50->constbuf[p->type]);
}
-
- if (p->immd_nr) {
- nv50_program_upload_data(nv50, p->immd,
- p->data->start + p->param_nr,
- p->immd_nr);
- }
}
static void
upload = TRUE;
}
- if (p->data && p->data->start != p->data_start) {
+ if ((p->data[0] && p->data[0]->start != p->data_start[0]) ||
+ (p->data[1] && p->data[1]->start != p->data_start[1])) {
for (e = p->exec_head; e; e = e->next) {
- unsigned ei, ci;
+ unsigned ei, ci, bs;
if (e->param.index < 0)
continue;
+ bs = (e->inst[1] >> 22) & 0x07;
+ assert(bs < 2);
ei = e->param.shift >> 5;
- ci = e->param.index + p->data->start;
+ ci = e->param.index + p->data[bs]->start;
e->inst[ei] &= ~e->param.mask;
e->inst[ei] |= (ci << e->param.shift);
}
- p->data_start = p->data->start;
+ if (p->data[0])
+ p->data_start[0] = p->data[0]->start;
+ if (p->data[1])
+ p->data_start[1] = p->data[1]->start;
+
upload = TRUE;
}
if (p->buffer)
pipe_buffer_reference(&p->buffer, NULL);
- nv50->screen->nvws->res_free(&p->data);
+ nv50->screen->nvws->res_free(&p->data[0]);
+ nv50->screen->nvws->res_free(&p->data[1]);
p->translated = 0;
}