#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
#include "vc4_context.h"
#include "vc4_qpu.h"
+#include "vc4_qir.h"
+
+/**
+ * State carried while translating one TGSI token stream into QIR.
+ */
+struct tgsi_to_qir {
+ struct tgsi_parse_context parser;
+ struct qcompile *c;
+ /* Per-channel value tables, indexed as (register index * 4 + channel). */
+ struct qreg *temps;
+ struct qreg *inputs;
+ struct qreg *outputs;
+ /* uniforms[i] holds the value produced by MOVing uniform slot i. */
+ struct qreg *uniforms;
+ /* Immediate values; four consecutive entries are added per immediate. */
+ struct qreg *consts;
+ uint32_t num_consts;
+
+ /* Parallel per-slot arrays; the first num_uniforms entries are in use. */
+ uint32_t *uniform_data;
+ enum quniform_contents *uniform_contents;
+ uint32_t num_uniforms;
+};
+
+/**
+ * Emits a MOV that reads uniform slot \p uniform, remembers the resulting
+ * value in trans->uniforms[uniform] and returns it.
+ */
+static struct qreg
+get_temp_for_uniform(struct tgsi_to_qir *trans, uint32_t uniform)
+{
+        struct qreg unif_reg = { QFILE_UNIF, uniform };
+        struct qreg loaded = qir_MOV(trans->c, unif_reg);
+
+        trans->uniforms[uniform] = loaded;
+
+        return loaded;
+}
+
+/**
+ * Returns the value for a uniform slot holding the constant \p ui, reusing
+ * an existing QUNIFORM_CONSTANT slot with the same data when there is one
+ * and appending a new slot otherwise.
+ */
+static struct qreg
+qir_uniform_ui(struct tgsi_to_qir *trans, uint32_t ui)
+{
+        int slot = 0;
+
+        while (slot < trans->num_uniforms) {
+                if (trans->uniform_contents[slot] == QUNIFORM_CONSTANT &&
+                    trans->uniform_data[slot] == ui) {
+                        return trans->uniforms[slot];
+                }
+                slot++;
+        }
+
+        /* No match: claim the next free slot for this constant. */
+        uint32_t fresh = trans->num_uniforms++;
+        trans->uniform_contents[fresh] = QUNIFORM_CONSTANT;
+        trans->uniform_data[fresh] = ui;
+
+        return get_temp_for_uniform(trans, fresh);
+}
+
+/**
+ * Float flavour of qir_uniform_ui(): passes the result of fui(f) on as the
+ * constant's data.
+ */
+static struct qreg
+qir_uniform_f(struct tgsi_to_qir *trans, float f)
+{
+        uint32_t bits = fui(f);
+
+        return qir_uniform_ui(trans, bits);
+}
+
+/**
+ * Returns the value for a uniform slot tagged QUNIFORM_UNIFORM whose data is
+ * \p index, reusing a matching slot when one exists and appending a new one
+ * otherwise.
+ */
+static struct qreg
+qir_uniform(struct tgsi_to_qir *trans, uint32_t index)
+{
+        int slot = 0;
+
+        while (slot < trans->num_uniforms) {
+                if (trans->uniform_contents[slot] == QUNIFORM_UNIFORM &&
+                    trans->uniform_data[slot] == index) {
+                        return trans->uniforms[slot];
+                }
+                slot++;
+        }
+
+        /* No match: claim the next free slot for this uniform. */
+        uint32_t fresh = trans->num_uniforms++;
+        trans->uniform_contents[fresh] = QUNIFORM_UNIFORM;
+        trans->uniform_data[fresh] = index;
+
+        return get_temp_for_uniform(trans, fresh);
+}
+
+/**
+ * Fetches the QIR value for channel \p i of a TGSI source operand.
+ *
+ * The channel is first routed through the operand's swizzle, then looked up
+ * in the table matching the operand's register file.  Absolute-value and
+ * negate modifiers are applied afterwards by emitting extra instructions.
+ * Indirect addressing is not supported; unknown files and channels abort.
+ *
+ * \return c->undef for TGSI_FILE_NULL, otherwise the (modified) source value.
+ */
+static struct qreg
+get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i)
+{
+        struct qcompile *c = trans->c;
+        struct qreg r = c->undef;
+        uint32_t s;
+
+        switch (i) {
+        case TGSI_SWIZZLE_X:
+                s = src->SwizzleX;
+                break;
+        case TGSI_SWIZZLE_Y:
+                s = src->SwizzleY;
+                break;
+        case TGSI_SWIZZLE_Z:
+                s = src->SwizzleZ;
+                break;
+        case TGSI_SWIZZLE_W:
+                s = src->SwizzleW;
+                break;
+        default:
+                abort();
+        }
+
+        assert(!src->Indirect);
+
+        switch (src->File) {
+        case TGSI_FILE_NULL:
+                return r;
+        case TGSI_FILE_TEMPORARY:
+                r = trans->temps[src->Index * 4 + s];
+                break;
+        case TGSI_FILE_IMMEDIATE:
+                r = trans->consts[src->Index * 4 + s];
+                break;
+        case TGSI_FILE_CONSTANT:
+                r = qir_uniform(trans, src->Index * 4 + s);
+                break;
+        case TGSI_FILE_INPUT:
+                r = trans->inputs[src->Index * 4 + s];
+                break;
+        default:
+                fprintf(stderr, "unknown src file %d\n", src->File);
+                abort();
+        }
+
+        /* FMAXABS(r, r) is used here to produce the absolute value of r. */
+        if (src->Absolute)
+                r = qir_FMAXABS(c, r, r);
+
+        /* Negation is expressed as 0 - r. */
+        if (src->Negate)
+                r = qir_FSUB(c, qir_uniform_f(trans, 0), r);
+
+        return r;
+}
+
static void
-vc4_dump_program(const uint64_t *insts, uint count)
+update_dst(struct tgsi_to_qir *trans, struct tgsi_full_instruction *tgsi_inst,
+ int i, struct qreg val)
{
- for (int i = 0; i < count; i++) {
- fprintf(stderr, "0x%016"PRIx64" ", insts[i]);
- vc4_qpu_disasm(&insts[i], 1);
- fprintf(stderr, "\n");
+ struct tgsi_dst_register *tgsi_dst = &tgsi_inst->Dst[0].Register;
+
+ assert(!tgsi_dst->Indirect);
+
+ switch (tgsi_dst->File) {
+ case TGSI_FILE_TEMPORARY:
+ trans->temps[tgsi_dst->Index * 4 + i] = val;
+ break;
+ case TGSI_FILE_OUTPUT:
+ trans->outputs[tgsi_dst->Index * 4 + i] = val;
+ break;
+ default:
+ fprintf(stderr, "unknown dst file %d\n", tgsi_dst->File);
+ abort();
}
+};
+
+/**
+ * Emits a single QIR instruction \p op for channel \p i, taking its two
+ * operands from the first and second source operand tables, and returns
+ * the newly allocated destination value.
+ */
+static struct qreg
+tgsi_to_qir_alu(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        struct qreg result = qir_get_temp(trans->c);
+        struct qinst *inst = qir_inst(op, result,
+                                      src[0 * 4 + i], src[1 * 4 + i]);
+
+        qir_emit(trans->c, inst);
+
+        return result;
}
-static struct vc4_shader_state *
-vc4_shader_state_create(struct pipe_context *pctx,
- const struct pipe_shader_state *cso)
+/**
+ * Multiply-add for channel \p i: (src0 * src1) + src2, emitted as an FMUL
+ * followed by an FADD.
+ */
+static struct qreg
+tgsi_to_qir_mad(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
{
- struct vc4_shader_state *so = CALLOC_STRUCT(vc4_shader_state);
- if (!so)
- return NULL;
+        struct qcompile *c = trans->c;
+        struct qreg product = qir_FMUL(c, src[0 * 4 + i], src[1 * 4 + i]);
+
+        return qir_FADD(c, product, src[2 * 4 + i]);
+}
- so->base.tokens = tgsi_dup_tokens(cso->tokens);
+/**
+ * Dot product over the first \p num channels of the first two source
+ * operands.  The result does not depend on \p i.
+ */
+static struct qreg
+tgsi_to_qir_dp(struct tgsi_to_qir *trans,
+               struct tgsi_full_instruction *tgsi_inst,
+               int num, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
- return so;
+        struct qreg sum = qir_FMUL(c, src[0 * 4 + 0], src[1 * 4 + 0]);
+
+        for (int chan = 1; chan < num; chan++) {
+                struct qreg term = qir_FMUL(c,
+                                            src[0 * 4 + chan],
+                                            src[1 * 4 + chan]);
+                sum = qir_FADD(c, sum, term);
+        }
+
+        return sum;
}
-static void *
-vc4_fs_state_create(struct pipe_context *pctx,
- const struct pipe_shader_state *cso)
+/** Two-component dot product; \p op is ignored. */
+static struct qreg
+tgsi_to_qir_dp2(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
{
- struct vc4_context *vc4 = vc4_context(pctx);
- struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso);
- if (!so)
- return NULL;
+        const int num_components = 2;
+
+        return tgsi_to_qir_dp(trans, tgsi_inst, num_components, src, i);
+}
- uint64_t gen_fsc[100];
- uint64_t cur_inst;
- int gen_fsc_len = 0;
-#if 0
- cur_inst = qpu_load_imm_f(qpu_r5(), 0.0f);
- gen_fsc[gen_fsc_len++] = cur_inst;
-
- cur_inst = qpu_inst(qpu_a_MOV(qpu_r0(), qpu_vary()),
- qpu_m_MOV(qpu_r3(), qpu_r5()));
- cur_inst |= QPU_PM;
- cur_inst |= QPU_SET_FIELD(QPU_PACK_MUL_8D, QPU_PACK);
- gen_fsc[gen_fsc_len++] = cur_inst;
-
- cur_inst = qpu_inst(qpu_a_FADD(qpu_r0(), qpu_r0(), qpu_r5()),
- qpu_m_MOV(qpu_r1(), qpu_vary()));
- gen_fsc[gen_fsc_len++] = cur_inst;
-
- cur_inst = qpu_inst(qpu_a_FADD(qpu_r1(), qpu_r1(), qpu_r5()),
- qpu_m_MOV(qpu_r2(), qpu_vary()));
- cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_WAIT_FOR_SCOREBOARD, QPU_SIG);
- gen_fsc[gen_fsc_len++] = cur_inst;
-
- cur_inst = qpu_inst(qpu_a_FADD(qpu_r2(), qpu_r2(), qpu_r5()),
- qpu_m_MOV(qpu_r3(), qpu_r0()));
- cur_inst |= QPU_PM;
- cur_inst |= QPU_SET_FIELD(QPU_PACK_MUL_8A, QPU_PACK);
- gen_fsc[gen_fsc_len++] = cur_inst;
-
- cur_inst = qpu_inst(qpu_a_NOP(),
- qpu_m_MOV(qpu_r3(), qpu_r1()));
- cur_inst |= QPU_PM;
- cur_inst |= QPU_SET_FIELD(QPU_PACK_MUL_8B, QPU_PACK);
- gen_fsc[gen_fsc_len++] = cur_inst;
-
- cur_inst = qpu_inst(qpu_a_NOP(),
- qpu_m_MOV(qpu_r3(), qpu_r2()));
- cur_inst |= QPU_PM;
- cur_inst |= QPU_SET_FIELD(QPU_PACK_MUL_8C, QPU_PACK);
- gen_fsc[gen_fsc_len++] = cur_inst;
-
- cur_inst = qpu_inst(qpu_a_MOV(qpu_tlbc(), qpu_r3()),
- qpu_m_NOP());
- cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_PROG_END, QPU_SIG);
- gen_fsc[gen_fsc_len++] = cur_inst;
-
- cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
- gen_fsc[gen_fsc_len++] = cur_inst;
-
- cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
- cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_SCOREBOARD_UNLOCK, QPU_SIG);
- gen_fsc[gen_fsc_len++] = cur_inst;
-
-#else
-
- /* drain the varyings. */
- for (int i = 0; i < 3; i++) {
- cur_inst = qpu_inst(qpu_a_MOV(qpu_ra(QPU_W_NOP), qpu_rb(QPU_R_NOP)),
- qpu_m_NOP());
- if (i == 1)
- cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_WAIT_FOR_SCOREBOARD, QPU_SIG);
- gen_fsc[gen_fsc_len++] = cur_inst;
-
- cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
- gen_fsc[gen_fsc_len++] = cur_inst;
+/** Three-component dot product; \p op is ignored. */
+static struct qreg
+tgsi_to_qir_dp3(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        const int num_components = 3;
+
+        return tgsi_to_qir_dp(trans, tgsi_inst, num_components, src, i);
+}
+
+/** Four-component dot product; \p op is ignored. */
+static struct qreg
+tgsi_to_qir_dp4(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src, int i)
+{
+        const int num_components = 4;
+
+        return tgsi_to_qir_dp(trans, tgsi_inst, num_components, src, i);
+}
+
+/**
+ * Translates one TGSI instruction into QIR.
+ *
+ * The opcode is looked up in a table of per-channel translation callbacks.
+ * All three source operands are fetched for all four channels up front, and
+ * the callback is then run once per channel enabled in the destination
+ * write mask.  Saturation, when requested, is applied as a min/max clamp.
+ * TGSI_OPCODE_END is ignored; opcodes without a table entry abort.
+ */
+static void
+emit_tgsi_instruction(struct tgsi_to_qir *trans,
+                      struct tgsi_full_instruction *tgsi_inst)
+{
+        struct qcompile *c = trans->c;
+        /* Read-only lookup table, so it only needs to be built once. */
+        static const struct {
+                enum qop op;
+                struct qreg (*func)(struct tgsi_to_qir *trans,
+                                    struct tgsi_full_instruction *tgsi_inst,
+                                    enum qop op,
+                                    struct qreg *src, int i);
+        } op_trans[] = {
+                [TGSI_OPCODE_MOV] = { QOP_MOV, tgsi_to_qir_alu },
+                [TGSI_OPCODE_ABS] = { QOP_FMAXABS, tgsi_to_qir_alu },
+                [TGSI_OPCODE_MUL] = { QOP_FMUL, tgsi_to_qir_alu },
+                [TGSI_OPCODE_ADD] = { QOP_FADD, tgsi_to_qir_alu },
+                [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
+                [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
+                [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
+                [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
+                [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
+                [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
+                [TGSI_OPCODE_DP3] = { 0, tgsi_to_qir_dp3 },
+                [TGSI_OPCODE_DP4] = { 0, tgsi_to_qir_dp4 },
+                [TGSI_OPCODE_LIT] = { QOP_MOV, tgsi_to_qir_alu }, /* XXX */
+        };
+        static int asdf = 0;
+        uint32_t tgsi_op = tgsi_inst->Instruction.Opcode;
+
+        if (tgsi_op == TGSI_OPCODE_END)
+                return;
+
+        tgsi_dump_instruction(tgsi_inst, asdf++);
+
+        /* Valid indices are 0 .. ARRAY_SIZE - 1, so an opcode equal to the
+         * array size must be rejected as well.
+         */
+        if (tgsi_op >= ARRAY_SIZE(op_trans) || !op_trans[tgsi_op].func) {
+                fprintf(stderr, "unknown tgsi inst: ");
+                tgsi_dump_instruction(tgsi_inst, asdf++);
+                fprintf(stderr, "\n");
+                abort();
+        }
+
+        /* src_regs[4 * operand + channel] */
+        struct qreg src_regs[12];
+        for (int s = 0; s < 3; s++) {
+                for (int i = 0; i < 4; i++) {
+                        src_regs[4 * s + i] =
+                                get_src(trans, &tgsi_inst->Src[s].Register, i);
+                }
}
- /* some colors */
-#if 1
for (int i = 0; i < 4; i++) {
- cur_inst = qpu_load_imm_f(qpu_rn(i), .2 + i / 4.0);
- gen_fsc[gen_fsc_len++] = cur_inst;
+                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
+                        continue;
+
+                struct qreg result;
+
+                result = op_trans[tgsi_op].func(trans, tgsi_inst,
+                                                op_trans[tgsi_op].op,
+                                                src_regs, i);
+
+                if (tgsi_inst->Instruction.Saturate) {
+                        /* Clamp to [low, 1.0]. */
+                        float low = (tgsi_inst->Instruction.Saturate ==
+                                     TGSI_SAT_MINUS_PLUS_ONE ? -1.0 : 0.0);
+                        result = qir_FMAX(c,
+                                          qir_FMIN(c,
+                                                   result,
+                                                   qir_uniform_f(trans, 1.0)),
+                                          qir_uniform_f(trans, low));
+                }
+
+                update_dst(trans, tgsi_inst, i, result);
}
+}
+/**
+ * Appends the four channels of a TGSI immediate to trans->consts, each one
+ * backed by a constant uniform obtained from qir_uniform_ui().
+ */
+static void
+parse_tgsi_immediate(struct tgsi_to_qir *trans, struct tgsi_full_immediate *imm)
+{
for (int i = 0; i < 4; i++) {
- cur_inst = qpu_inst(qpu_a_NOP(),
- qpu_m_FMUL(qpu_ra(1),
- qpu_rn(i), qpu_rn(i)));
- cur_inst |= QPU_PM;
- cur_inst |= QPU_SET_FIELD(QPU_PACK_A_8A + i, QPU_PACK);
- gen_fsc[gen_fsc_len++] = cur_inst;
+ /* Claim the next free slot in the immediates table. */
+ unsigned n = trans->num_consts++;
+ trans->consts[n] = qir_uniform_ui(trans, imm->u[i].Uint);
}
-#else
- cur_inst = qpu_load_imm_ui(qpu_ra(1), 0x22446688);
- gen_fsc[gen_fsc_len++] = cur_inst;
-#endif
+}
- cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
- gen_fsc[gen_fsc_len++] = cur_inst;
+/**
+ * Fragment-shader prologue: fills the first four input slots with the
+ * constants 0/4, 1/4, 2/4 and 3/4 as placeholders.  \p so is unused.
+ */
+static void
+emit_frag_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+{
+        /* XXX: lols */
+        for (int chan = 0; chan < 4; chan++) {
+                uint32_t bits = fui(chan / 4.0);
+
+                trans->inputs[chan] = qir_uniform_ui(trans, bits);
+        }
- cur_inst = qpu_inst(qpu_a_MOV(qpu_tlbc(), qpu_ra(1)),
- qpu_m_NOP());
- cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_PROG_END, QPU_SIG);
- gen_fsc[gen_fsc_len++] = cur_inst;
+}
- cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
- gen_fsc[gen_fsc_len++] = cur_inst;
+/**
+ * Vertex-shader prologue: emits four QOP_VPM_READ instructions and stores
+ * their destinations in the first four input slots.  \p so is unused.
+ */
+static void
+emit_vert_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+{
+        struct qcompile *c = trans->c;
- cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
- cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_SCOREBOARD_UNLOCK, QPU_SIG);
- gen_fsc[gen_fsc_len++] = cur_inst;
-#endif
+        /* XXX: attribute type/size/count */
+        for (int chan = 0; chan < 4; chan++) {
+                struct qreg attr = qir_get_temp(c);
+
+                trans->inputs[chan] = attr;
+                qir_emit(c, qir_inst(QOP_VPM_READ, attr,
+                                     c->undef, c->undef));
+        }
+}
+/**
+ * Coordinate-shader prologue: emits four QOP_VPM_READ instructions and
+ * stores their destinations in the first four input slots.  \p so is unused.
+ */
+static void
+emit_coord_init(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+{
+        struct qcompile *c = trans->c;
- if (1)
- vc4_dump_program(gen_fsc, gen_fsc_len);
- vc4_qpu_validate(gen_fsc, gen_fsc_len);
+        /* XXX: attribute type/size/count */
+        for (int chan = 0; chan < 4; chan++) {
+                struct qreg attr = qir_get_temp(c);
+
+                trans->inputs[chan] = attr;
+                qir_emit(c, qir_inst(QOP_VPM_READ, attr,
+                                     c->undef, c->undef));
+        }
+}
- so->bo = vc4_bo_alloc_mem(vc4->screen, gen_fsc,
- gen_fsc_len * sizeof(uint64_t), "fs_code");
+/**
+ * Fragment-shader epilogue: packs the first four output channels with
+ * QOP_PACK_COLORS and hands the packed value to QOP_TLB_COLOR_WRITE.
+ * \p so is unused.
+ */
+static void
+emit_frag_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+{
+        struct qcompile *c = trans->c;
+        struct qreg packed = qir_get_temp(c);
+        struct qinst *pack = qir_inst4(QOP_PACK_COLORS, packed,
+                                       trans->outputs[0],
+                                       trans->outputs[1],
+                                       trans->outputs[2],
+                                       trans->outputs[3]);
+
+        qir_emit(c, pack);
+        qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
+                             packed, c->undef));
+}
- return so;
+/**
+ * Multiplies output channels 0 and 1 by the viewport X and Y scale uniforms,
+ * converts the products with FTOI, packs the pair with PACK_SCALED and
+ * writes the result to the VPM.
+ */
+static void
+emit_scaled_viewport_write(struct tgsi_to_qir *trans)
+{
+        struct qcompile *c = trans->c;
+        struct qreg xyi[2];
+
+        for (int i = 0; i < 2; i++) {
+                /* Reserve a fresh uniform slot for this axis' scale. */
+                uint32_t slot = trans->num_uniforms++;
+                trans->uniform_contents[slot] = QUNIFORM_VIEWPORT_X_SCALE + i;
+
+                struct qreg scale = { QFILE_UNIF, slot };
+                struct qreg scaled = qir_FMUL(c, trans->outputs[i], scale);
+
+                xyi[i] = qir_FTOI(c, scaled);
+        }
+
+        qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
}
-static int
-gen_vs_cs_code(uint64_t *gen, bool is_vs)
+/** Writes output channel 2 to the VPM unmodified. */
+static void
+emit_zs_write(struct tgsi_to_qir *trans)
{
- uint32_t count = 0;
- uint64_t cur_inst;
- struct qpu_reg x = qpu_ra(0);
- struct qpu_reg y = qpu_ra(1);
- struct qpu_reg z = qpu_ra(2);
- struct qpu_reg w = qpu_ra(3);
- struct qpu_reg xy = qpu_ra(10);
- struct qpu_reg xs = qpu_ra(12);
- struct qpu_reg ys = qpu_ra(13);
- struct qpu_reg vpmread = qpu_ra(QPU_R_VPM);
- struct qpu_reg vpm = qpu_ra(QPU_W_VPM);
- gen[count++] = qpu_load_imm_ui(qpu_vrsetup(), 0x00401a00);
- gen[count++] = qpu_load_imm_ui(qpu_vwsetup(), 0x00001a00);
+        struct qreg z = trans->outputs[2];
+
+        /* XXX: rescale */
+        qir_VPM_WRITE(trans->c, z);
+}
-#if 1
- gen[count++] = qpu_inst(qpu_a_MOV(x, vpmread), qpu_m_NOP());
- gen[count++] = qpu_inst(qpu_a_MOV(y, vpmread), qpu_m_NOP());
- gen[count++] = qpu_inst(qpu_a_MOV(z, vpmread), qpu_m_NOP());
- gen[count++] = qpu_inst(qpu_a_MOV(w, vpmread), qpu_m_NOP());
-
-
- gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_FMUL(xs, x,
- qpu_rb(QPU_R_UNIF)));
- gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_FMUL(ys, y,
- qpu_rb(QPU_R_UNIF)));
-
- cur_inst = qpu_inst(qpu_a_FTOI(xy, xs), qpu_m_NOP());
- cur_inst |= QPU_SET_FIELD(QPU_PACK_A_16A, QPU_PACK);
- gen[count++] = cur_inst;
- cur_inst = qpu_inst(qpu_a_FTOI(xy, ys), qpu_m_NOP());
- cur_inst |= QPU_SET_FIELD(QPU_PACK_A_16B, QPU_PACK);
- gen[count++] = cur_inst;
-
-#else
-
- struct qpu_reg t = qpu_ra(20);
- struct qpu_reg hundred = qpu_rb(21);
- gen[count++] = qpu_inst(qpu_a_NOP(),
- qpu_m_MUL24(t,
- qpu_ra(QPU_R_ELEM_QPU),
- qpu_ra(QPU_R_ELEM_QPU)));
- gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
-
- gen[count++] = qpu_load_imm_ui(hundred, 400);
- gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
-
- struct qpu_reg xm = qpu_ra(22), ym = qpu_ra(23);
- gen[count++] = qpu_inst(qpu_a_NOP(),
- qpu_m_MUL24(xm, hundred, qpu_ra(QPU_R_ELEM_QPU)));
- gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
-
- gen[count++] = qpu_inst(qpu_a_NOP(),
- qpu_m_MUL24(ym, hundred, t));
- gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
-
- cur_inst = qpu_inst(qpu_a_MOV(xy, xm), qpu_m_NOP());
- cur_inst |= QPU_SET_FIELD(QPU_PACK_A_16A, QPU_PACK);
- gen[count++] = cur_inst;
- cur_inst = qpu_inst(qpu_a_MOV(xy, ym), qpu_m_NOP());
- cur_inst |= QPU_SET_FIELD(QPU_PACK_A_16B, QPU_PACK);
- gen[count++] = cur_inst;
-#endif
+/** Writes output channel 3 to the VPM unmodified. */
+static void
+emit_1_wc_write(struct tgsi_to_qir *trans)
+{
+        struct qreg w = trans->outputs[3];
+
+        /* XXX: RCP */
+        qir_VPM_WRITE(trans->c, w);
+}
- gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
+/**
+ * Vertex-shader epilogue: writes the scaled viewport coordinates, then the
+ * Z value, then the W value, in that order.  \p so is unused.
+ */
+static void
+emit_vert_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+{
+        (void)so;
+
+        emit_scaled_viewport_write(trans);
+        emit_zs_write(trans);
+        emit_1_wc_write(trans);
+        /* XXX: write varyings */
+}
- if (is_vs) {
- gen[count++] = qpu_inst(qpu_a_MOV(vpm, xy), qpu_m_NOP());
+/**
+ * Coordinate-shader epilogue: writes the first four raw output channels to
+ * the VPM, followed by the scaled viewport coordinates, the Z value and the
+ * W value.  \p so is unused.
+ */
+static void
+emit_coord_end(struct tgsi_to_qir *trans, struct vc4_shader_state *so)
+{
+        struct qcompile *c = trans->c;
- /* XXX */
- gen[count++] = qpu_inst(qpu_a_MOV(vpm, z), qpu_m_NOP());
- gen[count++] = qpu_inst(qpu_a_MOV(vpm, w), qpu_m_NOP());
+        int chan = 0;
+
+        while (chan < 4) {
+                qir_VPM_WRITE(c, trans->outputs[chan]);
+                chan++;
+        }
- } else {
- gen[count++] = qpu_inst(qpu_a_MOV(vpm, x), qpu_m_NOP());
- gen[count++] = qpu_inst(qpu_a_MOV(vpm, y), qpu_m_NOP());
- gen[count++] = qpu_inst(qpu_a_MOV(vpm, z), qpu_m_NOP());
- gen[count++] = qpu_inst(qpu_a_MOV(vpm, w), qpu_m_NOP());
- gen[count++] = qpu_inst(qpu_a_MOV(vpm, xy), qpu_m_NOP());
+        emit_scaled_viewport_write(trans);
+        emit_zs_write(trans);
+        emit_1_wc_write(trans);
+}
- /* XXX */
- gen[count++] = qpu_inst(qpu_a_MOV(vpm, z), qpu_m_NOP());
- gen[count++] = qpu_inst(qpu_a_MOV(vpm, w), qpu_m_NOP());
+/**
+ * Translates the TGSI tokens of \p so into QIR for the given \p stage and
+ * generates QPU code from it.
+ *
+ * The returned translation state is owned by the caller.  On return it still
+ * holds the compile context (c) and the uniform_data / uniform_contents
+ * arrays; the per-register value tables that are only needed while
+ * translating have already been released.
+ *
+ * \return the translation state, or NULL if it could not be allocated.
+ */
+static struct tgsi_to_qir *
+vc4_shader_tgsi_to_qir(struct vc4_shader_state *so, enum qstage stage)
+{
+        /* CALLOC_STRUCT returns zeroed memory, so no memset is needed. */
+        struct tgsi_to_qir *trans = CALLOC_STRUCT(tgsi_to_qir);
+        struct qcompile *c;
+        int ret;
+
+        if (!trans)
+                return NULL;
+
+        c = qir_compile_init();
+        c->stage = stage;
+
+        /* XXX sizing */
+        trans->temps = calloc(1024, sizeof(*trans->temps));
+        trans->inputs = calloc(8 * 4, sizeof(*trans->inputs));
+        trans->outputs = calloc(1024, sizeof(*trans->outputs));
+        trans->uniforms = calloc(1024, sizeof(*trans->uniforms));
+        trans->consts = calloc(1024, sizeof(*trans->consts));
+
+        trans->uniform_data = calloc(1024, sizeof(*trans->uniform_data));
+        trans->uniform_contents = calloc(1024,
+                                         sizeof(*trans->uniform_contents));
+
+        trans->c = c;
+        ret = tgsi_parse_init(&trans->parser, so->base.tokens);
+        assert(ret == TGSI_PARSE_OK);
+        (void)ret; /* only read by the assert */
+
+        fprintf(stderr, "TGSI:\n");
+        tgsi_dump(so->base.tokens, 0);
+
+        switch (stage) {
+        case QSTAGE_FRAG:
+                emit_frag_init(trans, so);
+                break;
+        case QSTAGE_VERT:
+                emit_vert_init(trans, so);
+                break;
+        case QSTAGE_COORD:
+                emit_coord_init(trans, so);
+                break;
}
- /* PROGRAM END */
- cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
- cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_PROG_END, QPU_SIG);
- gen[count++] = cur_inst;
+        while (!tgsi_parse_end_of_tokens(&trans->parser)) {
+                tgsi_parse_token(&trans->parser);
- cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
- gen[count++] = cur_inst;
+                switch (trans->parser.FullToken.Token.Type) {
+                case TGSI_TOKEN_TYPE_INSTRUCTION:
+                        emit_tgsi_instruction(trans,
+                                              &trans->parser.FullToken.FullInstruction);
+                        break;
- cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
- gen[count++] = cur_inst;
+                case TGSI_TOKEN_TYPE_IMMEDIATE:
+                        parse_tgsi_immediate(trans,
+                                             &trans->parser.FullToken.FullImmediate);
+                        break;
+                }
+        }
+
+        switch (stage) {
+        case QSTAGE_FRAG:
+                emit_frag_end(trans, so);
+                break;
+        case QSTAGE_VERT:
+                emit_vert_end(trans, so);
+                break;
+        case QSTAGE_COORD:
+                emit_coord_end(trans, so);
+                break;
+        }
+
+        qir_dump(c);
+
+        tgsi_parse_free(&trans->parser);
+
+        /* The value tables are only used during translation; release all of
+         * them here and clear the pointers so stale use is easy to spot.
+         */
+        free(trans->temps);
+        trans->temps = NULL;
+        free(trans->inputs);
+        trans->inputs = NULL;
+        free(trans->outputs);
+        trans->outputs = NULL;
+        free(trans->uniforms);
+        trans->uniforms = NULL;
+        free(trans->consts);
+        trans->consts = NULL;
- vc4_qpu_validate(gen, count);
+        vc4_generate_code(c);
- return count;
+        return trans;
+}
+
+/**
+ * Allocates a shader state object and gives it its own copy of the TGSI
+ * tokens from \p cso.  \p pctx is unused.
+ *
+ * \return the new state, or NULL if either allocation failed.
+ */
+static struct vc4_shader_state *
+vc4_shader_state_create(struct pipe_context *pctx,
+                        const struct pipe_shader_state *cso)
+{
+        struct vc4_shader_state *so = CALLOC_STRUCT(vc4_shader_state);
+        if (!so)
+                return NULL;
+
+        so->base.tokens = tgsi_dup_tokens(cso->tokens);
+        if (!so->base.tokens) {
+                /* Without tokens there is nothing to compile later. */
+                free(so);
+                return NULL;
+        }
+
+        return so;
+}
+
+/**
+ * Copies the uniform slot descriptions gathered in \p trans (data and
+ * contents for num_uniforms slots) into freshly malloc'ed arrays stored in
+ * so->uniforms[shader_index].
+ */
+static void
+copy_uniform_state_to_shader(struct vc4_shader_state *so,
+                             int shader_index,
+                             struct tgsi_to_qir *trans)
+{
+        struct vc4_shader_uniform_info *uinfo = &so->uniforms[shader_index];
+        int count = trans->num_uniforms;
+        size_t data_bytes = count * sizeof(*uinfo->data);
+        size_t contents_bytes = count * sizeof(*uinfo->contents);
+
+        uinfo->count = count;
+
+        uinfo->data = malloc(data_bytes);
+        memcpy(uinfo->data, trans->uniform_data, data_bytes);
+
+        uinfo->contents = malloc(contents_bytes);
+        memcpy(uinfo->contents, trans->uniform_contents, contents_bytes);
}
static void *
-vc4_vs_state_create(struct pipe_context *pctx,
+vc4_fs_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
struct vc4_context *vc4 = vc4_context(pctx);
if (!so)
return NULL;
- uint64_t gen[100];
- uint64_t count = 0;
- uint64_t *vsc = gen;
+ struct tgsi_to_qir *trans = vc4_shader_tgsi_to_qir(so, QSTAGE_FRAG);
+ copy_uniform_state_to_shader(so, 0, trans);
- /* VS */
- count += gen_vs_cs_code(gen + count, true);
- fprintf(stderr, "VS:\n");
- vc4_dump_program(vsc, count);
+ so->bo = vc4_bo_alloc_mem(vc4->screen, trans->c->qpu_insts,
+ trans->c->num_qpu_insts * sizeof(uint64_t),
+ "fs_code");
- /* CS */
+ qir_compile_destroy(trans->c);
+ free(trans);
- /* XXX alignment? */
- uint64_t *csc = gen + count;
- so->coord_shader_offset = count * sizeof(uint64_t);
- count += gen_vs_cs_code(gen + count, false);
+ return so;
+}
- fprintf(stderr, "CS:\n");
- vc4_dump_program(csc, count - (csc - gen));
+/**
+ * Creates vertex shader state.  The TGSI is translated twice, once as a
+ * vertex shader (uniform slot 0) and once as a coordinate shader (uniform
+ * slot 1).  Both instruction streams are stored in one "vs_code" BO, with
+ * the coordinate shader starting at so->coord_shader_offset.
+ */
+static void *
+vc4_vs_state_create(struct pipe_context *pctx,
+                    const struct pipe_shader_state *cso)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso);
+        if (!so)
+                return NULL;
+
+        struct tgsi_to_qir *vs_trans = vc4_shader_tgsi_to_qir(so, QSTAGE_VERT);
+        copy_uniform_state_to_shader(so, 0, vs_trans);
+
+        struct tgsi_to_qir *cs_trans = vc4_shader_tgsi_to_qir(so, QSTAGE_COORD);
+        copy_uniform_state_to_shader(so, 1, cs_trans);
+
+        uint32_t vs_size = vs_trans->c->num_qpu_insts * sizeof(uint64_t);
+        uint32_t cs_size = cs_trans->c->num_qpu_insts * sizeof(uint64_t);
+        so->coord_shader_offset = vs_size; /* XXX: alignment? */
+        so->bo = vc4_bo_alloc(vc4->screen,
+                              so->coord_shader_offset + cs_size,
+                              "vs_code");
- so->bo = vc4_bo_alloc_mem(vc4->screen, gen, count * sizeof(uint64_t),
- "vs_code");
+        /* Byte pointer: arithmetic on void * is not standard C. */
+        uint8_t *map = vc4_bo_map(so->bo);
+        memcpy(map, vs_trans->c->qpu_insts, vs_size);
+        memcpy(map + so->coord_shader_offset, cs_trans->c->qpu_insts, cs_size);
+
+        qir_compile_destroy(vs_trans->c);
+        qir_compile_destroy(cs_trans->c);
+
+        /* Uniform layouts were copied into so->uniforms[] above; release the
+         * translation states and their arrays.
+         */
+        free(vs_trans->uniform_data);
+        free(vs_trans->uniform_contents);
+        free(vs_trans);
+        free(cs_trans->uniform_data);
+        free(cs_trans->uniform_contents);
+        free(cs_trans);
return so;
}
free(so);
}
+/**
+ * Builds a BO holding one 32-bit word per uniform slot of the given shader
+ * variant and returns it through \p out_bo (with \p out_offset set to 0).
+ *
+ * Each slot is filled according to its recorded contents: a literal
+ * constant, a word read from the user constant buffer cb->cb[0], or a
+ * viewport scale derived from the framebuffer width/height.  Every slot is
+ * also printed to stderr for debugging.
+ */
+void
+vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_shader_state *shader,
+                   struct vc4_constbuf_stateobj *cb,
+                   int shader_index, struct vc4_bo **out_bo,
+                   uint32_t *out_offset)
+{
+        struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
+        struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, uinfo->count * 4, "ubo");
+        uint32_t *map = vc4_bo_map(ubo);
+
+        for (int i = 0; i < uinfo->count; i++) {
+                switch (uinfo->contents[i]) {
+                case QUNIFORM_CONSTANT:
+                        map[i] = uinfo->data[i];
+                        break;
+                case QUNIFORM_UNIFORM: {
+                        /* The user buffer is only read, so keep it const. */
+                        const uint32_t *user =
+                                (const uint32_t *)cb->cb[0].user_buffer;
+                        map[i] = user[uinfo->data[i]];
+                        break;
+                }
+                case QUNIFORM_VIEWPORT_X_SCALE:
+                        map[i] = fui(vc4->framebuffer.width * 16.0f / 2.0f);
+                        break;
+                case QUNIFORM_VIEWPORT_Y_SCALE:
+                        map[i] = fui(vc4->framebuffer.height * -16.0f / 2.0f);
+                        break;
+                }
+#if 1
+                /* %p requires a void * argument. */
+                fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
+                        (void *)shader, shader_index, i, map[i], uif(map[i]));
+#endif
+        }
+
+        *out_bo = ubo;
+        *out_offset = 0;
+}
+
static void
vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
{
--- /dev/null
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+
+#include "vc4_qir.h"
+#include "vc4_qpu.h"
+
+/** Static description of one QIR opcode. */
+struct qir_op_info {
+ /* Mnemonic printed by the dump functions. */
+ const char *name;
+ /* Number of destinations / sources the opcode is declared with. */
+ uint8_t ndst, nsrc;
+};
+
+/* Opcode table indexed by enum qop; each entry is { name, ndst, nsrc }.
+ * Opcodes without an entry have a NULL name.
+ */
+static const struct qir_op_info qir_op_info[] = {
+ [QOP_MOV] = { "mov", 1, 1 },
+ [QOP_FADD] = { "fadd", 1, 2 },
+ [QOP_FSUB] = { "fsub", 1, 2 },
+ [QOP_FMUL] = { "fmul", 1, 2 },
+ [QOP_FMIN] = { "fmin", 1, 2 },
+ [QOP_FMAX] = { "fmax", 1, 2 },
+ [QOP_FMINABS] = { "fminabs", 1, 2 },
+ [QOP_FMAXABS] = { "fmaxabs", 1, 2 },
+ [QOP_FTOI] = { "ftoi", 1, 1 },
+ [QOP_RCP] = { "rcp", 1, 1 },
+ [QOP_RSQ] = { "rsq", 1, 1 },
+ [QOP_EXP2] = { "exp2", 1, 2 },
+ [QOP_LOG2] = { "log2", 1, 2 },
+ [QOP_PACK_COLORS] = { "pack_colors", 1, 4 },
+ [QOP_PACK_SCALED] = { "pack_scaled", 1, 2 },
+ [QOP_VPM_WRITE] = { "vpm_write", 0, 1 },
+ /* NOTE(review): vpm_read is emitted with a destination and undef
+ * sources elsewhere in this change; confirm 0 dst / 1 src is intended.
+ */
+ [QOP_VPM_READ] = { "vpm_read", 0, 1 },
+ [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1 },
+};
+
+/**
+ * Returns the mnemonic for \p qop, or "???" when the opcode has no entry in
+ * the opcode table.
+ */
+static const char *
+qir_get_op_name(enum qop qop)
+{
+        if (qop >= ARRAY_SIZE(qir_op_info) || !qir_op_info[qop].name)
+                return "???";
+
+        return qir_op_info[qop].name;
+}
+
+/**
+ * Returns the number of sources declared for \p qop.  Aborts when the
+ * opcode has no entry in the opcode table.
+ */
+int
+qir_get_op_nsrc(enum qop qop)
+{
+        if (qop >= ARRAY_SIZE(qir_op_info) || !qir_op_info[qop].name)
+                abort();
+
+        return qir_op_info[qop].nsrc;
+}
+
+/**
+ * Prints a register to stderr as a one-letter file prefix followed by its
+ * index, or "null" for the null file.
+ */
+static void
+qir_print_reg(struct qreg reg)
+{
+        const char *files[] = {
+                [QFILE_TEMP] = "t",
+                [QFILE_VARY] = "v",
+                [QFILE_UNIF] = "u",
+        };
+
+        if (reg.file != QFILE_NULL) {
+                fprintf(stderr, "%s%d", files[reg.file], reg.index);
+                return;
+        }
+
+        fprintf(stderr, "null");
+}
+
+/**
+ * Prints one instruction to stderr: mnemonic, destination, then each source
+ * preceded by ", ".  No trailing newline is written.
+ */
+void
+qir_dump_inst(struct qinst *inst)
+{
+        int nsrc;
+
+        fprintf(stderr, "%s ", qir_get_op_name(inst->op));
+        qir_print_reg(inst->dst);
+
+        nsrc = qir_get_op_nsrc(inst->op);
+        for (int s = 0; s < nsrc; s++) {
+                fprintf(stderr, ", ");
+                qir_print_reg(inst->src[s]);
+        }
+}
+
+/** Prints every instruction of \p c to stderr, one per line. */
+void
+qir_dump(struct qcompile *c)
+{
+        struct simple_node *cursor;
+
+        foreach(cursor, &c->instructions) {
+                qir_dump_inst((struct qinst *)cursor);
+                fprintf(stderr, "\n");
+        }
+}
+
+/**
+ * Hands out a new temporary register, numbered from the compile's running
+ * temp counter.
+ */
+struct qreg
+qir_get_temp(struct qcompile *c)
+{
+        struct qreg temp;
+
+        temp.index = c->num_temps++;
+        temp.file = QFILE_TEMP;
+
+        return temp;
+}
+
+/**
+ * Allocates an instruction with a two-entry source array.  The instruction
+ * is not added to any list; see qir_emit().
+ */
+struct qinst *
+qir_inst(enum qop op, struct qreg dst, struct qreg src0, struct qreg src1)
+{
+        struct qinst *inst = CALLOC_STRUCT(qinst);
+        struct qreg *srcs = calloc(2, sizeof(inst->src[0]));
+
+        srcs[0] = src0;
+        srcs[1] = src1;
+
+        inst->op = op;
+        inst->dst = dst;
+        inst->src = srcs;
+
+        return inst;
+}
+
+/**
+ * Allocates an instruction with a four-entry source array.  The instruction
+ * is not added to any list; see qir_emit().
+ */
+struct qinst *
+qir_inst4(enum qop op, struct qreg dst,
+          struct qreg a,
+          struct qreg b,
+          struct qreg c,
+          struct qreg d)
+{
+        struct qinst *inst = CALLOC_STRUCT(qinst);
+        struct qreg *srcs = calloc(4, sizeof(*inst->src));
+
+        srcs[0] = a;
+        srcs[1] = b;
+        srcs[2] = c;
+        srcs[3] = d;
+
+        inst->op = op;
+        inst->dst = dst;
+        inst->src = srcs;
+
+        return inst;
+}
+
+/** Appends \p inst to the end of the compile's instruction list. */
+void
+qir_emit(struct qcompile *c, struct qinst *inst)
+{
+ insert_at_tail(&c->instructions, &inst->link);
+}
+
+/**
+ * Allocates a zeroed compile context with an empty instruction list.
+ */
+struct qcompile *
+qir_compile_init(void)
+{
+        struct qcompile *compile = CALLOC_STRUCT(qcompile);
+
+        make_empty_list(&compile->instructions);
+
+        return compile;
+}
+
+/**
+ * Releases a compile context together with everything it owns: every
+ * instruction on its list (and each instruction's source array) and the
+ * generated QPU instruction buffer.  Passing NULL is a no-op.
+ */
+void
+qir_compile_destroy(struct qcompile *c)
+{
+        struct simple_node *node, *next;
+
+        if (!c)
+                return;
+
+        /* foreach_s caches the next node, so freeing the current one while
+         * walking the list is safe.
+         */
+        foreach_s(node, next, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+
+                free(inst->src);
+                free(inst);
+        }
+
+        /* NULL when no code was generated; free(NULL) is a no-op. */
+        free(c->qpu_insts);
+        free(c);
+}
+
+/**
+ * Returns the two-letter label used for \p stage in debug output
+ * ("FS", "VS" or "CS").
+ */
+const char *
+qir_get_stage_name(enum qstage stage)
+{
+        static const char *names[] = {
+                [QSTAGE_FRAG] = "FS",
+                [QSTAGE_VERT] = "VS",
+                [QSTAGE_COORD] = "CS",
+        };
+        const char *label = names[stage];
+
+        return label;
+}
--- /dev/null
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "vc4_context.h"
+#include "vc4_qir.h"
+#include "vc4_qpu.h"
+
+/**
+ * Prints the stage name followed by every generated QPU instruction (raw
+ * 64-bit hex value plus disassembly) to stderr.
+ */
+static void
+vc4_dump_program(struct qcompile *c)
+{
+        fprintf(stderr, "%s:\n", qir_get_stage_name(c->stage));
+
+        for (int idx = 0; idx < c->num_qpu_insts; idx++) {
+                uint64_t *inst = &c->qpu_insts[idx];
+
+                fprintf(stderr, "0x%016"PRIx64" ", *inst);
+                vc4_qpu_disasm(inst, 1);
+                fprintf(stderr, "\n");
+        }
+}
+
+/**
+ * Generates QPU instructions from the QIR instruction list of \p c.
+ *
+ * Temporaries are assigned to a pool of candidate registers (four
+ * accumulators plus the 32 A-file and 32 B-file registers).  A register is
+ * claimed the first time a temporary is written and released once the
+ * temporary's counted uses (writes plus reads) are exhausted.  The resulting
+ * program is stored in c->qpu_insts / c->num_qpu_insts, then dumped to
+ * stderr and validated.
+ */
+void
+vc4_generate_code(struct qcompile *c)
+{
+        uint64_t *insts = malloc(sizeof(uint64_t) * 1024); /* XXX: sizing */
+        uint32_t ni = 0;
+        struct qpu_reg allocate_to_qpu_reg[4 + 32 + 32];
+        bool reg_in_use[ARRAY_SIZE(allocate_to_qpu_reg)];
+        int *reg_allocated = calloc(c->num_temps, sizeof(*reg_allocated));
+        int *reg_uses_remaining =
+                calloc(c->num_temps, sizeof(*reg_uses_remaining));
+
+        for (int i = 0; i < ARRAY_SIZE(reg_in_use); i++)
+                reg_in_use[i] = false;
+        for (int i = 0; i < c->num_temps; i++)
+                reg_allocated[i] = -1;
+        for (int i = 0; i < 4; i++)
+                allocate_to_qpu_reg[i] = qpu_rn(i);
+        for (int i = 0; i < 32; i++)
+                allocate_to_qpu_reg[i + 4] = qpu_ra(i);
+        for (int i = 0; i < 32; i++)
+                allocate_to_qpu_reg[i + 4 + 32] = qpu_rb(i);
+
+        /* First pass: count every write and read of each temporary. */
+        struct simple_node *node;
+        foreach(node, &c->instructions) {
+                struct qinst *qinst = (struct qinst *)node;
+
+                if (qinst->dst.file == QFILE_TEMP)
+                        reg_uses_remaining[qinst->dst.index]++;
+                for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
+                        if (qinst->src[i].file == QFILE_TEMP)
+                                reg_uses_remaining[qinst->src[i].index]++;
+                }
+        }
+
+        switch (c->stage) {
+        case QSTAGE_VERT:
+        case QSTAGE_COORD:
+                insts[ni++] = qpu_load_imm_ui(qpu_vrsetup(), 0x00401a00);
+                insts[ni++] = qpu_load_imm_ui(qpu_vwsetup(), 0x00001a00);
+                break;
+        case QSTAGE_FRAG:
+                break;
+        }
+
+        /* Second pass: translate each QIR instruction. */
+        foreach(node, &c->instructions) {
+                struct qinst *qinst = (struct qinst *)node;
+
+#if 0
+                fprintf(stderr, "translating qinst to qpu: ");
+                qir_dump_inst(qinst);
+                fprintf(stderr, "\n");
+#endif
+
+                static const struct {
+                        uint32_t op;
+                        bool is_mul;
+                } translate[] = {
+#define A(name) [QOP_##name] = {QPU_A_##name, false}
+#define M(name) [QOP_##name] = {QPU_M_##name, true}
+                        A(FADD),
+                        A(FSUB),
+                        A(FMIN),
+                        A(FMAX),
+                        A(FMINABS),
+                        A(FMAXABS),
+                        A(FTOI),
+
+                        M(FMUL),
+                };
+                /* Keep the one-letter helper macros local to the table. */
+#undef A
+#undef M
+
+                struct qpu_reg src[4];
+                for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
+                        int index = qinst->src[i].index;
+                        switch (qinst->src[i].file) {
+                        case QFILE_NULL:
+                                src[i] = qpu_rn(0);
+                                break;
+                        case QFILE_TEMP:
+                                assert(reg_allocated[index] != -1);
+                                src[i] = allocate_to_qpu_reg[reg_allocated[index]];
+                                reg_uses_remaining[index]--;
+                                if (reg_uses_remaining[index] == 0)
+                                        reg_in_use[reg_allocated[index]] = false;
+                                break;
+                        case QFILE_UNIF:
+                                src[i] = qpu_unif();
+                                break;
+                        case QFILE_VARY:
+                                src[i] = qpu_vary();
+                                break;
+                        }
+                }
+
+                struct qpu_reg dst;
+                switch (qinst->dst.file) {
+                case QFILE_NULL:
+                        dst = qpu_ra(QPU_W_NOP);
+                        break;
+
+                case QFILE_TEMP:
+                        if (reg_allocated[qinst->dst.index] == -1) {
+                                int alloc;
+                                for (alloc = 0;
+                                     alloc < ARRAY_SIZE(reg_in_use);
+                                     alloc++) {
+                                        /* The pack flags require an A-file register. */
+                                        if (qinst->op == QOP_PACK_SCALED &&
+                                            allocate_to_qpu_reg[alloc].mux != QPU_MUX_A) {
+                                                continue;
+                                        }
+
+                                        if (!reg_in_use[alloc])
+                                                break;
+                                }
+                                assert(alloc != ARRAY_SIZE(reg_in_use) && "need better reg alloc");
+                                reg_in_use[alloc] = true;
+                                reg_allocated[qinst->dst.index] = alloc;
+                        }
+
+                        dst = allocate_to_qpu_reg[reg_allocated[qinst->dst.index]];
+
+                        reg_uses_remaining[qinst->dst.index]--;
+                        if (reg_uses_remaining[qinst->dst.index] == 0) {
+                                reg_in_use[reg_allocated[qinst->dst.index]] =
+                                        false;
+                        }
+                        break;
+
+                case QFILE_VARY:
+                case QFILE_UNIF:
+                        assert(!"not reached");
+                        break;
+                }
+
+                switch (qinst->op) {
+                case QOP_MOV:
+                        /* Skip emitting the MOV if it's a no-op. */
+                        if (dst.mux == QPU_MUX_A || dst.mux == QPU_MUX_B ||
+                            dst.mux != src[0].mux || dst.addr != src[0].addr) {
+                                insts[ni++] = qpu_inst(qpu_a_MOV(dst, src[0]),
+                                                       qpu_m_NOP());
+                        }
+                        break;
+
+                case QOP_VPM_WRITE:
+                        insts[ni++] = qpu_inst(qpu_a_MOV(qpu_ra(QPU_W_VPM),
+                                                         src[0]),
+                                               qpu_m_NOP());
+                        break;
+
+                case QOP_VPM_READ:
+                        insts[ni++] = qpu_inst(qpu_a_MOV(dst,
+                                                         qpu_ra(QPU_R_VPM)),
+                                               qpu_m_NOP());
+                        break;
+
+                case QOP_PACK_COLORS:
+                        for (int i = 0; i < 4; i++) {
+                                insts[ni++] = qpu_inst(qpu_a_NOP(),
+                                                       qpu_m_MOV(qpu_r5(), src[i]));
+                                insts[ni - 1] |= QPU_PM;
+                                insts[ni - 1] |= QPU_SET_FIELD(QPU_PACK_MUL_8A + i,
+                                                               QPU_PACK);
+                        }
+
+                        insts[ni++] = qpu_inst(qpu_a_MOV(dst, qpu_r5()),
+                                               qpu_m_NOP());
+
+                        break;
+
+                case QOP_TLB_COLOR_WRITE:
+                        insts[ni++] = qpu_inst(qpu_a_MOV(qpu_tlbc(),
+                                                         src[0]),
+                                               qpu_m_NOP());
+                        break;
+
+                case QOP_PACK_SCALED:
+                        insts[ni++] = qpu_inst(qpu_a_MOV(dst, src[0]),
+                                               qpu_m_NOP());
+                        insts[ni - 1] |= QPU_SET_FIELD(QPU_PACK_A_16A, QPU_PACK);
+
+                        insts[ni++] = qpu_inst(qpu_a_MOV(dst, src[1]),
+                                               qpu_m_NOP());
+                        insts[ni - 1] |= QPU_SET_FIELD(QPU_PACK_A_16B, QPU_PACK);
+
+                        break;
+
+                default:
+                        assert(qinst->op < ARRAY_SIZE(translate));
+                        assert(translate[qinst->op].op != 0); /* NOPs */
+
+                        /* If we have only one source, put it in the second
+                         * argument slot as well so that we don't take up
+                         * another raddr just to get unused data.
+                         */
+                        if (qir_get_op_nsrc(qinst->op) == 1)
+                                src[1] = src[0];
+
+                        /* When both operands come from the A/B files at
+                         * different addresses, the second is first copied
+                         * into r5.
+                         */
+                        if ((src[0].mux == QPU_MUX_A || src[0].mux == QPU_MUX_B) &&
+                            (src[1].mux == QPU_MUX_A || src[1].mux == QPU_MUX_B) &&
+                            src[0].addr != src[1].addr) {
+                                insts[ni++] = qpu_inst(qpu_a_MOV(qpu_r5(), src[1]),
+                                                       qpu_m_NOP());
+                                src[1] = qpu_r5();
+                        }
+
+                        if (translate[qinst->op].is_mul) {
+                                insts[ni++] = qpu_inst(qpu_a_NOP(),
+                                                       qpu_m_alu2(translate[qinst->op].op,
+                                                                  dst, src[0], src[1]));
+                        } else {
+                                insts[ni++] = qpu_inst(qpu_a_alu2(translate[qinst->op].op,
+                                                                  dst, src[0], src[1]),
+                                                       qpu_m_NOP());
+                        }
+                        break;
+                }
+
+                /* A NOP is appended after any write to an A/B-file register
+                 * with an address below 32.
+                 */
+                if ((dst.mux == QPU_MUX_A || dst.mux == QPU_MUX_B) &&
+                    dst.addr < 32)
+                        insts[ni++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
+        }
+
+        /* thread end can't have VPM write */
+        if (QPU_GET_FIELD(insts[ni - 1], QPU_WADDR_ADD) == QPU_W_VPM ||
+            QPU_GET_FIELD(insts[ni - 1], QPU_WADDR_MUL) == QPU_W_VPM)
+                insts[ni++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
+
+        insts[ni - 1] = qpu_set_sig(insts[ni - 1], QPU_SIG_PROG_END);
+        insts[ni++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
+        insts[ni++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP());
+
+        switch (c->stage) {
+        case QSTAGE_VERT:
+        case QSTAGE_COORD:
+                break;
+        case QSTAGE_FRAG:
+                insts[2] = qpu_set_sig(insts[2], QPU_SIG_WAIT_FOR_SCOREBOARD);
+                insts[ni - 1] = qpu_set_sig(insts[ni - 1],
+                                            QPU_SIG_SCOREBOARD_UNLOCK);
+                break;
+        }
+
+        /* The allocation bookkeeping is only needed during translation. */
+        free(reg_allocated);
+        free(reg_uses_remaining);
+
+        c->qpu_insts = insts;
+        c->num_qpu_insts = ni;
+
+        vc4_dump_program(c);
+        vc4_qpu_validate(insts, ni);
+}
+