#include "util/hash_table.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
+#include "common/v3d_device_info.h"
#include "v3d_compiler.h"
/* We don't do any address packing. */
}
static void
-emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w)
+vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t *vpm_index)
+{
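+ /* On V3D 4.x the VPM is written through STVPMV at an explicit element
+  * index supplied by the caller, which we advance per write.  Pre-4.x
+  * parts instead stream writes to the magic VPM register configured by
+  * emit_vpm_write_setup().
+  */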
+ if (c->devinfo->ver >= 40) {
+ vir_STVPMV(c, vir_uniform_ui(c, *vpm_index), val);
+ *vpm_index = *vpm_index + 1;
+ } else {
+ vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
+ }
+
+ c->num_vpm_writes++;
+}
+
+static void
+emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w,
+ uint32_t *vpm_index)
{
for (int i = 0; i < 2; i++) {
struct qreg coord = c->outputs[c->output_position_index + i];
coord = vir_FMUL(c, coord,
                 vir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i,
                             0));
coord = vir_FMUL(c, coord, rcp_w);
- vir_FTOIN_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM),
- coord);
+ vir_VPM_WRITE(c, vir_FTOIN(c, coord), vpm_index);
}
}
static void
-emit_zs_write(struct v3d_compile *c, struct qreg rcp_w)
+emit_zs_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index)
{
struct qreg zscale = vir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
struct qreg zoffset = vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);
- vir_FADD_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM),
- vir_FMUL(c, vir_FMUL(c,
- c->outputs[c->output_position_index + 2],
- zscale),
- rcp_w),
- zoffset);
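+ /* Viewport depth transform: z * zscale / wc + zoffset (rcp_w is 1/wc). */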
+ struct qreg z = c->outputs[c->output_position_index + 2];
+ z = vir_FMUL(c, z, zscale);
+ z = vir_FMUL(c, z, rcp_w);
+ z = vir_FADD(c, z, zoffset);
+ vir_VPM_WRITE(c, z, vpm_index);
}
static void
-emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w)
+emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index)
{
- vir_VPM_WRITE(c, rcp_w);
+ vir_VPM_WRITE(c, rcp_w, vpm_index);
}
static void
-emit_point_size_write(struct v3d_compile *c)
+emit_point_size_write(struct v3d_compile *c, uint32_t *vpm_index)
{
struct qreg point_size;
*/
point_size = vir_FMAX(c, point_size, vir_uniform_f(c, .125));
- vir_VPM_WRITE(c, point_size);
+ vir_VPM_WRITE(c, point_size, vpm_index);
}
static void
emit_vpm_write_setup(struct v3d_compile *c)
{
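+ /* V3D 4.x VPM writes carry their own address in STVPMV, so the generic
+  * block write setup below is only needed on older parts.
+  */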
+ if (c->devinfo->ver >= 40)
+ return;
+
uint32_t packed;
struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP unpacked = {
V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header,
static void
emit_vert_end(struct v3d_compile *c)
{
+ uint32_t vpm_index = 0;
struct qreg rcp_w = vir_SFU(c, V3D_QPU_WADDR_RECIP,
c->outputs[c->output_position_index + 3]);
if (c->vs_key->is_coord) {
for (int i = 0; i < 4; i++)
- vir_VPM_WRITE(c, c->outputs[c->output_position_index + i]);
- emit_scaled_viewport_write(c, rcp_w);
+ vir_VPM_WRITE(c, c->outputs[c->output_position_index + i],
+ &vpm_index);
+ emit_scaled_viewport_write(c, rcp_w, &vpm_index);
if (c->vs_key->per_vertex_point_size) {
- emit_point_size_write(c);
+ emit_point_size_write(c, &vpm_index);
/* emit_rcp_wc_write(c, rcp_w); */
}
/* XXX: Z-only rendering */
if (0)
- emit_zs_write(c, rcp_w);
+ emit_zs_write(c, rcp_w, &vpm_index);
} else {
- emit_scaled_viewport_write(c, rcp_w);
- emit_zs_write(c, rcp_w);
- emit_rcp_wc_write(c, rcp_w);
+ emit_scaled_viewport_write(c, rcp_w, &vpm_index);
+ emit_zs_write(c, rcp_w, &vpm_index);
+ emit_rcp_wc_write(c, rcp_w, &vpm_index);
if (c->vs_key->per_vertex_point_size)
- emit_point_size_write(c);
+ emit_point_size_write(c, &vpm_index);
}
for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
struct v3d_varying_slot output = c->output_slots[j];
if (!memcmp(&input, &output, sizeof(input))) {
- vir_VPM_WRITE(c, c->outputs[j]);
+ vir_VPM_WRITE(c, c->outputs[j],
+ &vpm_index);
break;
}
}
* this FS input.
*/
if (j == c->num_outputs)
- vir_VPM_WRITE(c, vir_uniform_f(c, 0.0));
+ vir_VPM_WRITE(c, vir_uniform_f(c, 0.0),
+ &vpm_index);
}
}
{
struct qreg vpm = vir_reg(QFILE_VPM, vpm_index);
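+ /* On V3D 4.x, attributes are loaded with LDVPMV_IN at an explicit element
+  * index instead of reading the sequential QFILE_VPM register below.
+  */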
+ if (c->devinfo->ver >= 40) {
+ return vir_LDVPMV_IN(c,
+ vir_uniform_ui(c,
+ (*num_components_queued)++));
+ }
+
if (*num_components_queued != 0) {
(*num_components_queued)--;
c->num_inputs++;
}
if (c->s->info.stage == MESA_SHADER_VERTEX) {
- assert(vpm_components_queued == 0);
- assert(num_components == 0);
+ if (c->devinfo->ver >= 40) {
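+ /* On 4.x the queued counter doubles as the LDVPMV_IN read index, so by
+  * the end of the shader it should match the total component count.
+  */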
+ assert(vpm_components_queued == num_components);
+ } else {
+ assert(vpm_components_queued == 0);
+ assert(num_components == 0);
+ }
}
}
static bool
qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
{
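+ /* Any VPM access (LDVPM/STVPM, VPMSETUP, VPMWT, or a magic VPM waddr
+  * write) is treated as a peripheral access here.
+  */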
+ if (v3d_qpu_uses_vpm(inst))
+ return true;
+
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.op != V3D_QPU_A_NOP &&
inst->alu.add.magic_write &&
return true;
}
- if (inst->alu.add.op == V3D_QPU_A_VPMSETUP)
- return true;
-
if (inst->alu.mul.op != V3D_QPU_M_NOP &&
inst->alu.mul.magic_write &&
qpu_magic_waddr_is_periph(inst->alu.mul.waddr)) {
VIR_A_ALU2(XOR)
VIR_A_ALU2(VADD)
VIR_A_ALU2(VSUB)
+VIR_A_ALU2(STVPMV)
VIR_A_ALU1(NOT)
VIR_A_ALU1(NEG)
VIR_A_ALU1(FLAPUSH)
VIR_A_ALU1(SETREVF)
VIR_A_ALU1(TIDX)
VIR_A_ALU1(EIDX)
+VIR_A_ALU1(LDVPMV_IN)
+VIR_A_ALU1(LDVPMV_OUT)
VIR_A_ALU0(FXCD)
VIR_A_ALU0(XCD)
return t;
}
-static inline void
-vir_VPM_WRITE(struct v3d_compile *c, struct qreg val)
-{
- vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
-}
-
static inline struct qinst *
vir_NOP(struct v3d_compile *c)
{
case V3D_QPU_A_SETREVF:
case V3D_QPU_A_SETMSF:
case V3D_QPU_A_VPMSETUP:
+ case V3D_QPU_A_STVPMV:
+ case V3D_QPU_A_STVPMD:
+ case V3D_QPU_A_STVPMP:
return true;
default:
break;
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
list_addtail(&inst->link, &c->cur_block->instructions);
-
- if (inst->dst.file == QFILE_MAGIC &&
- inst->dst.index == V3D_QPU_WADDR_VPM)
- c->num_vpm_writes++;
}
/* Updates inst to write to a new temporary, emits it, and notes the def. */
}
}
+ if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
+ switch (inst->qpu.alu.add.op) {
+ case V3D_QPU_A_LDVPMV_IN:
+ case V3D_QPU_A_LDVPMV_OUT:
+ case V3D_QPU_A_LDVPMD_IN:
+ case V3D_QPU_A_LDVPMD_OUT:
+ case V3D_QPU_A_LDVPMP:
+ case V3D_QPU_A_LDVPMG_IN:
+ case V3D_QPU_A_LDVPMG_OUT:
+ /* LDVPMs only store to temps (the MA flag
+ * decides whether the LDVPM is in or out)
+ */
+ assert(inst->dst.file == QFILE_TEMP);
+ class_bits[temp_to_node[inst->dst.index]] &=
+ CLASS_BIT_PHYS;
+ break;
+
+ default:
+ break;
+ }
+ }
+
if (inst->src[0].file == QFILE_REG) {
switch (inst->src[0].index) {
case 0:
[V3D_QPU_A_TMUWT] = "tmuwt",
[V3D_QPU_A_VPMSETUP] = "vpmsetup",
[V3D_QPU_A_VPMWT] = "vpmwt",
- [V3D_QPU_A_LDVPMV] = "ldvpmv",
- [V3D_QPU_A_LDVPMD] = "ldvpmd",
+ [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
+ [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
+ [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
+ [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
[V3D_QPU_A_LDVPMP] = "ldvpmp",
- [V3D_QPU_A_LDVPMG] = "ldvpmg",
+ [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
+ [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
[V3D_QPU_A_FCMP] = "fcmp",
[V3D_QPU_A_VFMAX] = "vfmax",
[V3D_QPU_A_FROUND] = "fround",
[V3D_QPU_A_VPMSETUP] = D | A,
- [V3D_QPU_A_LDVPMV] = D | A,
- [V3D_QPU_A_LDVPMD] = D | A,
+ [V3D_QPU_A_LDVPMV_IN] = D | A,
+ [V3D_QPU_A_LDVPMV_OUT] = D | A,
+ [V3D_QPU_A_LDVPMD_IN] = D | A,
+ [V3D_QPU_A_LDVPMD_OUT] = D | A,
[V3D_QPU_A_LDVPMP] = D | A,
- [V3D_QPU_A_LDVPMG] = D | A | B,
+ [V3D_QPU_A_LDVPMG_IN] = D | A | B,
+ [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
/* FIXME: MOVABSNEG */
waddr == V3D_QPU_WADDR_SYNCU);
}
+static bool
+v3d_qpu_add_op_uses_vpm(enum v3d_qpu_add_op op)
+{
+ switch (op) {
+ case V3D_QPU_A_VPMSETUP:
+ case V3D_QPU_A_VPMWT:
+ case V3D_QPU_A_LDVPMV_IN:
+ case V3D_QPU_A_LDVPMV_OUT:
+ case V3D_QPU_A_LDVPMD_IN:
+ case V3D_QPU_A_LDVPMD_OUT:
+ case V3D_QPU_A_LDVPMP:
+ case V3D_QPU_A_LDVPMG_IN:
+ case V3D_QPU_A_LDVPMG_OUT:
+ case V3D_QPU_A_STVPMV:
+ case V3D_QPU_A_STVPMD:
+ case V3D_QPU_A_STVPMP:
+ return true;
+ default:
+ return false;
+ }
+}
+
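+/* Returns whether the instruction touches the VPM, either through one of the
+ * dedicated VPM opcodes or through a magic VPM waddr write.
+ */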
+bool
+v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
+{
+ if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
+ if (v3d_qpu_add_op_uses_vpm(inst->alu.add.op))
+ return true;
+
+ if (inst->alu.add.magic_write &&
+ v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
+ return true;
+ }
+
+ if (inst->alu.mul.magic_write &&
+ v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool
v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *inst)
V3D_QPU_A_TMUWT,
V3D_QPU_A_VPMSETUP,
V3D_QPU_A_VPMWT,
- V3D_QPU_A_LDVPMV,
- V3D_QPU_A_LDVPMD,
+ V3D_QPU_A_LDVPMV_IN,
+ V3D_QPU_A_LDVPMV_OUT,
+ V3D_QPU_A_LDVPMD_IN,
+ V3D_QPU_A_LDVPMD_OUT,
V3D_QPU_A_LDVPMP,
- V3D_QPU_A_LDVPMG,
+ V3D_QPU_A_LDVPMG_IN,
+ V3D_QPU_A_LDVPMG_OUT,
V3D_QPU_A_FCMP,
V3D_QPU_A_VFMAX,
V3D_QPU_A_FROUND,
bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
+bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst);
bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
{ 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
{ 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
- { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP },
+ { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
+ { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
+ { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
+ { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
+ { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
/* FIXME: MORE COMPLICATED */
/* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */
instr->alu.add.a = mux_a;
instr->alu.add.b = mux_b;
instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
- instr->alu.add.magic_write = packed_inst & VC5_QPU_MA;
+
+ instr->alu.add.magic_write = false;
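+ /* For the LDVPM* opcodes the MA bit selects the OUT-segment variant
+  * rather than marking a magic waddr write, so fold it into the opcode.
+  */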
+ if (packed_inst & VC5_QPU_MA) {
+ switch (instr->alu.add.op) {
+ case V3D_QPU_A_LDVPMV_IN:
+ instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
+ break;
+ case V3D_QPU_A_LDVPMD_IN:
+ instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
+ break;
+ case V3D_QPU_A_LDVPMG_IN:
+ instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
+ break;
+ default:
+ instr->alu.add.magic_write = true;
+ break;
+ }
+ }
return true;
}
if (nsrc < 1)
mux_a = ffs(desc->mux_a_mask) - 1;
+ bool no_magic_write = false;
+
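+ /* STVPM* carry the store type in the waddr field (0/1/2 below), so MA
+  * must be left clear for them; the LDVPM*_OUT variants instead set MA to
+  * select the OUT segment.
+  */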
switch (instr->alu.add.op) {
case V3D_QPU_A_STVPMV:
waddr = 0;
+ no_magic_write = true;
break;
case V3D_QPU_A_STVPMD:
waddr = 1;
+ no_magic_write = true;
break;
case V3D_QPU_A_STVPMP:
waddr = 2;
+ no_magic_write = true;
+ break;
+
+ case V3D_QPU_A_LDVPMV_IN:
+ case V3D_QPU_A_LDVPMD_IN:
+ case V3D_QPU_A_LDVPMP:
+ case V3D_QPU_A_LDVPMG_IN:
+ assert(!instr->alu.add.magic_write);
break;
+
+ case V3D_QPU_A_LDVPMV_OUT:
+ case V3D_QPU_A_LDVPMD_OUT:
+ case V3D_QPU_A_LDVPMG_OUT:
+ assert(!instr->alu.add.magic_write);
+ *packed_instr |= VC5_QPU_MA;
+ break;
+
default:
break;
}
*packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
*packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
*packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
- if (instr->alu.add.magic_write)
+ if (instr->alu.add.magic_write && !no_magic_write)
*packed_instr |= VC5_QPU_MA;
return true;
{ 41, 0xdb3048eb9d533780ull, "fmax rf43.l, r3.h, rf30; fmul rf35.h, r4, r2.l; ldunifarf.r1" },
{ 41, 0x733620471e6ce700ull, "faddnf rf7.l, rf28.h, r1.l; fmul r1, r3.h, r3.abs; ldunifarf.rsqrt2" },
{ 41, 0x9c094adef634b000ull, "ffloor.ifb rf30.l, r3; fmul.pushz rf43.l, r5, r1.h" },
+
+ /* v4.1 opcodes */
+ { 41, 0x3de020c7bdfd200dull, "ldvpmg_in rf7, r2, r2; mov r3, 13" },
+ { 41, 0x3de02040f8ff7201ull, "stvpmv 1, rf8 ; mov r1, 1" },
};
static void