struct weight_vector_type {
weight_vector_type() : v(NULL), size(0) {}
- weight_vector_type(unsigned n) :
- v(new vector_type[DIV_ROUND_UP(n, vector_width)]()),
- size(n) {}
+ weight_vector_type(unsigned n) : v(alloc(n)), size(n) {}
weight_vector_type(const weight_vector_type &u) :
- v(new vector_type[DIV_ROUND_UP(u.size, vector_width)]()),
- size(u.size)
+ v(alloc(u.size)), size(u.size)
{
memcpy(v, u.v,
DIV_ROUND_UP(u.size, vector_width) * sizeof(vector_type));
~weight_vector_type()
{
- delete[] v;
+ free(v);
}
weight_vector_type &
vector_type *v;
unsigned size;
+
+ private:
+      /**
+       * Allocate zero-filled backing storage for a weight vector of \p n
+       * entries, i.e. DIV_ROUND_UP(n, vector_width) elements of vector_type.
+       *
+       * The memory is obtained from posix_memalign() so that it honours the
+       * alignment of vector_type; the requested alignment is never smaller
+       * than sizeof(void *), which posix_memalign() requires.  The returned
+       * pointer must be released with free().
+       *
+       * \return the zeroed buffer, or NULL if posix_memalign() fails.
+       */
+      static vector_type *
+      alloc(unsigned n)
+      {
+         const unsigned num_bytes =
+            DIV_ROUND_UP(n, vector_width) * sizeof(vector_type);
+         const unsigned alignment =
+            MAX2(sizeof(void *), __alignof__(vector_type));
+         void *mem;
+
+         /* posix_memalign() reports failure through a non-zero return. */
+         if (posix_memalign(&mem, alignment, num_bytes) != 0)
+            return NULL;
+
+         memset(mem, 0, num_bytes);
+         return static_cast<vector_type *>(mem);
+      }
};
/**
for (unsigned reg = 0; reg < 2; reg++)
constrained[p.atom_of_reg(reg)] = true;
- /* Assume that anything referenced via fixed GRFs is baked into the
- * hardware's fixed-function logic and may be unsafe to move around.
- * Also take into account the source GRF restrictions of EOT
- * send-message instructions.
+ /* From the Intel Broadwell PRM, vol 07, section "Instruction Set
+ * Reference", subsection "EUISA Instructions", Send Message (page 990):
+ *
+ * "r127 must not be used for return address when there is a src and
+ * dest overlap in send instruction."
+ *
+ * Register allocation ensures that, so don't move r127 around to avoid
+ * breaking that property.
*/
+ if (v->devinfo->gen >= 8)
+ constrained[p.atom_of_reg(127)] = true;
+
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
+ /* Assume that anything referenced via fixed GRFs is baked into the
+ * hardware's fixed-function logic and may be unsafe to move around.
+ * Also take into account the source GRF restrictions of EOT
+ * send-message instructions.
+ */
if (inst->dst.file == FIXED_GRF)
constrained[p.atom_of_reg(reg_of(inst->dst))] = true;
(is_grf(inst->src[i]) && inst->eot))
constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true;
}
+
+ /* Preserve the original allocation of VGRFs used by the barycentric
+ * source of the LINTERP instruction on Gen6, since pair-aligned
+ * barycentrics allow the PLN instruction to be used.
+ */
+ if (v->devinfo->has_pln && v->devinfo->gen <= 6 &&
+ inst->opcode == FS_OPCODE_LINTERP)
+ constrained[p.atom_of_reg(reg_of(inst->src[0]))] = true;
+
+ /* The location of the Gen7 MRF hack registers is hard-coded in the
+ * rest of the compiler back-end. Don't attempt to move them around.
+ */
+ if (v->devinfo->gen >= 7) {
+ assert(inst->dst.file != MRF);
+
+ for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
+ const unsigned reg = GEN7_MRF_HACK_START + inst->base_mrf + i;
+ constrained[p.atom_of_reg(reg)] = true;
+ }
+ }
}
return constrained;
delete[] constrained;
return true;
}
+
+/**
+ * Tell whether executing \p inst costs extra cycles because of a GRF bank
+ * conflict.
+ *
+ * A conflict is reported for a three-source instruction whose src[1] and
+ * src[2] are both GRFs that map to the same bank, unless
+ * is_conflict_optimized_out() returns true for the instruction.
+ *
+ * The result is only accurate once register allocation has been done, since
+ * before that point we don't know which bank each VGRF is going to end up
+ * aligned to.
+ */
+bool
+has_bank_conflict(const gen_device_info *devinfo, const fs_inst *inst)
+{
+   /* Only three-source instructions are considered. */
+   if (!inst->is_3src(devinfo))
+      return false;
+
+   /* Both sources have to be GRFs before their banks can be compared. */
+   if (!is_grf(inst->src[1]) || !is_grf(inst->src[2]))
+      return false;
+
+   if (bank_of(reg_of(inst->src[1])) != bank_of(reg_of(inst->src[2])))
+      return false;
+
+   return !is_conflict_optimized_out(devinfo, inst);
+}