// need to know if register is used as float or int.
// REGS_PATTERN is generated by id_regs.py (per opcode)
unsigned int floatintmap = REGS_PATTERN;
- reg_t predicate = 0;
+ reg_t predicate = ~0x0;
// NOTE(review): default is now all-ones ("every element enabled"), so the
// instruction runs unmasked until a real predicate register is read below.
sv_insn_t insn(p, bits, floatintmap,
               predicate, predicate, predicate, predicate);
+ bool zeroing;
// NOTE(review): 'zeroing' appears to be an out-param written by
// insn.predicate() below - confirm it can never be read uninitialised.
+#if defined(USING_REG_RD) || defined(USING_REG_FRD)
+ predicate = insn.predicate(s_insn.rd(), floatintmap & REG_RD, zeroing);
+#endif
// identify which regs have had their CSR entries set as vectorised.
// really could do with a macro for-loop here... oh well...
// integer ops, RD, RS1, RS2, RS3 (use sv_int_tb)
#include INCLUDEFILE
if (vlen > 1)
{
- fprintf(stderr, "reg %s %x vloop %d vlen %d stop %d\n",
- xstr(INSN), INSNCODE, voffs, vlen, insn.stop_vloop());
+ fprintf(stderr, "reg %s %x vloop %d vlen %d stop %d pred %lx\n",
+ xstr(INSN), INSNCODE, voffs, vlen, insn.stop_vloop(),
+ predicate & (1<<voffs));
// NOTE(review): 1<<voffs is a 32-bit int shift; if reg_t is 64-bit this
// drops predicate bits 32..63 (and is UB for voffs >= 31) - use 1ULL<<voffs
// as done elsewhere.  Debug-print only, but worth fixing for consistency.
}
insn.reset_caches(); // ready to increment offsets in next iteration
if (insn.stop_vloop())
* of SV. it's "supposed" to "just" be a vectorisation API. it isn't:
* it's quite a bit more.
*/
-uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs)
+uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs, uint64_t &pred)
{
    // okaay so first determine which map to use. intreg is passed
    // in (ultimately) from id_regs.py's examination of the use of
    // NOTE(review): lines elided in this hunk - 'r' (the per-register
    // CSR/remap table entry) is looked up in the omitted code.
    // we return the re-mapped register...
    if (!r->isvec) // scalar
    {
+        // reg remapped even as scalar
        vloop_continue = false;
-        return reg; // ... remapped at this point...
+        return predicated(reg, voffs, pred); // returns x0 if pred bit false
    }
    // aaand now, as it's a "vector", FINALLY we can add on the loop-offset
    // and, at last, we have "parallelism" a la contiguous registers.
    reg += voffs; // wheww :)
+    // before returning, put the register through the predication wringer.
+    // this will return x0 if predication is false
+    reg = predicated(reg, voffs, pred);
+
    // however... before returning, we increment the loop-offset for
    // this particular register, so that on the next loop the next
    // contiguous register will be used.
}
// NOTE(review): the two lines below look like the tail of a different
// (elided) function - 'predicate' is not in remap()'s visible scope.
return predicate;
}
+
+uint64_t sv_insn_t::predicated(uint64_t reg, int offs, uint64_t pred)
+{
+ if (pred & (1<<offs))
+ {
+ return reg;
+ }
+ fprintf(stderr, "predication %ld %d %lx\n", reg, offs, pred);
+ return 0;
+}
+
// NOTE(review): constructor init-list tail (head elided) - binds the
// per-operand predicate members to the masks passed in by the caller.
offs_rs2(0), offs_rs3(0),
prd(p_rd), prs1(p_rs1), prs2(p_rs2), prs3(p_rs3) {}
// Operand accessors: each architectural register number goes through the
// cached remap; this hunk additionally threads the matching per-operand
// predicate mask (prd/prs1/prs2/prs3) down into _remap().
uint64_t rd ()
- { return _remap(insn_t::rd (), fimap & REG_RD , offs_rd , cached_rd); }
+ { return _remap(insn_t::rd (), fimap & REG_RD ,
+ offs_rd , cached_rd, prd); }
uint64_t rs1()
- { return _remap(insn_t::rs1(), fimap & REG_RS1, offs_rs1, cached_rs1); }
+ { return _remap(insn_t::rs1(), fimap & REG_RS1,
+ offs_rs1, cached_rs1, prs1); }
uint64_t rs2()
- { return _remap(insn_t::rs2(), fimap & REG_RS2, offs_rs2, cached_rs2); }
+ { return _remap(insn_t::rs2(), fimap & REG_RS2,
+ offs_rs2, cached_rs2, prs2); }
uint64_t rs3()
- { return _remap(insn_t::rs3(), fimap & REG_RS3, offs_rs3, cached_rs3); }
+ { return _remap(insn_t::rs3(), fimap & REG_RS3,
+ offs_rs3, cached_rs3, prs3); }
// NOTE(review): the RVC (compressed) forms deliberately share offsets,
// caches and predicates with the uncompressed rs1/rs2 accessors.
uint64_t rvc_rs1 ()
{ return _remap(insn_t::rvc_rs1(), fimap & REG_RVC_RS1,
- offs_rs1, cached_rs1); }
+ offs_rs1, cached_rs1, prs1); }
uint64_t rvc_rs1s ()
{ return _remap(insn_t::rvc_rs1s(), fimap & REG_RVC_RS1S,
- offs_rs1, cached_rs1); }
+ offs_rs1, cached_rs1, prs1); }
uint64_t rvc_rs2 ()
{ return _remap(insn_t::rvc_rs2(), fimap & REG_RVC_RS2,
- offs_rs2, cached_rs2); }
+ offs_rs2, cached_rs2, prs2); }
uint64_t rvc_rs2s ()
{ return _remap(insn_t::rvc_rs2s(), fimap & REG_RVC_RS2S,
- offs_rs2, cached_rs2); }
+ offs_rs2, cached_rs2, prs2); }
void reset_caches(void)
{
// NOTE(review): reset_caches body elided in this hunk.
int offs_rs1;
int offs_rs2;
int offs_rs3;
// Per-operand predicate masks, changed from by-value copies to
// references so updates made by the owner stay visible here.
- uint64_t prd;
- uint64_t prs1;
- uint64_t prs2;
- uint64_t prs3;
+ uint64_t &prd;
+ uint64_t &prs1;
+ uint64_t &prs2;
+ uint64_t &prs3;
// NOTE(review): reference members must be bound in EVERY constructor's
// init-list (done at the ctor visible above) and make the class
// non-copy-assignable - verify no other ctor/assignment exists.
+
// remaps the register through the lookup table.
// will need to take the current loop index/offset somehow
- uint64_t remap(uint64_t reg, bool isint, int &offs);
+ uint64_t remap(uint64_t reg, bool isint, int &offs, uint64_t &pred);
// cached version of remap: if remap is called multiple times
// by an emulated instruction it would increment the loop offset
// before it's supposed to.
- uint64_t _remap(uint64_t reg, bool isint, int &offs, uint64_t &cached)
+ uint64_t _remap(uint64_t reg, bool isint, int &offs,
+ uint64_t &cached, uint64_t &pred)
{
// 0xff acts as the "not yet remapped this iteration" sentinel,
// presumably re-armed by reset_caches() - so remap() (which bumps the
// loop offset) runs at most once per operand per loop iteration.
if (cached == 0xff)
{
- cached = remap(reg, isint, offs);
+ cached = remap(reg, isint, offs, pred);
}
- else if (!sv_check_reg(isint, reg))
+ else
{
- vloop_continue = false;
+ if (!sv_check_reg(isint, reg))
+ {
+ vloop_continue = false;
+ }
// NOTE(review): on this cached path 'reg' is still the architectural
// (un-remapped) number, whereas the cached==0xff path applies
// predication inside remap() - confirm both gate the same element.
+ reg = predicated(reg, offs, pred);
+ if (reg == 0)
+ {
// Element masked out: caller sees x0 instead of the cached register.
+ return 0;
+ }
}
return cached;
}
+
+ uint64_t predicated(uint64_t reg, int offs, uint64_t pred);
};
#endif