// need to know if register is used as float or int.
// REGS_PATTERN is generated by id_regs.py (per opcode)
unsigned int floatintmap = REGS_PATTERN;
- reg_t predicate = ~0x0;
+ reg_t dest_pred = ~0x0;
sv_insn_t insn(p, bits, floatintmap,
- predicate, predicate, predicate, predicate);
+ dest_pred, dest_pred, dest_pred, dest_pred);
bool zeroing;
#if defined(USING_REG_RD) || defined(USING_REG_FRD)
- predicate = insn.predicate(s_insn.rd(), floatintmap & REG_RD, zeroing);
+ dest_pred = insn.predicate(s_insn.rd(), floatintmap & REG_RD, zeroing);
#endif
// identify which regs have had their CSR entries set as vectorised.
// really could do with a macro for-loop here... oh well...
{
insn.reset_vloop_check();
#include INCLUDEFILE
+#if defined(USING_REG_RD) || defined(USING_REG_FRD)
+ if (zeroing && ((dest_pred & (1ULL<<voffs)) == 0))
+ {
+ WRITE_REG(insn._rd(), 0);
+ }
+#endif
if (vlen > 1)
{
fprintf(stderr, "reg %s %x vloop %d vlen %d stop %d pred %lx\n",
xstr(INSN), INSNCODE, voffs, vlen, insn.stop_vloop(),
- predicate & (1<<voffs));
+ dest_pred & (1ULL<<voffs));
}
insn.reset_caches(); // ready to increment offsets in next iteration
if (insn.stop_vloop())
* of SV. it's "supposed" to "just" be a vectorisation API. it isn't:
* it's quite a bit more.
*/
-uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs, uint64_t &pred)
+uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs, int &newoffs)
{
// okaay so first determine which map to use. intreg is passed
// in (ultimately) from id_regs.py's examination of the use of
// we return the re-mapped register...
if (!r->isvec) // scalar
{
- // reg remapped even as scalar
vloop_continue = false;
- return predicated(reg, voffs, pred); // returns x0 if pred bit false
+ return reg;
}
// aaand now, as it's a "vector", FINALLY we can add on the loop-offset
// and, at last, we have "parallelism" a la contiguous registers.
reg += voffs; // wheww :)
- // before returning, put the register through the predication wringer.
- // this will return x0 if predication is false
- reg = predicated(reg, voffs, pred);
-
// however... before returning, we increment the loop-offset for
// this particular register, so that on the next loop the next
// contiguous register will be used.
- voffs += 1;
+ newoffs = voffs + 1;
return reg;
}
cached_rs2(0xff), cached_rs3(0xff),
offs_rd(0), offs_rs1(0),
offs_rs2(0), offs_rs3(0),
+ new_offs_rd(0), new_offs_rs1(0),
+ new_offs_rs2(0), new_offs_rs3(0),
prd(p_rd), prs1(p_rs1), prs2(p_rs2), prs3(p_rs3) {}
- uint64_t rd ()
- { return _remap(insn_t::rd (), fimap & REG_RD ,
- offs_rd , cached_rd, prd); }
- uint64_t rs1()
- { return _remap(insn_t::rs1(), fimap & REG_RS1,
- offs_rs1, cached_rs1, prs1); }
- uint64_t rs2()
- { return _remap(insn_t::rs2(), fimap & REG_RS2,
- offs_rs2, cached_rs2, prs2); }
- uint64_t rs3()
- { return _remap(insn_t::rs3(), fimap & REG_RS3,
- offs_rs3, cached_rs3, prs3); }
- uint64_t rvc_rs1 ()
- { return _remap(insn_t::rvc_rs1(), fimap & REG_RVC_RS1,
- offs_rs1, cached_rs1, prs1); }
- uint64_t rvc_rs1s ()
- { return _remap(insn_t::rvc_rs1s(), fimap & REG_RVC_RS1S,
- offs_rs1, cached_rs1, prs1); }
- uint64_t rvc_rs2 ()
- { return _remap(insn_t::rvc_rs2(), fimap & REG_RVC_RS2,
- offs_rs2, cached_rs2, prs2); }
- uint64_t rvc_rs2s ()
- { return _remap(insn_t::rvc_rs2s(), fimap & REG_RVC_RS2S,
- offs_rs2, cached_rs2, prs2); }
+ uint64_t rd () { return predicated(_rd (), offs_rd , prd); }
+ uint64_t rs1() { return predicated(_rs1(), offs_rs1, prs1); }
+ uint64_t rs2() { return predicated(_rs2(), offs_rs2, prs2); }
+ uint64_t rs3() { return predicated(_rs3(), offs_rs3, prs3); }
+ uint64_t rvc_rs1 () { return predicated(_rvc_rs1 (), offs_rs1, prs1); }
+ uint64_t rvc_rs1s() { return predicated(_rvc_rs1s(), offs_rs1, prs1); }
+ uint64_t rvc_rs2 () { return predicated(_rvc_rs2 (), offs_rs2, prs2); }
+ uint64_t rvc_rs2s() { return predicated(_rvc_rs2s(), offs_rs2, prs2); }
+
+ uint64_t _rd () { return _remap(insn_t::rd (), fimap & REG_RD ,
+ offs_rd , cached_rd, new_offs_rd); }
+ uint64_t _rs1() { return _remap(insn_t::rs1(), fimap & REG_RS1,
+ offs_rs1, cached_rs1, new_offs_rs1); }
+ uint64_t _rs2() { return _remap(insn_t::rs2(), fimap & REG_RS2,
+ offs_rs2, cached_rs2, new_offs_rs2); }
+ uint64_t _rs3() { return _remap(insn_t::rs3(), fimap & REG_RS3,
+ offs_rs3, cached_rs3, new_offs_rs3); }
+ uint64_t _rvc_rs1 () { return _remap(insn_t::rvc_rs1(), fimap & REG_RVC_RS1,
+ offs_rs1, cached_rs1, new_offs_rs1); }
+ uint64_t _rvc_rs1s() { return _remap(insn_t::rvc_rs1s(), fimap & REG_RVC_RS1S,
+ offs_rs1, cached_rs1, new_offs_rs1); }
+ uint64_t _rvc_rs2 () { return _remap(insn_t::rvc_rs2(), fimap & REG_RVC_RS2,
+ offs_rs2, cached_rs2, new_offs_rs2); }
+ uint64_t _rvc_rs2s() { return _remap(insn_t::rvc_rs2s(), fimap & REG_RVC_RS2S,
+ offs_rs2, cached_rs2, new_offs_rs2); }
void reset_caches(void)
{
cached_rs1 = 0xff;
cached_rs2 = 0xff;
cached_rs3 = 0xff;
+ offs_rd = new_offs_rd;
+ offs_rs1 = new_offs_rs1;
+ offs_rs2 = new_offs_rs2;
+ offs_rs3 = new_offs_rs3;
}
bool sv_check_reg(bool intreg, uint64_t reg);
int offs_rs1;
int offs_rs2;
int offs_rs3;
+ int new_offs_rd;
+ int new_offs_rs1;
+ int new_offs_rs2;
+ int new_offs_rs3;
uint64_t &prd;
uint64_t &prs1;
uint64_t &prs2;
// remaps the register through the lookup table.
// will need to take the current loop index/offset somehow
- uint64_t remap(uint64_t reg, bool isint, int &offs, uint64_t &pred);
+ uint64_t remap(uint64_t reg, bool isint, int &offs, int &newoffs);
// cached version of remap: if remap is called multiple times
// by an emulated instruction it would increment the loop offset
// before it's supposed to.
uint64_t _remap(uint64_t reg, bool isint, int &offs,
- uint64_t &cached, uint64_t &pred)
+ uint64_t &cached, int &newoffs)
{
if (cached == 0xff)
{
- cached = remap(reg, isint, offs, pred);
+ cached = remap(reg, isint, offs, newoffs);
}
else
{
{
vloop_continue = false;
}
- reg = predicated(reg, offs, pred);
- if (reg == 0)
- {
- return 0;
- }
}
return cached;
}