From e2fd8dfd141a596242c7c71311c2daebae29d9cb Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton
Date: Wed, 26 Sep 2018 16:09:42 +0100
Subject: [PATCH] remembered that the use of sv registers has to be loop-incremented separately

the SV parallelism loop has to respect whether each *individual* register
is a vector or a scalar.
---
 riscv/insn_template_sv.cc |  5 ++---
 riscv/sv.cc               | 10 ++++++++--
 riscv/sv_decode.h         | 21 +++++++++++++--------
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/riscv/insn_template_sv.cc b/riscv/insn_template_sv.cc
index 222cde2..b8bad7a 100644
--- a/riscv/insn_template_sv.cc
+++ b/riscv/insn_template_sv.cc
@@ -11,12 +11,11 @@ reg_t FN(processor_t* p, insn_t s_insn, reg_t pc)
   // any registers that are marked as "vectorised"
   insn_bits_t bits = s_insn.bits();
 #ifndef USING_NOREGS
-  int voffs = 0;
   int vlen = 1;
   // need to know if register is used as float or int.
   // REGS_PATTERN is generated by id_regs.py (per opcode)
   unsigned int floatintmap = REGS_PATTERN;
-  sv_insn_t insn(bits, voffs, floatintmap);
+  sv_insn_t insn(bits, floatintmap);
   bool vectorop = false;
   reg_t predicate = 0;
   // identify which regs have had their CSR entries set as vectorised.
@@ -54,7 +53,7 @@ reg_t FN(processor_t* p, insn_t s_insn, reg_t pc)
   {
     // TODO: vlen = p->CSR(SIMPLEV_VL); // something like that...
   }
-  for (; voffs < vlen; voffs++)
+  for (int voffs=0; voffs < vlen; voffs++)
   {
 #include INCLUDEFILE
   }
diff --git a/riscv/sv.cc b/riscv/sv.cc
index e665f69..6be5872 100644
--- a/riscv/sv.cc
+++ b/riscv/sv.cc
@@ -45,7 +45,7 @@ bool sv_check_reg(bool intreg, uint64_t reg)
  * of SV. it's "supposed" to "just" be a vectorisation API. it isn't:
  * it's quite a bit more.
  */
-uint64_t sv_insn_t::remap(uint64_t reg, bool intreg)
+uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs)
 {
   // okaay so first determine which map to use. intreg is passed
   // in (ultimately) from id_regs.py's examination of the use of
@@ -83,6 +83,12 @@ uint64_t sv_insn_t::remap(uint64_t reg, bool intreg)
   // aaand now, as it's a "vector", FINALLY we can add on the loop-offset
   // which was passed in to the sv_insn_t constructor (by reference)
   // and, at last, we have "parallelism" a la contiguous registers.
-  return reg + this->voffs; // wheww :)
+  reg += voffs; // wheww :)
+
+  // however... before returning, we increment the loop-offset for
+  // this particular register, so that on the next loop the next
+  // contiguous register will be used.
+  voffs += 1;
+  return reg;
 }
 
diff --git a/riscv/sv_decode.h b/riscv/sv_decode.h
index 7faf48a..49a1678 100644
--- a/riscv/sv_decode.h
+++ b/riscv/sv_decode.h
@@ -14,18 +14,23 @@
 class sv_insn_t: public insn_t
 {
 public:
-  sv_insn_t(insn_bits_t bits, int& v, unsigned int f) :
-            insn_t(bits), voffs(v), fimap(f) {}
-  uint64_t rd () { return remap(insn_t::rd (), fimap & REG_RD); }
-  uint64_t rs1() { return remap(insn_t::rs1(), fimap & REG_RS1); }
-  uint64_t rs2() { return remap(insn_t::rs2(), fimap & REG_RS2); }
-  uint64_t rs3() { return remap(insn_t::rs3(), fimap & REG_RS3); }
+  sv_insn_t(insn_bits_t bits, unsigned int f) :
+            insn_t(bits), fimap(f),
+            offs_rd(0), offs_rs1(0),
+            offs_rs2(0), offs_rs3(0) {}
+  uint64_t rd () { return remap(insn_t::rd (), fimap & REG_RD , offs_rd); }
+  uint64_t rs1() { return remap(insn_t::rs1(), fimap & REG_RS1, offs_rs1); }
+  uint64_t rs2() { return remap(insn_t::rs2(), fimap & REG_RS2, offs_rs2); }
+  uint64_t rs3() { return remap(insn_t::rs3(), fimap & REG_RS3, offs_rs3); }
 private:
-  int &voffs;
   unsigned int fimap;
+  int offs_rd;
+  int offs_rs1;
+  int offs_rs2;
+  int offs_rs3;
   // remaps the register through the lookup table.
   // will need to take the current loop index/offset somehow
-  uint64_t remap(uint64_t reg, bool isint);
+  uint64_t remap(uint64_t reg, bool isint, int &offs);
 };
 
 #endif
-- 
2.30.2