From bdcc04f3a94d6584061ac4779e91526db63ad83d Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 30 Sep 2018 08:00:09 +0100 Subject: [PATCH] add sv support for zeroing predication in dest register bit of a major rework: * access to the "unpredicated" (non-zero-hacked) register was needed * therefore all rd/rs1-3/rvc_xxx functions had to have _ variants * the underscored variants are not predicated * this in turn meant that the offset for each register was wrong as it is incremented *after* being checked * therefore a newoffs had to be added * and the reset_caches function copies the newoffs values bit of a mess but it works: this is a state machine after all... --- riscv/insn_template_sv.cc | 14 +++++--- riscv/sv.cc | 11 ++---- riscv/sv_decode.h | 70 +++++++++++++++++++++------------------ 3 files changed, 51 insertions(+), 44 deletions(-) diff --git a/riscv/insn_template_sv.cc b/riscv/insn_template_sv.cc index 97ca8cb..9cd5a41 100644 --- a/riscv/insn_template_sv.cc +++ b/riscv/insn_template_sv.cc @@ -18,12 +18,12 @@ reg_t FN(processor_t* p, insn_t s_insn, reg_t pc) // need to know if register is used as float or int. // REGS_PATTERN is generated by id_regs.py (per opcode) unsigned int floatintmap = REGS_PATTERN; - reg_t predicate = ~0x0; + reg_t dest_pred = ~0x0; sv_insn_t insn(p, bits, floatintmap, - predicate, predicate, predicate, predicate); + dest_pred, dest_pred, dest_pred, dest_pred); bool zeroing; #if defined(USING_REG_RD) || defined(USING_REG_FRD) - predicate = insn.predicate(s_insn.rd(), floatintmap & REG_RD, zeroing); + dest_pred = insn.predicate(s_insn.rd(), floatintmap & REG_RD, zeroing); #endif // identify which regs have had their CSR entries set as vectorised. // really could do with a macro for-loop here... oh well... 
@@ -44,11 +44,17 @@ reg_t FN(processor_t* p, insn_t s_insn, reg_t pc) { insn.reset_vloop_check(); #include INCLUDEFILE +#if defined(USING_REG_RD) || defined(USING_REG_FRD) + if (zeroing && ((dest_pred & (1< 1) { fprintf(stderr, "reg %s %x vloop %d vlen %d stop %d pred %lx\n", xstr(INSN), INSNCODE, voffs, vlen, insn.stop_vloop(), - predicate & (1<isvec) // scalar { - // reg remapped even as scalar vloop_continue = false; - return predicated(reg, voffs, pred); // returns x0 if pred bit false + return reg; } // aaand now, as it's a "vector", FINALLY we can add on the loop-offset @@ -102,14 +101,10 @@ uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs, uint64_t &pred) // and, at last, we have "parallelism" a la contiguous registers. reg += voffs; // wheww :) - // before returning, put the register through the predication wringer. - // this will return x0 if predication is false - reg = predicated(reg, voffs, pred); - // however... before returning, we increment the loop-offset for // this particular register, so that on the next loop the next // contiguous register will be used. 
- voffs += 1; + newoffs = voffs + 1; return reg; } diff --git a/riscv/sv_decode.h b/riscv/sv_decode.h index 039c439..3999d06 100644 --- a/riscv/sv_decode.h +++ b/riscv/sv_decode.h @@ -27,31 +27,34 @@ public: cached_rs2(0xff), cached_rs3(0xff), offs_rd(0), offs_rs1(0), offs_rs2(0), offs_rs3(0), + new_offs_rd(0), new_offs_rs1(0), + new_offs_rs2(0), new_offs_rs3(0), prd(p_rd), prs1(p_rs1), prs2(p_rs2), prs3(p_rs3) {} - uint64_t rd () - { return _remap(insn_t::rd (), fimap & REG_RD , - offs_rd , cached_rd, prd); } - uint64_t rs1() - { return _remap(insn_t::rs1(), fimap & REG_RS1, - offs_rs1, cached_rs1, prs1); } - uint64_t rs2() - { return _remap(insn_t::rs2(), fimap & REG_RS2, - offs_rs2, cached_rs2, prs2); } - uint64_t rs3() - { return _remap(insn_t::rs3(), fimap & REG_RS3, - offs_rs3, cached_rs3, prs3); } - uint64_t rvc_rs1 () - { return _remap(insn_t::rvc_rs1(), fimap & REG_RVC_RS1, - offs_rs1, cached_rs1, prs1); } - uint64_t rvc_rs1s () - { return _remap(insn_t::rvc_rs1s(), fimap & REG_RVC_RS1S, - offs_rs1, cached_rs1, prs1); } - uint64_t rvc_rs2 () - { return _remap(insn_t::rvc_rs2(), fimap & REG_RVC_RS2, - offs_rs2, cached_rs2, prs2); } - uint64_t rvc_rs2s () - { return _remap(insn_t::rvc_rs2s(), fimap & REG_RVC_RS2S, - offs_rs2, cached_rs2, prs2); } + uint64_t rd () { return predicated(_rd (), offs_rd , prd); } + uint64_t rs1() { return predicated(_rs1(), offs_rs1, prs1); } + uint64_t rs2() { return predicated(_rs2(), offs_rs2, prs2); } + uint64_t rs3() { return predicated(_rs3(), offs_rs3, prs3); } + uint64_t rvc_rs1 () { return predicated(_rvc_rs1 (), offs_rs1, prs1); } + uint64_t rvc_rs1s() { return predicated(_rvc_rs1s(), offs_rs1, prs1); } + uint64_t rvc_rs2 () { return predicated(_rvc_rs2 (), offs_rs2, prs2); } + uint64_t rvc_rs2s() { return predicated(_rvc_rs2s(), offs_rs2, prs2); } + + uint64_t _rd () { return _remap(insn_t::rd (), fimap & REG_RD , + offs_rd , cached_rd, new_offs_rd); } + uint64_t _rs1() { return _remap(insn_t::rs1(), fimap & REG_RS1, + 
offs_rs1, cached_rs1, new_offs_rs1); } + uint64_t _rs2() { return _remap(insn_t::rs2(), fimap & REG_RS2, + offs_rs2, cached_rs2, new_offs_rs2); } + uint64_t _rs3() { return _remap(insn_t::rs3(), fimap & REG_RS3, + offs_rs3, cached_rs3, new_offs_rs3); } + uint64_t _rvc_rs1 () { return _remap(insn_t::rvc_rs1(), fimap & REG_RVC_RS1, + offs_rs1, cached_rs1, new_offs_rs1); } + uint64_t _rvc_rs1s() { return _remap(insn_t::rvc_rs1s(), fimap & REG_RVC_RS1S, + offs_rs1, cached_rs1, new_offs_rs1); } + uint64_t _rvc_rs2 () { return _remap(insn_t::rvc_rs2(), fimap & REG_RVC_RS2, + offs_rs2, cached_rs2, new_offs_rs2); } + uint64_t _rvc_rs2s() { return _remap(insn_t::rvc_rs2s(), fimap & REG_RVC_RS2S, + offs_rs2, cached_rs2, new_offs_rs2); } void reset_caches(void) { @@ -59,6 +62,10 @@ public: cached_rs1 = 0xff; cached_rs2 = 0xff; cached_rs3 = 0xff; + offs_rd = new_offs_rd; + offs_rs1 = new_offs_rs1; + offs_rs2 = new_offs_rs2; + offs_rs3 = new_offs_rs3; } bool sv_check_reg(bool intreg, uint64_t reg); @@ -81,6 +88,10 @@ private: int offs_rs1; int offs_rs2; int offs_rs3; + int new_offs_rd; + int new_offs_rs1; + int new_offs_rs2; + int new_offs_rs3; uint64_t &prd; uint64_t &prs1; uint64_t &prs2; @@ -88,17 +99,17 @@ private: // remaps the register through the lookup table. // will need to take the current loop index/offset somehow - uint64_t remap(uint64_t reg, bool isint, int &offs, uint64_t &pred); + uint64_t remap(uint64_t reg, bool isint, int &offs, int &newoffs); // cached version of remap: if remap is called multiple times // by an emulated instruction it would increment the loop offset // before it's supposed to. 
uint64_t _remap(uint64_t reg, bool isint, int &offs, - uint64_t &cached, uint64_t &pred) + uint64_t &cached, int &newoffs) { if (cached == 0xff) { - cached = remap(reg, isint, offs, pred); + cached = remap(reg, isint, offs, newoffs); } else { @@ -106,11 +117,6 @@ private: { vloop_continue = false; } - reg = predicated(reg, offs, pred); - if (reg == 0) - { - return 0; - } } return cached; } -- 2.30.2