From 6613cfeb94dbfa8a43c17594829c12d4aca0aad4 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 30 Sep 2018 05:13:50 +0100 Subject: [PATCH] use an alternative logic for detecting scalar / loop-end instead of pre-checking. do the check for "all-scalar" during the first loop iteration, i.e. when registers are first accessed --- riscv/insn_template_sv.cc | 68 +++++++++------------------------------ riscv/sv.cc | 2 ++ riscv/sv_decode.h | 10 +++++- 3 files changed, 27 insertions(+), 53 deletions(-) diff --git a/riscv/insn_template_sv.cc b/riscv/insn_template_sv.cc index 9533e83..45fa97d 100644 --- a/riscv/insn_template_sv.cc +++ b/riscv/insn_template_sv.cc @@ -14,7 +14,7 @@ reg_t FN(processor_t* p, insn_t s_insn, reg_t pc) // any registers that are marked as "vectorised" insn_bits_t bits = s_insn.bits(); #ifndef USING_NOREGS - int vlen = 1; + int vlen = p->get_state()->vl; // need to know if register is used as float or int. // REGS_PATTERN is generated by id_regs.py (per opcode) unsigned int floatintmap = REGS_PATTERN; @@ -23,68 +23,32 @@ reg_t FN(processor_t* p, insn_t s_insn, reg_t pc) // identify which regs have had their CSR entries set as vectorised. // really could do with a macro for-loop here... oh well... 
// integer ops, RD, RS1, RS2, RS3 (use sv_int_tb) - bool vectorop = -#ifdef USING_REG_RD - insn.sv_check_reg(true, s_insn.rd()) | -#endif -#ifdef USING_REG_RS1 - insn.sv_check_reg(true, s_insn.rs1()) | -#endif -#ifdef USING_REG_RS2 - insn.sv_check_reg(true, s_insn.rs2()) | -#endif -#ifdef USING_REG_RS2 - insn.sv_check_reg(true, s_insn.rs3()) | -#endif -#ifdef USING_REG_RVC_RS1 - insn.sv_check_reg(true, s_insn.rvc_rs1()) | -#endif -#ifdef USING_REG_RVC_RS1S - insn.sv_check_reg(true, s_insn.rvc_rs1s()) | -#endif -#ifdef USING_REG_RVC_RS2 - insn.sv_check_reg(true, s_insn.rvc_rs2()) | -#endif -#ifdef USING_REG_RVC_RS2S - insn.sv_check_reg(true, s_insn.rvc_rs2s()) | -#endif - // fp ops, RD, RS1, RS2, RS3 (use sv_fp_tb) -#ifdef USING_REG_FRD - insn.sv_check_reg(false, s_insn.rd()) | -#endif -#ifdef USING_REG_FRS1 - insn.sv_check_reg(false, s_insn.rs1()) | -#endif -#ifdef USING_REG_FRS2 - insn.sv_check_reg(false, s_insn.rs2()) | -#endif -#ifdef USING_REG_FRS3 - insn.sv_check_reg(false, s_insn.rs3()) | -#endif -#ifdef USING_REG_RVC_FRS2 - insn.sv_check_reg(false, s_insn.rvc_rs2()) | -#endif -#ifdef USING_REG_RVC_FRS2S - insn.sv_check_reg(false, s_insn.rvc_rs2s()) | -#endif - false; // save a few cycles by |ing the checks together. 
- if (insn.sv_check_reg(true, 16)) { - fprintf(stderr, "reg %s %x rd %ld rs1 %ld rs2 %ld\n", - xstr(INSN), INSNCODE, s_insn.rd(), s_insn.rs1(), s_insn.rs2()); + fprintf(stderr, "reg %s %x rd %ld rs1 %ld rs2 %ld vlen %d\n", + xstr(INSN), INSNCODE, s_insn.rd(), s_insn.rs1(), s_insn.rs2(), + vlen); } // if vectorop is set, one of the regs is not a scalar, // so we must read the VL CSR and do a loop - if (vectorop) + if (vlen == 0) { - vlen = p->get_state()->vl; - fprintf(stderr, "vectorop %x vlen %d\n", INSNCODE, vlen); + vlen = 1; // minimum of one loop } for (int voffs=0; voffs < vlen; voffs++) { + insn.reset_vloop_check(); #include INCLUDEFILE + if (vlen > 1) + { + fprintf(stderr, "reg %s %x vloop %d vlen %d stop %d\n", + xstr(INSN), INSNCODE, voffs, vlen, insn.stop_vloop()); + } insn.reset_caches(); // ready to increment offsets in next iteration + if (insn.stop_vloop()) + { + break; + } } #else insn_t insn(bits); diff --git a/riscv/sv.cc b/riscv/sv.cc index ba7f691..b180b59 100644 --- a/riscv/sv.cc +++ b/riscv/sv.cc @@ -78,6 +78,7 @@ uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs) // is not being "redirected", so just return the actual reg. if (!r->active) { + vloop_continue = false; return reg; // not active: return as-is } @@ -91,6 +92,7 @@ uint64_t sv_insn_t::remap(uint64_t reg, bool intreg, int &voffs) // we return the re-mapped register... if (!r->isvec) // scalar { + vloop_continue = false; return reg; // ... remapped at this point... 
} diff --git a/riscv/sv_decode.h b/riscv/sv_decode.h index 3433bda..f2f344c 100644 --- a/riscv/sv_decode.h +++ b/riscv/sv_decode.h @@ -21,7 +21,7 @@ class sv_insn_t: public insn_t { public: sv_insn_t(processor_t *pr, insn_bits_t bits, unsigned int f) : - insn_t(bits), p(pr), fimap(f), + insn_t(bits), p(pr), vloop_continue(true), fimap(f), cached_rd(0xff), cached_rs1(0xff), cached_rs2(0xff), cached_rs3(0xff), offs_rd(0), offs_rs1(0), @@ -60,8 +60,12 @@ public: sv_pred_entry* get_predentry(uint64_t reg, bool isint); reg_t predicate(uint64_t reg, bool isint, bool &zeroing); + void reset_vloop_check(void) { vloop_continue = true; } + bool stop_vloop(void) { return !vloop_continue; } + private: processor_t *p; + bool vloop_continue; unsigned int fimap; uint64_t cached_rd; uint64_t cached_rs1; @@ -84,6 +88,10 @@ private: { cached = remap(reg, isint, offs); } + else if (!sv_check_reg(isint, reg)) + { + vloop_continue = false; + } return cached; } }; -- 2.30.2