// Write-back target in XPR for the register selected by the 5-bit field insn.bits[9:5].
#define CRD do_writeback(XPR,(insn.bits >> 5) & 0x1f)
// 6-bit field insn.bits[15:10], sign-extended to int32_t by shifting left then right by 26
// (relies on the right shift of a negative int32_t being arithmetic).
#define CIMM6 ((int32_t)((insn.bits >> 10) & 0x3f) << 26 >> 26)
+// vector stuff: operand accessors for the microthread processors held in uts[]
+#define VL vl // current vector length (the `vl` member)
+// UT_*(idx): operands of the current insn, resolved against microthread uts[idx]
+#define UT_RS1(idx) uts[idx]->XPR[insn.rtype.rs1]
+#define UT_RS2(idx) uts[idx]->XPR[insn.rtype.rs2]
+#define UT_RD(idx) do_writeback(uts[idx]->XPR,insn.rtype.rd) // write-back target for rd in the microthread's XPR
+#define UT_RA(idx) do_writeback(uts[idx]->XPR,1) // write-back target for register 1 in the microthread's XPR
+#define UT_FRS1(idx) uts[idx]->FPR[insn.ftype.rs1]
+#define UT_FRS2(idx) uts[idx]->FPR[insn.ftype.rs2]
+#define UT_FRS3(idx) uts[idx]->FPR[insn.ftype.rs3]
+#define UT_FRD(idx) uts[idx]->FPR[insn.ftype.rd]
+#define UT_RM(idx) ((insn.ftype.rm != 7) ? insn.ftype.rm : /* rm == 7: take the mode from the microthread's fsr (FSR_RD field) */ \
+ ((uts[idx]->fsr & FSR_RD) >> FSR_RD_SHIFT))
+// UT_LOOP_*: the same accessors indexed by the loop variable `i` of UT_LOOP_START
+#define UT_LOOP_START for (int i=0;i<VL; i++) { // opens a loop over microthreads 0..VL-1; must be closed with UT_LOOP_END
+#define UT_LOOP_END }
+#define UT_LOOP_RS1 UT_RS1(i)
+#define UT_LOOP_RS2 UT_RS2(i)
+#define UT_LOOP_RD UT_RD(i)
+#define UT_LOOP_RA UT_RA(i)
+#define UT_LOOP_FRS1 UT_FRS1(i)
+#define UT_LOOP_FRS2 UT_FRS2(i)
+#define UT_LOOP_FRS3 UT_FRS3(i)
+#define UT_LOOP_FRD UT_FRD(i)
+#define UT_LOOP_RM UT_RM(i)
+
+#define VEC_LOAD(dst, func, inc) /* for each microthread i < VL: UT_LOOP_<dst> = mmu.func(addr); addr starts at RS1 and advances by inc */ \
+ reg_t addr = RS1; /* declared in the expanding scope, so the macro can be used at most once per scope */ \
+ UT_LOOP_START \
+ UT_LOOP_##dst = mmu.func(addr); /* dst is pasted onto UT_LOOP_, e.g. RD or FRD */ \
+ addr += inc; /* inc is re-evaluated on every iteration */ \
+ UT_LOOP_END
+
+#define VEC_STORE(src, func, inc) /* for each microthread i < VL: mmu.func(addr, UT_LOOP_<src>); addr starts at RS1 and advances by inc */ \
+ reg_t addr = RS1; /* declared in the expanding scope, so the macro can be used at most once per scope */ \
+ UT_LOOP_START \
+ mmu.func(addr, UT_LOOP_##src); /* src is pasted onto UT_LOOP_, e.g. RD or FRD */ \
+ addr += inc; /* inc is re-evaluated on every iteration */ \
+ UT_LOOP_END
+
+enum vt_command_t // values of this type are thrown by instruction code and caught in processor_t::step
+{
+ vt_command_stop, // step() returns when it catches this value
+};
+
#endif
+require_fp; // every snippet in this group starts with require_fp (defined elsewhere)
+VEC_LOAD(FRD, load_int64, 8); // microthread i: FRD = mmu.load_int64(RS1 + 8*i)
+require_fp;
+VEC_LOAD(FRD, load_int64, RS2); // same load, but the address advances by RS2 per microthread
+require_fp;
+VEC_LOAD(FRD, load_int32, 4); // microthread i: FRD = mmu.load_int32(RS1 + 4*i)
+require_fp;
+VEC_LOAD(FRD, load_int32, RS2); // same load, but the address advances by RS2 per microthread
+require_fp;
+demand(0 <= RS2 && RS2 < MAX_UTS, "ut not in range!"); // RS2 is used as an index into uts[]; NOTE(review): if reg_t is unsigned, 0 <= RS2 is always true - confirm
+UT_FRD(RS2) = FRS1; // copy FRS1 into FRD of microthread RS2
+require_fp;
+UT_LOOP_START // copy FRS1 into FRD of every microthread below VL
+ UT_LOOP_FRD = FRS1;
+UT_LOOP_END
+require_fp;
+demand(0 <= RS2 && RS2 < MAX_UTS, "ut not in range"); // RS2 is used as an index into uts[]
+FRD = UT_FRS1(RS2); // read FPR[rs1] of microthread RS2 into FRD
+require_fp;
+UT_LOOP_START // inside each microthread below VL: FRD = its own FRS1
+ UT_LOOP_FRD = UT_LOOP_FRS1;
+UT_LOOP_END
+require_fp;
+VEC_STORE(FRD, store_uint64, 8); // microthread i: mmu.store_uint64(RS1 + 8*i, FRD)
+require_fp;
+VEC_STORE(FRD, store_uint64, RS2); // same store, but the address advances by RS2 per microthread
+require_fp;
+VEC_STORE(FRD, store_uint32, 4); // microthread i: mmu.store_uint32(RS1 + 4*i, FRD)
+require_fp;
+VEC_STORE(FRD, store_uint32, RS2); // same store, but the address advances by RS2 per microthread
+VEC_LOAD(RD, load_int8, 1); // microthread i: RD = mmu.load_int8(RS1 + i)
+VEC_LOAD(RD, load_int8, RS2); // same load, but the address advances by RS2 per microthread
+VEC_LOAD(RD, load_uint8, 1); // microthread i: RD = mmu.load_uint8(RS1 + i)
+VEC_LOAD(RD, load_uint8, RS2); // same load, but the address advances by RS2 per microthread
+require_xpr64; // the 64-bit integer forms are preceded by require_xpr64 (defined elsewhere)
+VEC_LOAD(RD, load_int64, 8); // microthread i: RD = mmu.load_int64(RS1 + 8*i)
+require_xpr64;
+VEC_LOAD(RD, load_int64, RS2); // same load, but the address advances by RS2 per microthread
+VEC_LOAD(RD, load_int16, 2); // microthread i: RD = mmu.load_int16(RS1 + 2*i)
+VEC_LOAD(RD, load_int16, RS2); // same load, but the address advances by RS2 per microthread
+VEC_LOAD(RD, load_uint16, 2); // microthread i: RD = mmu.load_uint16(RS1 + 2*i)
+VEC_LOAD(RD, load_uint16, RS2); // same load, but the address advances by RS2 per microthread
+VEC_LOAD(RD, load_int32, 4); // microthread i: RD = mmu.load_int32(RS1 + 4*i)
+VEC_LOAD(RD, load_int32, RS2); // same load, but the address advances by RS2 per microthread
+VEC_LOAD(RD, load_uint32, 4); // microthread i: RD = mmu.load_uint32(RS1 + 4*i)
+VEC_LOAD(RD, load_uint32, RS2); // same load, but the address advances by RS2 per microthread
+demand(0 <= RS2 && RS2 < MAX_UTS, "ut not in range!"); // RS2 is used as an index into uts[]; NOTE(review): if reg_t is unsigned, 0 <= RS2 is always true - confirm
+UT_RD(RS2) = RS1; // copy RS1 into rd of microthread RS2
+UT_LOOP_START // copy RS1 into rd of every microthread below VL
+ UT_LOOP_RD = RS1;
+UT_LOOP_END
+demand(0 <= RS2 && RS2 < MAX_UTS, "ut not in range"); // RS2 is used as an index into uts[]
+RD = UT_RS1(RS2); // read XPR[rs1] of microthread RS2 into RD
+UT_LOOP_START // inside each microthread below VL: rd = its own rs1
+ UT_LOOP_RD = UT_LOOP_RS1;
+UT_LOOP_END
+VEC_STORE(RD, store_uint8, 1); // microthread i: mmu.store_uint8(RS1 + i, RD)
+VEC_STORE(RD, store_uint8, RS2); // same store, but the address advances by RS2 per microthread
+require_xpr64;
+VEC_STORE(RD, store_uint64, 8); // microthread i: mmu.store_uint64(RS1 + 8*i, RD)
+require_xpr64;
+VEC_STORE(RD, store_uint64, RS2); // same store, but the address advances by RS2 per microthread
+VEC_STORE(RD, store_uint16, 2); // microthread i: mmu.store_uint16(RS1 + 2*i, RD)
+VEC_STORE(RD, store_uint16, RS2); // same store, but the address advances by RS2 per microthread
+utmode = false; // clears the flag that the microthread launch loop polls (while (uts[i]->utmode))
+throw vt_command_stop; // caught in processor_t::step, which then returns
+VEC_STORE(RD, store_uint32, 4); // microthread i: mmu.store_uint32(RS1 + 4*i, RD)
+VEC_STORE(RD, store_uint32, RS2); // same store, but the address advances by RS2 per microthread
+nxpr_use = SIMM & 0x3f; // integer register count requested: low 6 bits of SIMM
+nfpr_use = (SIMM >> 6) & 0x3f; // FP register count requested: next 6 bits of SIMM
+vcfg(); // recompute vlmax from the new register counts
+setvl(RS1); // vl = min(vlmax, RS1); RS1 is narrowed to int by the call
+RD = VL; // report the resulting vector length
+for (int i=0; i<VL; i++) // run the microthreads one after another, each to completion
+{
+ uts[i]->pc = RS1+SIMM; // every microthread starts at the same address
+ uts[i]->utmode = true;
+ while (uts[i]->utmode) // keep stepping until the microthread clears its own utmode
+ uts[i]->step(n, noisy);
+}
memset(counters,0,sizeof(counters));
+ // vector stuff
+ utidx = -1;
+ vlmax = 8;
+ vl = 0;
+ nxpr_all = 256;
+ nfpr_all = 256;
+ nxpr_use = 0;
+ nfpr_use = 0;
+ for (int i=0; i<MAX_UTS; i++)
+ uts[i] = NULL;
+
// a few assumptions about endianness, including freg_t union
static_assert(BYTE_ORDER == LITTLE_ENDIAN);
static_assert(sizeof(freg_t) == 8);
static_assert(sizeof(uint128_t) == 16 && sizeof(int128_t) == 16);
}
-void processor_t::init(uint32_t _id)
+void processor_t::init(uint32_t _id, char* _mem, size_t _memsz) // stores the id and creates the MAX_UTS microthread processors, passing each the same _mem/_memsz
{
id = _id;
+// fill every slot of uts[]; the constructor only sets these pointers to NULL
+ for (int i=0; i<MAX_UTS; i++)
+ {
+ uts[i] = new processor_t(sim, _mem, _memsz); // NOTE(review): no matching delete is visible in this chunk, and a second init() call would overwrite the pointers
+ uts[i]->set_sr(uts[i]->sr | SR_EF); // set SR_EF in the microthread's status register (presumably enables FP - confirm)
+ uts[i]->utidx = i; // the microthread's own index in uts[]
+ }
}
void processor_t::set_sr(uint32_t val)
fsr = val & ~FSR_ZERO;
}
+// Recompute vlmax from the configured register counts (nxpr_use / nfpr_use).
+//
+// Each register file with more than one register in use limits vlmax to
+// (n*_all-1) / (n*_use-1); the smaller limit wins, and the result never
+// exceeds MAX_UTS.  With no registers configured at all, vlmax is 8.
+//
+// A count of exactly 1 used to make the divisor (use-1) zero, which is an
+// integer division by zero.  Such a file is now treated as imposing no limit,
+// the same way a count of 0 already was when the other file was in use.
+void processor_t::vcfg()
+{
+  if (nxpr_use == 0 && nfpr_use == 0)
+    vlmax = 8;
+  else
+  {
+    // Start from the hard upper bound and tighten it per register file.
+    vlmax = MAX_UTS;
+    if (nxpr_use > 1)
+      vlmax = std::min(vlmax, (nxpr_all-1) / (nxpr_use-1));
+    if (nfpr_use > 1)
+      vlmax = std::min(vlmax, (nfpr_all-1) / (nfpr_use-1));
+  }
+
+  // uts[] holds MAX_UTS entries, so the vector length must never exceed it.
+  vlmax = std::min(vlmax, MAX_UTS);
+}
+
+// Set the active vector length to the requested value, limited to [0, vlmax].
+//
+// vlapp: length requested by the application.  Callers pass a register value
+// that is narrowed to int, so it can arrive negative; a negative request is
+// clamped to 0 instead of being stored as a negative vl.
+void processor_t::setvl(int vlapp)
+{
+  vl = std::max(0, std::min(vlmax, vlapp));
+}
+
void processor_t::step(size_t n, bool noisy)
{
size_t i = 0;
i++;
take_trap(t,noisy);
}
+ catch(vt_command_t cmd)
+ {
+ if (cmd == vt_command_stop)
+ return;
+ }
}
void processor_t::take_trap(trap_t t, bool noisy)
#include "trap.h"
#include "mmu.h"
+#define MAX_UTS 32 // number of microthread slots in processor_t::uts[]; also the upper bound applied to vlmax
+
class sim_t;
class processor_t
{
public:
processor_t(sim_t* _sim, char* _mem, size_t _memsz);
- void init(uint32_t _id);
+ void init(uint32_t _id, char* _mem, size_t _memsz); // stores the id and allocates the microthread processors in uts[]
void step(size_t n, bool noisy);
private:
void take_trap(trap_t t, bool noisy);
void disasm(insn_t insn, reg_t pc);
+ // vector stuff: configuration and state for the microthreads in uts[]
+ void vcfg(); // recomputes vlmax from nxpr_use / nfpr_use
+ void setvl(int vlapp); // sets vl from the requested length, bounded by vlmax
+// per-processor vector state
+ bool utmode; // set true before a microthread is stepped, cleared by the stop instruction; NOTE(review): no initialization is visible in this chunk
+ int utidx; // -1 after construction; set to the uts[] index for microthreads in init()
+ int vlmax; // largest vector length currently allowed (8 after construction)
+ int vl; // current vector length (0 after construction)
+ int nxpr_all; // integer register total used by vcfg() (256 after construction)
+ int nfpr_all; // FP register total used by vcfg() (256 after construction)
+ int nxpr_use; // integer registers requested by the configuration instruction
+ int nfpr_use; // FP registers requested by the configuration instruction
+ processor_t* uts[MAX_UTS]; // microthread processors; NULL after construction, allocated in init()
+
friend class sim_t;
};
demand(mem != MAP_FAILED, "couldn't allocate target machine's memory");
for(int i = 0; i < (int)procs.size(); i++)
- procs[i].init(i);
+ procs[i].init(i, mem, memsz);
applink->init(this);
}