demand(p.data_size % APP_DATA_ALIGN == 0, "misaligned data");
demand(p.data_size <= APP_MAX_DATA_SIZE, "long read data");
demand(p.addr <= sim->memsz && p.addr+p.data_size <= sim->memsz, "out of bounds: 0x%llx",(unsigned long long)p.addr);
- memcpy(ackpacket.data,sim->mem+p.addr,p.data_size);
ackpacket.data_size = p.data_size;
+
+ static_assert(APP_DATA_ALIGN >= sizeof(uint64_t))
+ for(size_t i = 0; i < p.data_size/8; i++)
+ ((uint64_t*)ackpacket.data)[i] = sim->mmu->load_uint64(p.addr+i*8);
break;
case APP_CMD_WRITE_MEM:
demand(p.addr % APP_DATA_ALIGN == 0, "misaligned address");
demand(p.data_size % APP_DATA_ALIGN == 0, "misaligned data");
demand(p.data_size <= bytes - offsetof(packet,data), "short packet");
demand(p.addr <= sim->memsz && p.addr+p.data_size <= sim->memsz, "out of bounds: 0x%llx",(unsigned long long)p.addr);
- memcpy(sim->mem+p.addr,p.data,p.data_size);
+
+ for(size_t i = 0; i < p.data_size/8; i++)
+ sim->mmu->store_uint64(p.addr+i*8, ((uint64_t*)p.data)[i]);
break;
case APP_CMD_READ_CONTROL_REG:
demand(p.addr == 16,"bad control reg");
break;
case 8:
- val = MEMSIZE >> PGSHIFT;
+ val = mmu.memsz >> PGSHIFT;
break;
case 9:
#include "mmu.h"
+#include "sim.h"
+#include "processor.h"
+
+// Create an MMU over the target memory image of _memsz bytes at _mem.
+// Initial state: supervisor mode, virtual memory off, page-table base and
+// bad-address register zeroed, no cache or TLB simulators attached.
+mmu_t::mmu_t(char* _mem, size_t _memsz)
+  : mem(_mem),
+    memsz(_memsz),
+    badvaddr(0),
+    ptbr(0),
+    supervisor(true),
+    vm_enabled(false),
+    icsim(NULL),
+    dcsim(NULL),
+    itlbsim(NULL),
+    dtlbsim(NULL)
+{
+  // start with every TLB tag (and, via flush_tlb, the instruction cache) invalid
+  flush_tlb();
+}
+
+// The destructor body is intentionally empty: it releases nothing.
+mmu_t::~mmu_t() {}
void mmu_t::flush_tlb()
{
- memset(tlb_data, 0, sizeof(tlb_data)); // TLB entry itself has valid bit
+ memset(tlb_insn_tag, -1, sizeof(tlb_insn_tag)); // all-ones sentinel: translate() compares tags against addr & ~(PGSIZE-1), whose low bits are clear
+ memset(tlb_load_tag, -1, sizeof(tlb_load_tag)); // same sentinel for the load tags
+ memset(tlb_store_tag, -1, sizeof(tlb_store_tag)); // same sentinel for the store tags
flush_icache();
}
void mmu_t::flush_icache()
{
- memset(icache_tag, 0, sizeof(icache_tag)); // I$ tag contains valid bit
+ memset(icache_tag, -1, sizeof(icache_tag)); // tags now store the fetch address itself, so all-ones marks an empty slot; NOTE(review): assumes no fetch from the all-ones address - confirm
+}
+
+// TLB miss path for translate(): walk the page table for addr, verify that the
+// requested kind of access is permitted, cache the translation, and return the
+// physical address.
+//
+//   addr  - virtual address being accessed
+//   store - true for a store access
+//   fetch - true for an instruction fetch (a load when neither flag is set)
+//
+// If the entry returned by walk() lacks PTE_E or the needed permission bit,
+// badvaddr is set to addr and the matching access-fault trap is thrown.
+reg_t mmu_t::refill(reg_t addr, bool store, bool fetch)
+{
+  const reg_t pte = walk(addr);
+
+  // In supervisor mode, shift the supervisor permission bits down into the
+  // user bit positions so the checks below can use the PTE_U* masks only.
+  reg_t granted = pte & PTE_PERM;
+  if(supervisor)
+    granted = (granted >> 3) & PTE_PERM;
+  granted |= pte & PTE_E;
+
+  // The access needs a valid entry plus the bit for its own kind.
+  reg_t wanted = PTE_E;
+  if(fetch)
+    wanted |= PTE_UX;
+  else if(store)
+    wanted |= PTE_UW;
+  else
+    wanted |= PTE_UR;
+
+  if(unlikely((granted & wanted) != wanted))
+  {
+    badvaddr = addr;
+    throw store ? trap_store_access_fault
+        : fetch ? trap_instruction_access_fault
+        : trap_load_access_fault;
+  }
+
+  // Arm a tag only for the access kinds this entry allows; the other tags get
+  // the all-ones value, which a page-aligned lookup tag cannot match.
+  const reg_t slot = (addr >> PGSHIFT) % TLB_ENTRIES;
+  const reg_t page_tag = addr & ~(PGSIZE-1);
+  const reg_t no_tag = -1;
+  tlb_load_tag[slot] = (granted & PTE_UR) ? page_tag : no_tag;
+  tlb_store_tag[slot] = (granted & PTE_UW) ? page_tag : no_tag;
+  tlb_insn_tag[slot] = (granted & PTE_UX) ? page_tag : no_tag;
+
+  // Store the physical page base; the page offset comes from addr.
+  tlb_data[slot] = pte >> PTE_PPN_SHIFT << PGSHIFT;
+  return (addr & (PGSIZE-1)) | tlb_data[slot];
+}
+
+pte_t mmu_t::walk(reg_t addr) // look up the PTE that maps addr; returns 0 when no valid mapping is found
+{
+ pte_t pte = 0; // 0 has PTE_E clear, which refill() turns into an access fault
+
+ if(!vm_enabled)
+ {
+ if(addr < memsz) // VM off: identity-map any address inside target memory with all permissions
+ pte = PTE_E | PTE_PERM | ((addr >> PGSHIFT) << PTE_PPN_SHIFT);
+ }
+ else
+ {
+ reg_t base = ptbr; // the walk starts at the table addressed by ptbr
+ reg_t ptd;
+
+ int ptshift = (LEVELS-1)*PTIDXBITS; // number of VPN bits below the level currently being indexed
+ for(reg_t i = 0; i < LEVELS; i++, ptshift -= PTIDXBITS)
+ {
+ reg_t idx = (addr >> (PGSHIFT+ptshift)) & ((1<<PTIDXBITS)-1); // this level's slice of the virtual page number
+
+ reg_t pte_addr = base + idx*sizeof(pte_t);
+ if(pte_addr >= memsz) // table entry lies outside target memory: stop with pte == 0
+ break;
+
+ ptd = *(pte_t*)(mem+pte_addr);
+ if(ptd & PTE_E) // PTE_E set: this level supplies the mapping and the walk ends here
+ {
+ // if this PTE is from a larger PT, fake a leaf
+ // PTE so the TLB will work right
+ reg_t vpn = addr >> PGSHIFT;
+ ptd |= (vpn & ((1<<(ptshift))-1)) << PTE_PPN_SHIFT;
+
+ // fault if physical addr is invalid
+ reg_t ppn = ptd >> PTE_PPN_SHIFT;
+ if((ppn << PGSHIFT) + (addr & (PGSIZE-1)) < memsz)
+ pte = ptd;
+ break;
+ }
+ else if(!(ptd & PTE_T)) // neither PTE_E nor PTE_T: nothing mapped, stop with pte == 0
+ break;
+
+ base = (ptd >> PTE_PPN_SHIFT) << PGSHIFT; // PTE_T set: continue in the table this entry points to
+ }
+ }
+
+ return pte;
}
+#ifndef _RISCV_MMU_H
+#define _RISCV_MMU_H
+
#include "decode.h"
#include "trap.h"
#include "icsim.h"
#include "common.h"
-#include <assert.h>
class processor_t;
class mmu_t
{
public:
- mmu_t(char* _mem, size_t _memsz)
- : mem(_mem), memsz(_memsz), badvaddr(0),
- ptbr(0), supervisor(true), vm_enabled(false),
- icsim(NULL), dcsim(NULL), itlbsim(NULL), dtlbsim(NULL)
- {
- }
+ mmu_t(char* _mem, size_t _memsz);
+ ~mmu_t();
#ifdef RISCV_ENABLE_ICSIM
# define dcsim_tick(dcsim, dtlbsim, addr, size, st) \
{
insn_t insn;
- reg_t idx = (addr/sizeof(insn_t)) % ICACHE_ENTRIES;
- bool hit = addr % 4 == 0 && icache_tag[idx] == (addr | 1);
- if(likely(hit))
- return icache_data[idx];
-
#ifdef RISCV_ENABLE_RVC
- if(addr % 4 == 2 && rvc)
+ if(addr % 4 == 2 && rvc) // fetch across word boundary
{
reg_t paddr_lo = translate(addr, false, true);
insn.bits = *(uint16_t*)(mem+paddr_lo);
else
#endif
{
- if(unlikely(addr % 4))
- {
- badvaddr = addr;
- throw trap_instruction_address_misaligned;
- }
+ reg_t idx = (addr/sizeof(insn_t)) % ICACHE_ENTRIES;
+ bool hit = icache_tag[idx] == addr;
+ if(likely(hit))
+ return icache_data[idx];
+
+ // the processor guarantees alignment based upon rvc mode
reg_t paddr = translate(addr, false, true);
insn = *(insn_t*)(mem+paddr);
- icache_tag[idx] = addr | 1;
+ icache_tag[idx] = addr;
icache_data[idx] = insn;
}
static const reg_t TLB_ENTRIES = 256;
pte_t tlb_data[TLB_ENTRIES];
- reg_t tlb_tag[TLB_ENTRIES];
+ reg_t tlb_insn_tag[TLB_ENTRIES];
+ reg_t tlb_load_tag[TLB_ENTRIES];
+ reg_t tlb_store_tag[TLB_ENTRIES];
static const reg_t ICACHE_ENTRIES = 256;
insn_t icache_data[ICACHE_ENTRIES];
icsim_t* itlbsim;
icsim_t* dtlbsim;
+ reg_t refill(reg_t addr, bool store, bool fetch);
+ pte_t walk(reg_t addr);
+
reg_t translate(reg_t addr, bool store, bool fetch)
{
reg_t idx = (addr >> PGSHIFT) % TLB_ENTRIES;
- pte_t pte = tlb_data[idx];
- reg_t tag = tlb_tag[idx];
-
- trap_t trap = store ? trap_store_access_fault
- : fetch ? trap_instruction_access_fault
- : trap_load_access_fault;
-
- bool hit = (pte & PTE_E) && tag == (addr >> PGSHIFT);
- if(unlikely(!hit))
- {
- pte = walk(addr);
- if(!(pte & PTE_E))
- {
- badvaddr = addr;
- throw trap;
- }
-
- tlb_data[idx] = pte;
- tlb_tag[idx] = addr >> PGSHIFT;
- }
-
- reg_t access_type = store ? PTE_UW : fetch ? PTE_UX : PTE_UR;
- if(supervisor)
- access_type <<= 3;
- if(unlikely(!(access_type & pte & PTE_PERM)))
- {
- badvaddr = addr;
- throw trap;
- }
- return (addr & (PGSIZE-1)) | ((pte >> PTE_PPN_SHIFT) << PGSHIFT);
- }
+ reg_t* tlb_tag = fetch ? tlb_insn_tag : store ? tlb_store_tag :tlb_load_tag; // one tag array per access kind
+ reg_t expected_tag = addr & ~(PGSIZE-1); // page-aligned virtual address
+ if(likely(tlb_tag[idx] == expected_tag)) // refill() arms a tag only for permitted access kinds, so a match needs no permission check
+ return (addr & (PGSIZE-1)) | tlb_data[idx]; // tlb_data holds the physical page base stored by refill()
- pte_t walk(reg_t addr)
- {
- pte_t pte = 0;
-
- if(!vm_enabled)
- {
- if(addr < memsz)
- pte = PTE_E | PTE_PERM | ((addr >> PGSHIFT) << PTE_PPN_SHIFT);
- }
- else
- {
- reg_t base = ptbr;
- reg_t ptd;
-
- int ptshift = (LEVELS-1)*PTIDXBITS;
- for(reg_t i = 0; i < LEVELS; i++, ptshift -= PTIDXBITS)
- {
- reg_t idx = (addr >> (PGSHIFT+ptshift)) & ((1<<PTIDXBITS)-1);
-
- reg_t pte_addr = base + idx*sizeof(pte_t);
- if(pte_addr >= memsz)
- break;
-
- ptd = *(pte_t*)(mem+pte_addr);
- if(ptd & PTE_E)
- {
- // if this PTE is from a larger PT, fake a leaf
- // PTE so the TLB will work right
- reg_t vpn = addr >> PGSHIFT;
- ptd |= (vpn & ((1<<(ptshift))-1)) << PTE_PPN_SHIFT;
-
- // fault if physical addr is invalid
- reg_t ppn = ptd >> PTE_PPN_SHIFT;
- if((ppn << PGSHIFT) + (addr & (PGSIZE-1)) < memsz)
- pte = ptd;
- break;
- }
- else if(!(ptd & PTE_T))
- break;
-
- base = (ptd >> PTE_PPN_SHIFT) << PGSHIFT;
- }
- }
-
- return pte;
+ return refill(addr, store, fetch); // miss: walk the page table, check permissions, fill the entry (throws on an access fault)
}
friend class processor_t;
};
+
+#endif
#include "sim.h"
#include "icsim.h"
-processor_t::processor_t(sim_t* _sim, char* _mem, size_t _memsz)
- : sim(_sim), mmu(_mem,_memsz)
+processor_t::processor_t(sim_t* _sim, mmu_t* _mmu)
+ : sim(_sim), mmu(*_mmu)
{
initialize_dispatch_table();
// a few assumptions about endianness, including freg_t union
for (int i=0; i<MAX_UTS; i++)
{
- uts[i] = new processor_t(sim, mmu.mem, mmu.memsz);
+ uts[i] = new processor_t(sim, &mmu);
uts[i]->id = id;
uts[i]->set_sr(uts[i]->sr | SR_EF);
uts[i]->set_sr(uts[i]->sr | SR_EV);
class processor_t
{
public:
- processor_t(sim_t* _sim, char* _mem, size_t _memsz);
+ processor_t(sim_t* _sim, mmu_t* _mmu);
~processor_t();
void init(uint32_t _id, icsim_t* defualt_icache, icsim_t* default_dcache);
void step(size_t n, bool noisy);
int xprlen;
// shared memory
- mmu_t mmu;
+ mmu_t& mmu;
// counters
reg_t cycle;
appserver_link_t applink(tohost_fd, fromhost_fd);
- sim_t s(nprocs, MEMSIZE, &applink, &icache, &dcache);
+ sim_t s(nprocs, &applink, &icache, &dcache);
try
{
s.run(debug);
#include <map>
#include <iostream>
#include <climits>
+#include <assert.h>
-sim_t::sim_t(int _nprocs, size_t _memsz, appserver_link_t* _applink, icsim_t* default_icache, icsim_t* default_dcache)
+// Build the simulator: map the target memory, create one debug MMU plus one
+// processor (each with its own MMU) per core, then attach the appserver link.
+//
+//   _nprocs         - number of cores to create
+//   _applink        - appserver link; init(this) is called on it last
+//   default_icache,
+//   default_dcache  - passed through to every processor's init()
+//
+// The amount of target memory is no longer a parameter: the constructor asks
+// for a fixed size and shrinks the request until the host grants it.
+sim_t::sim_t(int _nprocs, appserver_link_t* _applink, icsim_t* default_icache, icsim_t* default_dcache)
: applink(_applink),
- memsz(_memsz),
- mem((char*)mmap64(NULL, memsz, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0)),
- procs(std::vector<processor_t>(_nprocs,processor_t(this,mem,memsz)))
+  procs(_nprocs)
{
- demand(mem != MAP_FAILED, "couldn't allocate target machine's memory");
+  // Ask for 4 GiB when size_t is 8 bytes wide and 0x70000000 bytes otherwise,
+  // rounded down to a multiple of the larger of PGSIZE and the host page size.
+  size_t memsz0 = sizeof(size_t) == 8 ? 0x100000000ULL : 0x70000000UL;
+  size_t quantum = std::max(PGSIZE, (reg_t)sysconf(_SC_PAGESIZE));
+  memsz0 = memsz0/quantum*quantum;
- for(int i = 0; i < (int)num_cores(); i++)
- procs[i].init(i, default_icache, default_dcache);
+  memsz = memsz0;
+  // The image is read as well as written, so request both protections
+  // explicitly instead of relying on PROT_WRITE implying read access.
+  mem = (char*)mmap64(NULL, memsz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+
+  if(mem == MAP_FAILED)
+  {
+    // Back off to roughly 10/11 of the previous size per attempt. Divide
+    // before multiplying: memsz*10 would overflow a 32-bit size_t for the
+    // 0x70000000 starting size. The loop stops once the size reaches zero.
+    while(mem == MAP_FAILED && (memsz = memsz/11*10/quantum*quantum))
+      mem = (char*)mmap64(NULL, memsz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+    // Use demand() as the previous code did: assert() is compiled out under
+    // NDEBUG, which would let a failed mapping reach the MMUs below.
+    demand(mem != MAP_FAILED, "couldn't allocate target machine's memory");
+    fprintf(stderr, "warning: only got %lu bytes of target mem (wanted %lu)\n",
+            (unsigned long)memsz, (unsigned long)memsz0);
+  }
+
+  // MMU used by the simulator itself (interactive memory inspection).
+  mmu = new mmu_t(mem, memsz);
+
+  // Each core gets a private MMU over the same memory; ~sim_t() frees both.
+  for(size_t i = 0; i < num_cores(); i++)
+  {
+    procs[i] = new processor_t(this, new mmu_t(mem, memsz));
+    procs[i]->init(i, default_icache, default_dcache);
+  }
applink->init(this);
}
sim_t::~sim_t()
{
+  // Each processor only holds a reference to its MMU, so remember the MMU's
+  // address first, destroy the processor, and then free the MMU.
+  for(size_t core = 0; core != num_cores(); ++core)
+  {
+    processor_t* proc = procs[core];
+    mmu_t* core_mmu = &proc->mmu;
+    delete proc;
+    delete core_mmu;
+  }
+
+  // Finally release the simulator's own MMU and the target memory mapping.
+  delete mmu;
+  munmap(mem, memsz);
}
void sim_t::set_tohost(reg_t val)
void sim_t::send_ipi(reg_t who)
{
if(who < num_cores())
- procs[who].deliver_ipi();
+ procs[who]->deliver_ipi(); // procs now stores processor_t pointers; out-of-range targets are silently ignored by the check above
}
void sim_t::run(bool debug)
{
for(size_t j = 0; j < n; j+=interleave)
for(int i = 0; i < (int)num_cores(); i++)
- procs[i].step(interleave,noisy);
+ procs[i]->step(interleave,noisy);
}
void sim_t::interactive_run_noisy(const std::string& cmd, const std::vector<std::string>& args)
return;
if(a.size() == 2)
- procs[p].step(atoi(a[1].c_str()),noisy);
+ procs[p]->step(atoi(a[1].c_str()),noisy);
else
- while(1) procs[p].step(1,noisy);
+ while(1) procs[p]->step(1,noisy);
}
void sim_t::interactive_quit(const std::string& cmd, const std::vector<std::string>& args)
if(p >= (int)num_cores())
throw trap_illegal_instruction;
- return procs[p].pc;
+ return procs[p]->pc;
}
reg_t sim_t::get_reg(const std::vector<std::string>& args)
if(p >= (int)num_cores() || r >= NXPR)
throw trap_illegal_instruction;
- return procs[p].XPR[r];
+ return procs[p]->XPR[r];
}
reg_t sim_t::get_freg(const std::vector<std::string>& args)
if(p >= (int)num_cores() || r >= NFPR)
throw trap_illegal_instruction;
- return procs[p].FPR[r];
+ return procs[p]->FPR[r];
}
reg_t sim_t::get_tohost(const std::vector<std::string>& args)
if(p >= (int)num_cores())
throw trap_illegal_instruction;
- return procs[p].tohost;
+ return procs[p]->tohost;
}
void sim_t::interactive_reg(const std::string& cmd, const std::vector<std::string>& args)
throw trap_illegal_instruction;
std::string addr_str = args[0];
- mmu_t mmu(mem, memsz);
- mmu.set_supervisor(true);
if(args.size() == 2)
{
int p = atoi(args[0].c_str());
if(p >= (int)num_cores())
throw trap_illegal_instruction;
- mmu.set_vm_enabled(!!(procs[p].sr & SR_VM));
- mmu.set_ptbr(procs[p].mmu.get_ptbr());
+ mmu->set_vm_enabled(!!(procs[p]->sr & SR_VM));
+ mmu->set_ptbr(procs[p]->mmu.get_ptbr());
addr_str = args[1];
}
switch(addr % 8)
{
case 0:
- val = mmu.load_uint64(addr);
+ val = mmu->load_uint64(addr);
break;
case 4:
- val = mmu.load_uint32(addr);
+ val = mmu->load_uint32(addr);
break;
case 2:
case 6:
- val = mmu.load_uint16(addr);
+ val = mmu->load_uint16(addr);
break;
default:
- val = mmu.load_uint8(addr);
+ val = mmu->load_uint8(addr);
break;
}
return val;
reg_t addr = strtol(args[0].c_str(),NULL,16);
- mmu_t mmu(mem,memsz);
char ch;
-
- while((ch = mmu.load_uint8(addr++)))
+ while((ch = mmu->load_uint8(addr++)))
putchar(ch);
putchar('\n');
#include <string>
#include "processor.h"
-const long MEMSIZE = 0x100000000;
-
class appserver_link_t;
class sim_t
{
public:
- sim_t(int _nprocs, size_t _memsz, appserver_link_t* _applink, icsim_t* _default_icache, icsim_t* default_dcache);
+ sim_t(int _nprocs, appserver_link_t* _applink, icsim_t* _default_icache, icsim_t* default_dcache);
~sim_t();
void run(bool debug);
size_t memsz;
char* mem;
- std::vector<processor_t> procs;
+ mmu_t* mmu;
+ std::vector<processor_t*> procs;
void step_all(size_t n, size_t interleave, bool noisy);