From 7f457c47b339cc7c79f56bb277ed8ed989e88ae1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 17 Dec 2013 10:18:47 -0800 Subject: [PATCH] Speed things up quite a bit --- hwacha/decode_hwacha.h | 2 +- riscv/decode.h | 22 ++++++------ riscv/memtracer.h | 1 + riscv/mmu.cc | 9 ++--- riscv/mmu.h | 71 ++++++++++++++++++++++----------------- riscv/processor.cc | 76 +++++++++++++++++++++++++++--------------- riscv/processor.h | 16 +++++---- 7 files changed, 118 insertions(+), 79 deletions(-) diff --git a/hwacha/decode_hwacha.h b/hwacha/decode_hwacha.h index b7069fa..fa94b72 100644 --- a/hwacha/decode_hwacha.h +++ b/hwacha/decode_hwacha.h @@ -25,7 +25,7 @@ #define INSN_RS2 (insn.rs2()) #define INSN_RS3 (insn.rs3()) #define INSN_RD (insn.rd()) -#define INSN_SEG ((insn.i_imm() >> 9)+1) +#define INSN_SEG (((reg_t)insn.i_imm() >> 9)+1) static inline reg_t read_xpr(hwacha_t* h, insn_t insn, uint32_t idx, size_t src) { diff --git a/riscv/decode.h b/riscv/decode.h index 8e506e2..6c26a68 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -52,21 +52,22 @@ class insn_t { public: uint32_t bits() { return b; } - reg_t i_imm() { return int64_t(int32_t(b) >> 20); } - reg_t s_imm() { return x(7, 5) | (x(25, 7) << 5) | (imm_sign() << 12); } - reg_t sb_imm() { return (x(8, 4) << 1) | (x(25,6) << 5) | (x(7,1) << 11) | (imm_sign() << 12); } - reg_t u_imm() { return int64_t(int32_t(b) >> 12 << 12); } - reg_t uj_imm() { return (x(21, 10) << 1) | (x(20, 1) << 11) | (x(12, 8) << 12) | (imm_sign() << 20); } + int32_t i_imm() { return int32_t(b) >> 20; } + int32_t s_imm() { return x(7, 5) + (xs(25, 7) << 5); } + int32_t sb_imm() { return (x(8, 4) << 1) + (x(25,6) << 5) + (x(7,1) << 11) + (imm_sign() << 12); } + int32_t u_imm() { return int32_t(b) >> 12 << 12; } + int32_t uj_imm() { return (x(21, 10) << 1) + (x(20, 1) << 11) + (x(12, 8) << 12) + (imm_sign() << 20); } uint32_t rd() { return x(7, 5); } uint32_t rs1() { return x(15, 5); } uint32_t rs2() { return x(20, 5); } uint32_t rs3() { return x(27, 5); } uint32_t rm() { return x(12, 3); } - reg_t csr() { return x(20, 12); } + uint32_t csr() { return x(20, 12); } private: uint32_t b; - reg_t x(int lo, int len) { return b << (32-lo-len) >> (32-len); } - reg_t imm_sign() { return int64_t(int32_t(b) >> 31); } + uint32_t x(int lo, int len) { return b << (32-lo-len) >> (32-len); } + uint32_t xs(int lo, int len) { return int32_t(b) << (32-lo-len) >> (32-len); } + uint32_t imm_sign() { return xs(31, 1); } }; template @@ -79,12 +80,11 @@ public: } void write(size_t i, T value) { - data[i] = value; + if (!(zero_reg && i == 0)) + data[i] = value; } const T& operator [] (size_t i) const { - if (zero_reg) - const_cast(data[0]) = 0; return data[i]; } private: diff --git a/riscv/memtracer.h b/riscv/memtracer.h index 127a641..e223c43 100644 --- a/riscv/memtracer.h +++ b/riscv/memtracer.h @@ -20,6 +20,7 @@ class memtracer_t class memtracer_list_t : public memtracer_t { public: + bool empty() { return list.empty(); } bool interested_in_range(uint64_t begin, uint64_t end, bool store, bool fetch) { for (std::vector::iterator it = list.begin(); it != list.end(); ++it) diff --git a/riscv/mmu.cc b/riscv/mmu.cc index 96884d6..f8efd5a 100644 --- a/riscv/mmu.cc +++ b/riscv/mmu.cc @@ -16,7 +16,8 @@ mmu_t::~mmu_t() void mmu_t::flush_icache() { - memset(icache_tag, -1, sizeof(icache_tag)); + for (size_t i = 0; i < ICACHE_ENTRIES; i++) + icache[i].tag = -1; } void mmu_t::flush_tlb() @@ -28,7 +29,7 @@ void mmu_t::flush_tlb() flush_icache(); } -reg_t mmu_t::refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch) +void* mmu_t::refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch) { reg_t idx = (addr >> PGSHIFT) % TLB_ENTRIES; reg_t expected_tag = addr & ~(PGSIZE-1); @@ -62,10 +63,10 @@ reg_t mmu_t::refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch) tlb_load_tag[idx] = (pte_perm & PTE_UR) ? expected_tag : -1; tlb_store_tag[idx] = (pte_perm & PTE_UW) ? expected_tag : -1; tlb_insn_tag[idx] = (pte_perm & PTE_UX) ? expected_tag : -1; - tlb_data[idx] = pgbase; + tlb_data[idx] = mem + pgbase - (addr & ~(PGSIZE-1)); } - return paddr; + return mem + paddr; } pte_t mmu_t::walk(reg_t addr) diff --git a/riscv/mmu.h b/riscv/mmu.h index 227d5c7..551fa46 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -31,11 +31,11 @@ public: // template for functions that load an aligned value from memory #define load_func(type) \ - type##_t load_##type(reg_t addr) { \ + type##_t load_##type(reg_t addr) __attribute__((always_inline)) { \ if(unlikely(addr % sizeof(type##_t))) \ throw trap_load_address_misaligned(addr); \ - reg_t paddr = translate(addr, sizeof(type##_t), false, false); \ - return *(type##_t*)(mem + paddr); \ + void* paddr = translate(addr, sizeof(type##_t), false, false); \ + return *(type##_t*)paddr; \ } // load value from memory at aligned address; zero extend to register width @@ -55,8 +55,8 @@ public: void store_##type(reg_t addr, type##_t val) { \ if(unlikely(addr % sizeof(type##_t))) \ throw trap_store_address_misaligned(addr); \ - reg_t paddr = translate(addr, sizeof(type##_t), true, false); \ - *(type##_t*)(mem + paddr) = val; \ + void* paddr = translate(addr, sizeof(type##_t), true, false); \ + *(type##_t*)paddr = val; \ } // store value to memory at aligned address @@ -77,25 +77,28 @@ public: // load instruction from memory at aligned address. inline insn_fetch_t load_insn(reg_t addr) { - reg_t idx = (addr/sizeof(insn_t)) % ICACHE_ENTRIES; - if (unlikely(icache_tag[idx] != addr)) + reg_t offset = addr & (sizeof(insn_t) * (ICACHE_ENTRIES-1)); + offset *= sizeof(icache_entry_t) / sizeof(insn_t); + icache_entry_t* entry = (icache_entry_t*)((char*)icache + offset); + insn_fetch_t data = entry->data; + if (likely(entry->tag == addr)) + return data; + + void* iaddr = translate(addr, sizeof(insn_t), false, true); + insn_fetch_t fetch; + fetch.insn.pad = *(decltype(fetch.insn.insn.bits())*)iaddr; + fetch.func = proc->decode_insn(fetch.insn.insn); + + entry->tag = addr; + entry->data = fetch; + + reg_t paddr = (char*)iaddr - mem; + if (!tracer.empty() && tracer.interested_in_range(paddr, paddr + sizeof(insn_t), false, true)) { - reg_t paddr = translate(addr, sizeof(insn_t), false, true); - insn_fetch_t fetch; - fetch.insn.insn = *(insn_t*)(mem + paddr); - fetch.func = proc->decode_insn(fetch.insn.insn); - - reg_t idx = (paddr/sizeof(insn_t)) % ICACHE_ENTRIES; - icache_tag[idx] = addr; - icache_data[idx] = fetch; - - if (tracer.interested_in_range(paddr, paddr + sizeof(insn_t), false, true)) - { - icache_tag[idx] = -1; - tracer.trace(paddr, sizeof(insn_t), false, true); - } + entry->tag = -1; + tracer.trace(paddr, sizeof(insn_t), false, true); } - return icache_data[idx]; + return entry->data; } void set_processor(processor_t* p) { proc = p; flush_tlb(); } @@ -112,32 +115,38 @@ private: memtracer_list_t tracer; // implement an instruction cache for simulator performance - static const reg_t ICACHE_ENTRIES = 256; - insn_fetch_t icache_data[ICACHE_ENTRIES]; + static const reg_t ICACHE_ENTRIES = 2048; + struct icache_entry_t { + reg_t tag; + reg_t pad; + insn_fetch_t data; + }; + icache_entry_t icache[ICACHE_ENTRIES]; // implement a TLB for simulator performance static const reg_t TLB_ENTRIES = 256; - reg_t tlb_data[TLB_ENTRIES]; + char* tlb_data[TLB_ENTRIES]; reg_t tlb_insn_tag[TLB_ENTRIES]; reg_t tlb_load_tag[TLB_ENTRIES]; reg_t tlb_store_tag[TLB_ENTRIES]; - reg_t icache_tag[ICACHE_ENTRIES]; // finish translation on a TLB miss and upate the TLB - reg_t refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch); + void* refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch); // perform a page table walk for a given virtual address pte_t walk(reg_t addr); // translate a virtual address to a physical address - reg_t translate(reg_t addr, reg_t bytes, bool store, bool fetch) + void* translate(reg_t addr, reg_t bytes, bool store, bool fetch) + __attribute__((always_inline)) { reg_t idx = (addr >> PGSHIFT) % TLB_ENTRIES; + reg_t expected_tag = addr & ~(PGSIZE-1); reg_t* tlb_tag = fetch ? tlb_insn_tag : store ? tlb_store_tag :tlb_load_tag; - reg_t expected_tag = addr & ~(PGSIZE-1); - if(likely(tlb_tag[idx] == expected_tag)) - return ((uintptr_t)addr & (PGSIZE-1)) + tlb_data[idx]; + void* data = tlb_data[idx] + addr; + if (likely(tlb_tag[idx] == expected_tag)) + return data; return refill_tlb(addr, bytes, store, fetch); } diff --git a/riscv/processor.cc b/riscv/processor.cc index 5e2910f..b12a8e0 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -13,10 +13,11 @@ #include #include #include +#include processor_t::processor_t(sim_t* _sim, mmu_t* _mmu, uint32_t _id) - : sim(_sim), mmu(_mmu), ext(NULL), id(_id), run(false), debug(false), - opcode_bits(0) + : sim(_sim), mmu(_mmu), ext(NULL), disassembler(new disassembler_t), + id(_id), run(false), debug(false) { reset(true); mmu->set_processor(this); @@ -24,6 +25,7 @@ processor_t::processor_t(sim_t* _sim, mmu_t* _mmu, uint32_t _id) #define DECLARE_INSN(name, match, mask) REGISTER_INSN(this, name, match, mask) #include "encoding.h" #undef DECLARE_INSN + build_opcode_map(); } processor_t::~processor_t() @@ -35,10 +37,7 @@ void state_t::reset() // the ISA guarantees on boot that the PC is 0x2000 and the the processor // is in supervisor mode, and in 64-bit mode, if supported, with traps // and virtual memory disabled. - sr = SR_S; -#ifdef RISCV_ENABLE_64BIT - sr |= SR_S64; -#endif + sr = SR_S | SR_S64; pc = 0x2000; // the following state is undefined upon boot-up, @@ -74,6 +73,8 @@ void processor_t::reset(bool value) run = !value; state.reset(); // reset the core + set_pcr(CSR_STATUS, state.sr); + if (ext) ext->reset(); // reset the extension } @@ -185,7 +186,7 @@ void processor_t::disasm(insn_t insn) { // the disassembler is stateless, so we share it fprintf(stderr, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx32 ") %s\n", - id, state.pc, insn.bits(), disassembler.disassemble(insn).c_str()); + id, state.pc, insn.bits(), disassembler->disassemble(insn).c_str()); } reg_t processor_t::set_pcr(int which, reg_t val) @@ -215,6 +216,7 @@ reg_t processor_t::set_pcr(int which, reg_t val) if (!ext) state.sr &= ~SR_EA; state.sr &= ~SR_ZERO; + rv64 = (state.sr & SR_S) ? (state.sr & SR_S64) : (state.sr & SR_U64); mmu->flush_tlb(); break; case CSR_EPC: @@ -328,42 +330,64 @@ reg_t illegal_instruction(processor_t* p, insn_t insn, reg_t pc) insn_func_t processor_t::decode_insn(insn_t insn) { - bool rv64 = (state.sr & SR_S) ? (state.sr & SR_S64) : (state.sr & SR_U64); + size_t mask = opcode_map.size()-1; + insn_desc_t* desc = opcode_map[insn.bits() & mask]; - auto key = insn.bits() & ((1L << opcode_bits)-1); - for (auto it = opcode_map.find(key); it != opcode_map.end() && it->first == key; ++it) - if ((insn.bits() & it->second.mask) == it->second.match) - return rv64 ? it->second.rv64 : it->second.rv32; + while ((insn.bits() & desc->mask) != desc->match) + desc++; - return &illegal_instruction; + return rv64 ? desc->rv64 : desc->rv32; } void processor_t::register_insn(insn_desc_t desc) { assert(desc.mask & 1); - if (opcode_bits == 0 || (desc.mask & ((1L << opcode_bits)-1)) != ((1L << opcode_bits)-1)) + instructions.push_back(desc); +} + +void processor_t::build_opcode_map() +{ + size_t buckets = -1; + for (auto& inst : instructions) + while ((inst.mask & buckets) != buckets) + buckets /= 2; + buckets++; + + struct cmp { + decltype(insn_desc_t::match) mask; + cmp(decltype(mask) mask) : mask(mask) {} + bool operator()(const insn_desc_t& lhs, const insn_desc_t& rhs) { + if ((lhs.match & mask) != (rhs.match & mask)) + return (lhs.match & mask) < (rhs.match & mask); + return lhs.match < rhs.match; + } + }; + std::sort(instructions.begin(), instructions.end(), cmp(buckets-1)); + + opcode_map.resize(buckets); + opcode_store.resize(instructions.size() + 1); + + size_t j = 0; + for (size_t b = 0, i = 0; b < buckets; b++) { - unsigned x = 0; - while ((desc.mask & ((1L << (x+1))-1)) == ((1L << (x+1))-1) && - (opcode_bits == 0 || x <= opcode_bits)) - x++; - opcode_bits = x; - - decltype(opcode_map) new_map; - for (auto it = opcode_map.begin(); it != opcode_map.end(); ++it) - new_map.insert(std::make_pair(it->second.match & ((1L<second)); - opcode_map = new_map; + opcode_map[b] = &opcode_store[j]; + while (i < instructions.size() && b == (instructions[i].match & (buckets-1))) + opcode_store[j++] = instructions[i++]; } - opcode_map.insert(std::make_pair(desc.match & ((1L<get_instructions()) register_insn(insn); + build_opcode_map(); for (auto disasm_insn : x->get_disasms()) - disassembler.add_insn(disasm_insn); + disassembler->add_insn(disasm_insn); if (ext != NULL) throw std::logic_error("only one extension may be registered"); ext = x; diff --git a/riscv/processor.h b/riscv/processor.h index f53b269..e27aa82 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -3,10 +3,10 @@ #define _RISCV_PROCESSOR_H #include "decode.h" -#include "disasm.h" -#include #include "config.h" -#include +#include +#include +#include class processor_t; class mmu_t; @@ -14,6 +14,7 @@ typedef reg_t (*insn_func_t)(processor_t*, insn_t, reg_t); class sim_t; class trap_t; class extension_t; +class disassembler_t; struct insn_desc_t { @@ -78,14 +79,16 @@ private: sim_t* sim; mmu_t* mmu; // main memory is always accessed via the mmu extension_t* ext; - disassembler_t disassembler; + std::unique_ptr disassembler; state_t state; uint32_t id; bool run; // !reset bool debug; + bool rv64; - unsigned opcode_bits; - std::multimap opcode_map; + std::vector instructions; + std::vector opcode_map; + std::vector opcode_store; void take_interrupt(); // take a trap if any interrupts are pending void take_trap(reg_t pc, trap_t& t); // take an exception @@ -96,6 +99,7 @@ private: friend class extension_t; friend class htif_isasim_t; + void build_opcode_map(); insn_func_t decode_insn(insn_t insn); }; -- 2.30.2