#include "processor.h"
#include "mmu.h"
-#include "sim.h"
#include <cassert>
// Snapshot the per-instruction values the commit log prints later.
// execute_insn() calls this before invoking the instruction's handler, so the
// values describe the machine as it was when the instruction started.
// The '-' line is the previous signature (it received the state_t directly);
// the '+' signature receives the processor so that XLEN and FLEN can be
// captured as well. Without RISCV_ENABLE_COMMITLOG the body is empty.
-static void commit_log_stash_privilege(state_t* state)
+static void commit_log_stash_privilege(processor_t* p)
{
#ifdef RISCV_ENABLE_COMMITLOG
+ state_t* state = p->get_state();
// Privilege level in effect for this instruction.
state->last_inst_priv = state->prv;
// Widths reported by the processor; commit_log_print_insn() reads these back
// to decide how many hex digits to print for the pc and register values.
+ state->last_inst_xlen = p->get_xlen();
+ state->last_inst_flen = p->get_flen();
#endif
}
// Write one value to stderr as "0x" followed by zero-padded hexadecimal.
//   width  size of the value in bits; selects the number of hex digits
//   hi     upper 64 bits, printed only when width is 128
//   lo     lower 64 bits; truncated to 16 or 32 bits for the narrow widths
// No newline or separator is emitted. Any width other than 16, 32, 64 or 128
// terminates the process via abort().
+static void commit_log_print_value(int width, uint64_t hi, uint64_t lo)
+{
+ switch (width) {
+ case 16:
+ fprintf(stderr, "0x%04" PRIx16, (uint16_t)lo);
+ break;
+ case 32:
+ fprintf(stderr, "0x%08" PRIx32, (uint32_t)lo);
+ break;
+ case 64:
+ fprintf(stderr, "0x%016" PRIx64, lo);
+ break;
+ case 128:
// Two 64-bit halves printed back to back, most significant half first.
+ fprintf(stderr, "0x%016" PRIx64 "%016" PRIx64, hi, lo);
+ break;
+ default:
+ abort();
+ }
+}
+
// Emit one commit-log line on stderr for an executed instruction.
//   state  processor state holding the stashed priv/xlen/flen and the pending
//          register-write record (log_reg_write)
//   pc     address of the instruction
//   insn   the instruction that was executed
// New ('+') output format:
//   <priv> <pc> (<insn bits>)[ <f|x><reg number> <value>]
// where pc is printed at the stashed XLEN width, the instruction at
// insn.length()*8 bits, and the register value at FLEN or XLEN width.
// The old ('-') version always printed the pc and register value with 16 hex
// digits and masked the instruction bits to its length.
// Without RISCV_ENABLE_COMMITLOG the body is empty.
static void commit_log_print_insn(state_t* state, reg_t pc, insn_t insn)
{
#ifdef RISCV_ENABLE_COMMITLOG
- int32_t priv = state->last_inst_priv;
- uint64_t mask = (insn.length() == 8 ? uint64_t(0) : (uint64_t(1) << (insn.length() * 8))) - 1;
- if (state->log_reg_write.addr) {
- fprintf(stderr, "%1d 0x%016" PRIx64 " (0x%08" PRIx64 ") %c%2" PRIu64 " 0x%016" PRIx64 "\n",
- priv,
- pc,
- insn.bits() & mask,
- state->log_reg_write.addr & 1 ? 'f' : 'x',
- state->log_reg_write.addr >> 1,
- state->log_reg_write.data);
+ auto& reg = state->log_reg_write;
+ int priv = state->last_inst_priv;
+ int xlen = state->last_inst_xlen;
+ int flen = state->last_inst_flen;
+
+ fprintf(stderr, "%1d ", priv);
+ commit_log_print_value(xlen, 0, pc);
+ fprintf(stderr, " (");
// Width is the instruction length in bits; commit_log_print_value() aborts
// for widths it does not handle.
+ commit_log_print_value(insn.length() * 8, 0, insn.bits());
+
// A non-zero addr means a register write was recorded for this instruction.
+ if (reg.addr) {
// addr encoding as used here: bit 0 selects the 'f' (set) or 'x' (clear)
// prefix, the remaining bits are the register number.
+ bool fp = reg.addr & 1;
+ int rd = reg.addr >> 1;
+ int size = fp ? flen : xlen;
+ fprintf(stderr, ") %c%2d ", fp ? 'f' : 'x', rd);
// data.v[1] is passed as the high half and data.v[0] as the low half.
+ commit_log_print_value(size, reg.data.v[1], reg.data.v[0]);
+ fprintf(stderr, "\n");
} else {
- fprintf(stderr, "%1d 0x%016" PRIx64 " (0x%08" PRIx64 ")\n", priv, pc, insn.bits() & mask);
+ fprintf(stderr, ")\n");
}
// Clear the record so the next instruction starts with no pending write.
- state->log_reg_write.addr = 0;
+ reg.addr = 0;
#endif
}
#endif
}
// Execute a single already-fetched instruction and return the next pc value
// produced by its handler.
//   p      processor the instruction runs on
//   pc     address of the instruction
//   fetch  decoded instruction together with its handler (fetch.func)
// Order of operations: stash commit-log state, run the handler, then write the
// commit-log line and update the pc histogram.
+// This is expected to be inlined by the compiler so each use of execute_insn
+// includes a duplicated body of the function to get separate fetch.func
+// function calls.
static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch)
{
- commit_log_stash_privilege(p->get_state());
+ commit_log_stash_privilege(p);
reg_t npc = fetch.func(p, fetch.insn, pc);
// Old ('-') condition skipped logging for every invalid_pc() result; the new
// ('+') condition skips it only for PC_SERIALIZE_BEFORE.
// NOTE(review): presumably PC_SERIALIZE_BEFORE means the instruction has not
// taken effect yet and will be run again - confirm against its definition.
- if (!invalid_pc(npc)) {
+ if (npc != PC_SERIALIZE_BEFORE) {
commit_log_print_insn(p->get_state(), pc, fetch.insn);
p->update_histogram(pc);
}
return npc;
}
// Returns true when any of the following holds: the debug flag is set, a
// single-step request is in any state other than STEP_NONE, or dcsr.cause is
// non-zero. step() uses the result to choose its one-instruction-at-a-time
// loop instead of the icache-driven loop.
+bool processor_t::slow_path()
+{
+ return debug || state.single_step != state.STEP_NONE || state.dcsr.cause;
+}
+
// fetch/decode/execute loop
void processor_t::step(size_t n)
{
- // TODO: get_interrupt() isn't super fast. Does that matter?
- if (state.dcsr.cause == DCSR_CAUSE_NONE &&
- sim->debug_module.get_interrupt(id)) {
- enter_debug_mode(DCSR_CAUSE_DEBUGINT);
- }
-
- if (state.dcsr.cause != DCSR_CAUSE_NONE) {
- // In Debug Mode, just do 10 steps at a time. Otherwise we're going to be
- // spinning the rest of the time anyway.
- n = std::max(n, (size_t) 10);
+ if (state.dcsr.cause == DCSR_CAUSE_NONE) {
+ if (halt_request) {
+ enter_debug_mode(DCSR_CAUSE_DEBUGINT);
+ } // !!!The halt bit in DCSR is deprecated.
+ else if (state.dcsr.halt) {
+ enter_debug_mode(DCSR_CAUSE_HALT);
+ }
}
while (n > 0) {
if (unlikely(invalid_pc(pc))) { \
switch (pc) { \
case PC_SERIALIZE_BEFORE: state.serialized = true; break; \
- case PC_SERIALIZE_AFTER: instret++; break; \
+ case PC_SERIALIZE_AFTER: ++instret; break; \
+ case PC_SERIALIZE_WFI: n = ++instret; break; \
default: abort(); \
} \
pc = state.pc; \
try
{
- take_interrupt();
+ take_pending_interrupt();
- if (unlikely(debug))
+ if (unlikely(slow_path()))
{
while (instret < n)
{
+ if (unlikely(!state.serialized && state.single_step == state.STEP_STEPPED)) {
+ state.single_step = state.STEP_NONE;
+ if (state.dcsr.cause == DCSR_CAUSE_NONE) {
+ enter_debug_mode(DCSR_CAUSE_STEP);
+ // enter_debug_mode changed state.pc, so we can't just continue.
+ break;
+ }
+ }
+
+ if (unlikely(state.single_step == state.STEP_STEPPING)) {
+ state.single_step = state.STEP_STEPPED;
+ }
+
insn_fetch_t fetch = mmu->load_insn(pc);
- if (!state.serialized)
+ if (debug && !state.serialized)
disasm(fetch.insn);
pc = execute_insn(this, pc, fetch);
+
advance_pc();
+
+ if (unlikely(state.pc >= DEBUG_ROM_ENTRY &&
+ state.pc < DEBUG_END)) {
+ // We're waiting for the debugger to tell us something.
+ return;
+ }
+
}
}
else while (instret < n)
{
+ // This code uses a modified Duff's Device to improve the performance
+ // of executing instructions. While typical Duff's Devices are used
+ // for software pipelining, the switch statement below primarily
+ // benefits from separate call points for the fetch.func function call
+ // found in each execute_insn. This function call is an indirect jump
+ // that depends on the current instruction. By having an indirect jump
+ // dedicated for each icache entry, you improve the performance of the
+ // host's next address predictor. Each case in the switch statement
+ // allows for the program flow to continue to the next case if it
+ // corresponds to the next instruction in the program and instret is
+ // still less than n.
+ //
+ // According to Andrew Waterman's recollection, this optimization
+ // resulted in approximately a 2x performance increase.
+
+ // This figures out where to jump to in the switch statement
size_t idx = _mmu->icache_index(pc);
+
+ // This gets the cached decoded instruction from the MMU. If the MMU
+ // does not have the current pc cached, it will refill the MMU and
+ // return the correct entry. ic_entry->data.func is the C++ function
+ // corresponding to the instruction.
auto ic_entry = _mmu->access_icache(pc);
+ // This macro is included in "icache.h" included within the switch
+ // statement below. The indirect jump corresponding to the instruction
+ // is located within the execute_insn() function call.
#define ICACHE_ACCESS(i) { \
insn_fetch_t fetch = ic_entry->data; \
- ic_entry++; \
pc = execute_insn(this, pc, fetch); \
+ ic_entry = ic_entry->next; \
if (i == mmu_t::ICACHE_ENTRIES-1) break; \
- if (unlikely(ic_entry->tag != pc)) goto miss; \
+ if (unlikely(ic_entry->tag != pc)) break; \
if (unlikely(instret+1 == n)) break; \
instret++; \
state.pc = pc; \
}
+ // This switch statement implements the modified Duff's device as
+ // explained above.
switch (idx) {
+ // "icache.h" is generated by the gen_icache script
#include "icache.h"
}
advance_pc();
- continue;
-
-miss:
- advance_pc();
- // refill I$ if it looks like there wasn't a taken branch
- if (pc > (ic_entry-1)->tag && pc <= (ic_entry-1)->tag + MAX_INSN_LENGTH)
- _mmu->refill_icache(pc, ic_entry);
}
}
catch(trap_t& t)
{
take_trap(t, pc);
n = instret;
+
+ if (unlikely(state.single_step == state.STEP_STEPPED)) {
+ state.single_step = state.STEP_NONE;
+ enter_debug_mode(DCSR_CAUSE_STEP);
+ }
+ }
+ catch (trigger_matched_t& t)
+ {
+ if (mmu->matched_trigger) {
+ // This exception came from the MMU. That means the instruction hasn't
+ // fully executed yet. We start it again, but this time it won't throw
+ // an exception because matched_trigger is already set. (All memory
+ // instructions are idempotent so restarting is safe.)
+
+ insn_fetch_t fetch = mmu->load_insn(pc);
+ pc = execute_insn(this, pc, fetch);
+ advance_pc();
+
+ delete mmu->matched_trigger;
+ mmu->matched_trigger = NULL;
+ }
+ switch (state.mcontrol[t.index].action) {
+ case ACTION_DEBUG_MODE:
+ enter_debug_mode(DCSR_CAUSE_HWBP);
+ break;
+ case ACTION_DEBUG_EXCEPTION: {
+ mem_trap_t trap(CAUSE_BREAKPOINT, t.address);
+ take_trap(trap, pc);
+ break;
+ }
+ default:
+ abort();
+ }
}
state.minstret += instret;