riscv/execute.cc

   1 // See LICENSE for license details.
   2
   3 #include "processor.h"
   4 #include "mmu.h"
   5 #include "sim.h"
   6 #include <cassert>
   7
   8
   9 static void commit_log_stash_privilege(state_t* state)
  10 {
  11 #ifdef RISCV_ENABLE_COMMITLOG
  12   state->last_inst_priv = state->prv;
  13 #endif
  14 }
  15
  16 static void commit_log_print_insn(state_t* state, reg_t pc, insn_t insn)
  17 {
  18 #ifdef RISCV_ENABLE_COMMITLOG
  19   int32_t priv = state->last_inst_priv;
  20   uint64_t mask = (insn.length() == 8 ? uint64_t(0) : (uint64_t(1) << (insn.length() * 8))) - 1;
  21   if (state->log_reg_write.addr) {
  22     fprintf(stderr, "%1d 0x%016" PRIx64 " (0x%08" PRIx64 ") %c%2" PRIu64 " 0x%016" PRIx64 "\n",
  23             priv,
  24             pc,
  25             insn.bits() & mask,
  26             state->log_reg_write.addr & 1 ? 'f' : 'x',
  27             state->log_reg_write.addr >> 1,
  28             state->log_reg_write.data);
  29   } else {
  30     fprintf(stderr, "%1d 0x%016" PRIx64 " (0x%08" PRIx64 ")\n", priv, pc, insn.bits() & mask);
  31   }
  32   state->log_reg_write.addr = 0;
  33 #endif
  34 }
  35
  36 inline void processor_t::update_histogram(reg_t pc)
  37 {
  38 #ifdef RISCV_ENABLE_HISTOGRAM
  39   pc_histogram[pc]++;
  40 #endif
  41 }
  42
  43 // This is expected to be inlined by the compiler so each use of execute_insn
  44 // includes a duplicated body of the function to get separate fetch.func
  45 // function calls.
  46 static reg_t execute_insn(processor_t* p, reg_t pc, insn_fetch_t fetch)
  47 {
  48   commit_log_stash_privilege(p->get_state());
  49   reg_t npc = fetch.func(p, fetch.insn, pc);
  50   if (!invalid_pc(npc)) {
  51     commit_log_print_insn(p->get_state(), pc, fetch.insn);
  52     p->update_histogram(pc);
  53   }
  54   return npc;
  55 }
  56
  57 bool processor_t::slow_path()
  58 {
  59   return debug || state.single_step != state.STEP_NONE || state.dcsr.cause;
  60 }
  61
  62 // fetch/decode/execute loop
     //
     // Runs up to `n` instructions on this processor. Work is done in batches:
     // each pass of the outer while loop executes instructions until the batch
     // ends (the requested count is reached, advance_pc() sees a PC_SERIALIZE_*
     // value, debug mode is entered after a single step, or an exception is
     // caught), then adds the batch's instruction count to state.minstret and
     // subtracts it from `n`.
  63 void processor_t::step(size_t n)
  64 {
     // dcsr.cause == DCSR_CAUSE_NONE is the "not in Debug Mode" case: enter
     // Debug Mode if the debug module reports an interrupt for this processor
     // id, or if dcsr.halt is set.
  65   if (state.dcsr.cause == DCSR_CAUSE_NONE) {
  66     // TODO: get_interrupt() isn't super fast. Does that matter?
  67     if (sim->debug_module.get_interrupt(id)) {
  68       enter_debug_mode(DCSR_CAUSE_DEBUGINT);
  69     } else if (state.dcsr.halt) {
  70       enter_debug_mode(DCSR_CAUSE_HALT);
  71     }
  72   } else {
  73     // In Debug Mode, just do 11 steps at a time. Otherwise we're going to be
  74     // spinning the rest of the time anyway.
  75     n = std::min(n, (size_t) 11);
  76   }
  77
  78   while (n > 0) {
         // instret counts the instructions completed in this batch; pc is the
         // working copy of state.pc.
  79     size_t instret = 0;
  80     reg_t pc = state.pc;
  81     mmu_t* _mmu = mmu;
  82
         // advance_pc() commits the pc produced by the instruction just executed.
         //  - Valid pc: store it in state.pc and count the instruction.
         //  - PC_SERIALIZE_BEFORE: set state.serialized; the instruction is not
         //    counted.
         //  - PC_SERIALIZE_AFTER: count the instruction.
         //  - Any other invalid pc: abort().
         // In the two PC_SERIALIZE_* cases pc is reloaded from state.pc and the
         // macro then executes `break`, which leaves the innermost loop or switch
         // enclosing the place where the macro is expanded.
  83     #define advance_pc() \
  84      if (unlikely(invalid_pc(pc))) { \
  85        switch (pc) { \
  86          case PC_SERIALIZE_BEFORE: state.serialized = true; break; \
  87          case PC_SERIALIZE_AFTER: instret++; break; \
  88          default: abort(); \
  89        } \
  90        pc = state.pc; \
  91        break; \
  92      } else { \
  93        state.pc = pc; \
  94        instret++; \
  95      }
  96
  97     try
  98     {
  99       take_interrupt();
 100
           // Slow path: fetch each instruction with mmu->load_insn() so that
           // disassembly and single-step bookkeeping can run around it.
 101       if (unlikely(slow_path()))
 102       {
 103         while (instret < n)
 104         {
 105           if (unlikely(state.single_step == state.STEP_STEPPING)) {
 106             state.single_step = state.STEP_STEPPED;
 107           }
 108
 109           insn_fetch_t fetch = mmu->load_insn(pc);
 110           if (debug && !state.serialized)
 111             disasm(fetch.insn);
 112           pc = execute_insn(this, pc, fetch);
 113           bool serialize_before = (pc == PC_SERIALIZE_BEFORE);
 114
 115           advance_pc();
 116
               // NOTE(review): advance_pc() leaves this loop for any
               // PC_SERIALIZE_* value, so the check below is only reached
               // after a valid pc was committed.
 117           if (unlikely(state.single_step == state.STEP_STEPPED) && !serialize_before) {
 118             state.single_step = state.STEP_NONE;
 119             enter_debug_mode(DCSR_CAUSE_STEP);
 120             // enter_debug_mode changed state.pc, so we can't just continue.
 121             break;
 122           }
 123         }
 124       }
 125       else while (instret < n)
 126       {
 127         // This code uses a modified Duff's Device to improve the performance
 128         // of executing instructions. While typical Duff's Devices are used
 129         // for software pipelining, the switch statement below primarily
 130         // benefits from separate call points for the fetch.func function call
 131         // found in each execute_insn. This function call is an indirect jump
 132         // that depends on the current instruction. By having an indirect jump
 133         // dedicated for each icache entry, you improve the performance of the
 134         // host's next address predictor. Each case in the switch statement
 135         // allows for the program flow to continue to the next case if it
 136         // corresponds to the next instruction in the program and instret is
 137         // still less than n.
 138         //
 139         // According to Andrew Waterman's recollection, this optimization
 140         // resulted in approximately a 2x performance increase.
 141         //
 142         // If there is support for compressed instructions, the mmu and the
 143         // switch statement get more complicated. Each branch target is stored
 144         // in the index corresponding to mmu->icache_index(), but consecutive
 145         // non-branching instructions are stored in consecutive indices even if
 146         // mmu->icache_index() specifies a different index (which is the case
 147         // for 32-bit instructions in the presence of compressed instructions).
 148
 149         // This figures out where to jump to in the switch statement
 150         size_t idx = _mmu->icache_index(pc);
 151
 152         // This gets the cached decoded instruction from the MMU. If the MMU
 153         // does not have the current pc cached, it will refill the MMU and
 154         // return the correct entry. ic_entry->data.func is the C++ function
 155         // corresponding to the instruction.
 156         auto ic_entry = _mmu->access_icache(pc);
 157
 158         // This macro is included in "icache.h" included within the switch
 159         // statement below. The indirect jump corresponding to the instruction
 160         // is located within the execute_insn() function call.
             //
             // Each expansion executes the instruction held in the current
             // icache entry and advances ic_entry. It then breaks out of the
             // switch if this was the last icache entry, jumps to `miss` if the
             // next entry's tag does not match the new pc, or breaks out of the
             // switch if this was the last instruction of the batch. Only when
             // none of those apply does it count the instruction and store pc
             // in state.pc itself; in the other cases the advance_pc() that
             // follows the switch (or the `miss` label) does the accounting.
 161         #define ICACHE_ACCESS(i) { \
 162           insn_fetch_t fetch = ic_entry->data; \
 163           ic_entry++; \
 164           pc = execute_insn(this, pc, fetch); \
 165           if (i == mmu_t::ICACHE_ENTRIES-1) break; \
 166           if (unlikely(ic_entry->tag != pc)) goto miss; \
 167           if (unlikely(instret+1 == n)) break; \
 168           instret++; \
 169           state.pc = pc; \
 170         }
 171
 172         // This switch statement implements the modified Duff's device as
 173         // explained above.
 174         switch (idx) {
 175           // "icache.h" is generated by the gen_icache script
 176           #include "icache.h"
 177         }
 178
             // Reached when the switch was left without a tag mismatch: commit
             // the last pc and go around for a fresh icache lookup.
 179         advance_pc();
 180         continue;
 181
 182 miss:
 183         advance_pc();
 184         // refill I$ if it looks like there wasn't a taken branch
 185         if (pc > (ic_entry-1)->tag && pc <= (ic_entry-1)->tag + MAX_INSN_LENGTH)
 186           _mmu->refill_icache(pc, ic_entry);
 187       }
 188     }
 189     catch(trap_t& t)
 190     {
 191       take_trap(t, pc);
           // Setting n to instret makes the `n -= instret` at the bottom of the
           // outer loop reach zero, so the loop ends after this trap.
 192       n = instret;
 193
 194       if (unlikely(state.single_step == state.STEP_STEPPED)) {
 195         state.single_step = state.STEP_NONE;
 196         enter_debug_mode(DCSR_CAUSE_STEP);
 197       }
 198     }
 199     catch (trigger_matched_t& t)
 200     {
 201       if (mmu->matched_trigger) {
 202         // This exception came from the MMU. That means the instruction hasn't
 203         // fully executed yet. We start it again, but this time it won't throw
 204         // an exception because matched_trigger is already set. (All memory
 205         // instructions are idempotent so restarting is safe.)
 206
 207         insn_fetch_t fetch = mmu->load_insn(pc);
 208         pc = execute_insn(this, pc, fetch);
             // NOTE(review): inside this handler the `break` in advance_pc()
             // would leave the outer `while (n > 0)` loop, skipping the delete
             // below, the action dispatch and the minstret update. Confirm the
             // re-executed instruction can never return a PC_SERIALIZE_* value.
 209         advance_pc();
 210
 211         delete mmu->matched_trigger;
 212         mmu->matched_trigger = NULL;
 213       }
           // Perform the action configured for the trigger that matched.
 214       switch (state.mcontrol[t.index].action) {
 215         case ACTION_DEBUG_MODE:
 216           enter_debug_mode(DCSR_CAUSE_HWBP);
 217           break;
 218         case ACTION_DEBUG_EXCEPTION: {
 219           mem_trap_t trap(CAUSE_BREAKPOINT, t.address);
 220           take_trap(trap, pc);
 221           break;
 222         }
 223         default:
 224           abort();
 225       }
 226     }
 227
         // Account for the instructions completed in this batch.
 228     state.minstret += instret;
 229     n -= instret;
 230   }
 231 }