1 // See LICENSE for license details.
8 static void commit_log_stash_privilege(processor_t
* p
)
10 #ifdef RISCV_ENABLE_COMMITLOG
11 state_t
* state
= p
->get_state();
12 state
->last_inst_priv
= state
->prv
;
13 state
->last_inst_xlen
= p
->get_xlen();
14 state
->last_inst_flen
= p
->get_flen();
// Print a value of the given bit width to the commit log (stderr) as a
// zero-padded hex literal. 128-bit values arrive as a hi/lo pair of 64-bit
// halves; narrower widths use only `lo`.
static void commit_log_print_value(int width, uint64_t hi, uint64_t lo)
{
  switch (width) {
    case 16:
      fprintf(stderr, "0x%04" PRIx16, (uint16_t)lo);
      break;
    case 32:
      fprintf(stderr, "0x%08" PRIx32, (uint32_t)lo);
      break;
    case 64:
      fprintf(stderr, "0x%016" PRIx64, lo);
      break;
    case 128:
      fprintf(stderr, "0x%016" PRIx64 "%016" PRIx64, hi, lo);
      break;
    default:
      // No other operand width exists in the ISA; treat it as a logic bug.
      abort();
  }
}
38 static void commit_log_print_insn(state_t
* state
, reg_t pc
, insn_t insn
)
40 #ifdef RISCV_ENABLE_COMMITLOG
41 auto& reg
= state
->log_reg_write
;
42 int priv
= state
->last_inst_priv
;
43 int xlen
= state
->last_inst_xlen
;
44 int flen
= state
->last_inst_flen
;
46 fprintf(stderr
, "%1d ", priv
);
47 commit_log_print_value(xlen
, 0, pc
);
48 fprintf(stderr
, " (");
49 commit_log_print_value(insn
.length() * 8, 0, insn
.bits());
52 bool fp
= reg
.addr
& 1;
53 int rd
= reg
.addr
>> 1;
54 int size
= fp
? flen
: xlen
;
55 fprintf(stderr
, ") %c%2d ", fp
? 'f' : 'x', rd
);
56 commit_log_print_value(size
, reg
.data
.v
[1], reg
.data
.v
[0]);
57 fprintf(stderr
, "\n");
59 fprintf(stderr
, ")\n");
// Presumably records `pc` into an execution-frequency histogram; the body
// is elided from this extract -- TODO(review): confirm against the full
// file. Compiles to a no-op unless RISCV_ENABLE_HISTOGRAM is defined.
65 inline void processor_t::update_histogram(reg_t pc
)
67 #ifdef RISCV_ENABLE_HISTOGRAM
72 // This is expected to be inlined by the compiler so each use of execute_insn
73 // includes a duplicated body of the function to get separate fetch.func
75 static reg_t
execute_insn(processor_t
* p
, reg_t pc
, insn_fetch_t fetch
)
77 commit_log_stash_privilege(p
);
78 reg_t npc
= fetch
.func(p
, fetch
.insn
, pc
);
79 if (npc
!= PC_SERIALIZE_BEFORE
) {
80 commit_log_print_insn(p
->get_state(), pc
, fetch
.insn
);
81 p
->update_histogram(pc
);
86 bool processor_t::slow_path()
88 return debug
|| state
.single_step
!= state
.STEP_NONE
|| state
.dcsr
.cause
;
91 // fetch/decode/execute loop
92 void processor_t::step(size_t n
)
// NOTE(review): this extract elides many original source lines (braces,
// loop headers, try/catch framing). Comments below describe only what the
// visible code shows; elided control flow is flagged, not guessed.
// Enter debug mode on a pending debug request, but only if DCSR records no
// cause yet (i.e. the hart is not already in debug mode).
94 if (state
.dcsr
.cause
== DCSR_CAUSE_NONE
) {
96 enter_debug_mode(DCSR_CAUSE_DEBUGINT
);
97 } // !!!The halt bit in DCSR is deprecated.
98 else if (state
.dcsr
.halt
) {
99 enter_debug_mode(DCSR_CAUSE_HALT
);
// advance_pc(): expanded after each executed instruction. The sentinel pc
// values request serialization: SERIALIZE_BEFORE marks the state as
// serialized, SERIALIZE_AFTER just retires the instruction, and
// SERIALIZE_WFI also sets n = instret so the stepping loop terminates.
// The non-sentinel (normal commit) branch of the macro is elided here.
108 #define advance_pc() \
109 if (unlikely(invalid_pc(pc))) { \
111 case PC_SERIALIZE_BEFORE: state.serialized = true; break; \
112 case PC_SERIALIZE_AFTER: ++instret; break; \
113 case PC_SERIALIZE_WFI: n = ++instret; break; \
125 take_pending_interrupt();
// Slow path: one-instruction-at-a-time loop with single-step and
// debug-mode bookkeeping wrapped around every instruction.
127 if (unlikely(slow_path()))
// An armed single step (STEP_STEPPED) has completed: disarm it and, if the
// hart is not already in debug mode, enter debug mode with cause STEP.
131 if (unlikely(!state
.serialized
&& state
.single_step
== state
.STEP_STEPPED
)) {
132 state
.single_step
= state
.STEP_NONE
;
133 if (state
.dcsr
.cause
== DCSR_CAUSE_NONE
) {
134 enter_debug_mode(DCSR_CAUSE_STEP
);
135 // enter_debug_mode changed state.pc, so we can't just continue.
// STEPPING -> STEPPED: the instruction about to execute completes the step.
140 if (unlikely(state
.single_step
== state
.STEP_STEPPING
)) {
141 state
.single_step
= state
.STEP_STEPPED
;
// Fetch and execute exactly one instruction.
144 insn_fetch_t fetch
= mmu
->load_insn(pc
);
145 if (debug
&& !state
.serialized
)
// (the statement controlled by the condition above is elided here)
147 pc
= execute_insn(this, pc
, fetch
);
// If execution landed the pc inside the Debug ROM, the hart is now under
// debugger control; stop fetching.
151 if (unlikely(state
.pc
>= DEBUG_ROM_ENTRY
&&
152 state
.pc
< DEBUG_END
)) {
153 // We're waiting for the debugger to tell us something.
// Fast path: icache-driven dispatch, described by the comments below.
159 else while (instret
< n
)
161 // This code uses a modified Duff's Device to improve the performance
162 // of executing instructions. While typical Duff's Devices are used
163 // for software pipelining, the switch statement below primarily
164 // benefits from separate call points for the fetch.func function call
165 // found in each execute_insn. This function call is an indirect jump
166 // that depends on the current instruction. By having an indirect jump
167 // dedicated for each icache entry, you improve the performance of the
168 // host's next address predictor. Each case in the switch statement
169 // allows for the program flow to continue to the next case if it
170 // corresponds to the next instruction in the program and instret is
171 // still less than n.
173 // According to Andrew Waterman's recollection, this optimization
174 // resulted in approximately a 2x performance increase.
176 // This figures out where to jump to in the switch statement
177 size_t idx
= _mmu
->icache_index(pc
);
179 // This gets the cached decoded instruction from the MMU. If the MMU
180 // does not have the current pc cached, it will refill the MMU and
181 // return the correct entry. ic_entry->data.func is the C++ function
182 // corresponding to the instruction.
183 auto ic_entry
= _mmu
->access_icache(pc
);
185 // This macro is included in "icache.h" included within the switch
186 // statement below. The indirect jump corresponding to the instruction
187 // is located within the execute_insn() function call.
188 #define ICACHE_ACCESS(i) { \
189 insn_fetch_t fetch = ic_entry->data; \
190 pc = execute_insn(this, pc, fetch); \
191 ic_entry = ic_entry->next; \
192 if (i == mmu_t::ICACHE_ENTRIES-1) break; \
193 if (unlikely(ic_entry->tag != pc)) break; \
194 if (unlikely(instret+1 == n)) break; \
199 // This switch statement implements the modified Duff's device as
202 // "icache.h" is generated by the gen_icache script
// (trap-handling catch framing elided) A trap that completes an armed
// single step also drops the hart into debug mode with cause STEP.
214 if (unlikely(state
.single_step
== state
.STEP_STEPPED
)) {
215 state
.single_step
= state
.STEP_NONE
;
216 enter_debug_mode(DCSR_CAUSE_STEP
);
// Hardware-trigger match: either re-run the faulting access with the
// trigger already recorded (MMU-originated), or take the trigger's
// configured action.
219 catch (trigger_matched_t
& t
)
221 if (mmu
->matched_trigger
) {
222 // This exception came from the MMU. That means the instruction hasn't
223 // fully executed yet. We start it again, but this time it won't throw
224 // an exception because matched_trigger is already set. (All memory
225 // instructions are idempotent so restarting is safe.)
227 insn_fetch_t fetch
= mmu
->load_insn(pc
);
228 pc
= execute_insn(this, pc
, fetch
);
231 delete mmu
->matched_trigger
;
232 mmu
->matched_trigger
= NULL
;
// Dispatch on the action programmed into the matching mcontrol trigger.
234 switch (state
.mcontrol
[t
.index
].action
) {
235 case ACTION_DEBUG_MODE
:
236 enter_debug_mode(DCSR_CAUSE_HWBP
);
238 case ACTION_DEBUG_EXCEPTION
: {
239 mem_trap_t
trap(CAUSE_BREAKPOINT
, t
.address
);
// Fold the instructions retired this iteration into the minstret state.
248 state
.minstret
+= instret
;