parser.add_option("--caches", action="store_true")
parser.add_option("--l2cache", action="store_true")
parser.add_option("--fastmem", action="store_true")
+ parser.add_option("--simpoint-profile", action="store_true",
+ help="Enable basic block profiling for SimPoints")
+ parser.add_option("--simpoint-interval", type="int", default=10000000,
+ help="SimPoint interval in num of instructions")
parser.add_option("--clock", action="store", type="string", default='2GHz')
parser.add_option("--num-dirs", type="int", default=1)
parser.add_option("--num-l2caches", type="int", default=1)
if (options.caches or options.l2cache):
fatal("You cannot use fastmem in combination with caches!")
+if options.simpoint_profile:
+ if not options.fastmem:
+ # Atomic CPU checked with fastmem option already
+ fatal("SimPoint generation should be done with atomic cpu and fastmem")
+ if np > 1:
+ fatal("SimPoint generation not supported with more than one CPUs")
+
for i in xrange(np):
if options.smt:
system.cpu[i].workload = multiprocesses
if options.fastmem:
system.cpu[i].fastmem = True
+ if options.simpoint_profile:
+ system.cpu[i].simpoint_profile = True
+ system.cpu[i].simpoint_interval = options.simpoint_interval
+
if options.checker:
system.cpu[i].addCheckerCpu()
"terminate when all threads have reached this inst count")
max_insts_any_thread = Param.Counter(0,
"terminate when any thread reaches this inst count")
+ simpoint_start_insts = VectorParam.Counter([],
+ "starting instruction counts of simpoints")
max_loads_all_threads = Param.Counter(0,
"terminate when all threads have reached this load count")
max_loads_any_thread = Param.Counter(0,
}
}
+ // Set up instruction-count-based termination events for SimPoints.
+ // Typically, there is more than one action point, so one exit event
+ // is scheduled per entry. Simulation.py is responsible for taking the
+ // necessary actions upon exiting the simulation loop.
+ if (!p->simpoint_start_insts.empty()) {
+ const char *cause = "simpoint starting point found";
+ for (size_t i = 0; i < p->simpoint_start_insts.size(); ++i) {
+ Event *event = new SimLoopExitEvent(cause, 0);
+ comInstEventQueue[0]->schedule(event, p->simpoint_start_insts[i]);
+ }
+ }
+
if (p->max_insts_all_threads != 0) {
const char *cause = "all threads reached the max instruction count";
simulate_data_stalls = Param.Bool(False, "Simulate dcache stall cycles")
simulate_inst_stalls = Param.Bool(False, "Simulate icache stall cycles")
fastmem = Param.Bool(False, "Access memory directly")
+ simpoint_profile = Param.Bool(False, "Generate SimPoint BBVs")
+ simpoint_interval = Param.UInt64(100000000, "SimPoint Interval Size (insts)")
+ simpoint_profile_file = Param.String("simpoint.bb.gz", "SimPoint BBV file")
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
+#include "base/output.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
drain_manager(NULL),
icachePort(name() + ".icache_port", this),
dcachePort(name() + ".dcache_port", this),
- fastmem(p->fastmem)
+ fastmem(p->fastmem),
+ simpoint(p->simpoint_profile),
+ intervalSize(p->simpoint_interval),
+ intervalCount(0),
+ intervalDrift(0),
+ simpointStream(NULL),
+ currentBBV(0, 0),
+ currentBBVInstCount(0)
{
_status = Idle;
+
+ if (simpoint) {
+ simpointStream = simout.create(p->simpoint_profile_file, false);
+ }
}
if (tickEvent.scheduled()) {
deschedule(tickEvent);
}
+ if (simpointStream) {
+ simout.close(simpointStream);
+ }
}
unsigned int
curStaticInst->isFirstMicroop()))
instCnt++;
+ // profile for SimPoints if enabled and macro inst is finished
+ if (simpoint && curStaticInst && (fault == NoFault) &&
+ (!curStaticInst->isMicroop() ||
+ curStaticInst->isLastMicroop())) {
+ profileSimPoint();
+ }
+
Tick stall_ticks = 0;
if (simulate_inst_stalls && icache_access)
stall_ticks += icache_latency;
dcachePort.printAddr(a);
}
+/**
+ * Account for one completed instruction in the SimPoint basic block
+ * vector (BBV) profile.
+ *
+ * A basic block is identified by the PC of the first instruction counted
+ * after the previous block was closed and the PC of the control
+ * instruction that closes it. Each time a block is closed, its instruction
+ * count is added to that block's per-interval total. If the interval size
+ * has been reached at that point, one line of the form
+ * "T:<id>:<count> :<id>:<count> ..." (sorted by block id) is written to
+ * simpointStream and the per-interval totals are cleared.
+ *
+ * Intervals are only closed on block boundaries; the instructions by which
+ * an interval overshoots intervalSize are carried over in intervalDrift.
+ */
+void
+AtomicSimpleCPU::profileSimPoint()
+{
+    // A zero count means no block is currently open, so this instruction
+    // is the first one of a new block.
+    if (!currentBBVInstCount)
+        currentBBV.first = thread->pcState().instAddr();
+
+    ++intervalCount;
+    ++currentBBVInstCount;
+
+    // Only a control instruction is assumed to end a basic block; any
+    // other instruction just extends the block that is currently open.
+    if (!curStaticInst->isControl())
+        return;
+
+    currentBBV.second = thread->pcState().instAddr();
+
+    auto map_itr = bbMap.find(currentBBV);
+    if (map_itr == bbMap.end()) {
+        // Previously unseen block: give it the next unique id (ids start
+        // at 1), record its length and start its per-interval count.
+        BBInfo info;
+        info.id = bbMap.size() + 1;
+        info.insts = currentBBVInstCount;
+        info.count = currentBBVInstCount;
+        bbMap.insert(std::make_pair(currentBBV, info));
+    } else {
+        // Known block: accumulate the instructions just executed.
+        // The length is deliberately not checked against info.insts. A
+        // block is keyed only by its first and last PC, and the caller
+        // does not profile instructions that fault, so two executions
+        // with the same key may count a different number of instructions.
+        map_itr->second.count += currentBBVInstCount;
+    }
+    currentBBVInstCount = 0;
+
+    // The interval ends once the instructions counted in it, plus the
+    // excess carried over from the previous interval, reach the
+    // interval size.
+    if (intervalCount + intervalDrift < intervalSize)
+        return;
+
+    // Collect (id, count) for every block executed in this interval and
+    // reset the per-interval counts for the next one.
+    std::vector<std::pair<uint64_t, uint64_t> > counts;
+    for (auto &entry : bbMap) {
+        BBInfo &info = entry.second;
+        if (info.count != 0) {
+            counts.push_back(std::make_pair(info.id, info.count));
+            info.count = 0;
+        }
+    }
+    // Hash table iteration order is unspecified; sort so that the output
+    // is ordered by block id.
+    std::sort(counts.begin(), counts.end());
+
+    // Emit one BBV line for this interval.
+    *simpointStream << "T";
+    for (const auto &cnt : counts)
+        *simpointStream << ":" << cnt.first << ":" << cnt.second << " ";
+    *simpointStream << "\n";
+
+    intervalDrift = (intervalCount + intervalDrift) - intervalSize;
+    intervalCount = 0;
+}
////////////////////////////////////////////////////////////////////////
//
#ifndef __CPU_SIMPLE_ATOMIC_HH__
#define __CPU_SIMPLE_ATOMIC_HH__
+#include "base/hashmap.hh"
#include "cpu/simple/base.hh"
#include "params/AtomicSimpleCPU.hh"
+/**
+ * Start and end address of basic block for SimPoint profiling.
+ * This structure is used to look up the hash table of BBVs.
+ * - first: PC of first inst in basic block
+ * - second: PC of last inst in basic block
+ */
+typedef std::pair<Addr, Addr> BasicBlockRange;
+
+/**
+ * Hash functor specialization that lets BasicBlockRange be used as a hash
+ * table key: the two PCs of the range are summed and the sum is hashed
+ * with the existing hash for Addr.
+ */
+__hash_namespace_begin
+template <>
+struct hash<BasicBlockRange>
+{
+    size_t
+    operator()(const BasicBlockRange &bb) const
+    {
+        const Addr pc_sum = bb.first + bb.second;
+        return hash<Addr>()(pc_sum);
+    }
+};
+__hash_namespace_end
+
+
class AtomicSimpleCPU : public BaseSimpleCPU
{
public:
bool dcache_access;
Tick dcache_latency;
+ /**
+ * Profile basic blocks for SimPoints.
+ * Called at every macro inst to increment basic block inst counts and
+ * to profile block if end of block.
+ */
+ void profileSimPoint();
+
+ /** Data structures for SimPoints BBV generation
+ * @{
+ */
+
+ /** Whether SimPoint BBV profiling is enabled */
+ const bool simpoint;
+ /** SimPoint profiling interval size in instructions */
+ const uint64_t intervalSize;
+
+ /** Inst count in current interval */
+ uint64_t intervalCount;
+ /** Excess inst count from previous interval */
+ uint64_t intervalDrift;
+ /** Pointer to SimPoint BBV output stream */
+ std::ostream *simpointStream;
+
+ /** Per-basic-block bookkeeping stored as the value type of bbMap */
+ struct BBInfo {
+ /** Unique ID, assigned from 1 in order of first appearance */
+ uint64_t id;
+ /** Inst count of the block recorded when it was first seen */
+ uint64_t insts;
+ /** Insts executed in this block since the last interval was written */
+ uint64_t count;
+ };
+
+ /** Hash table containing all previously seen basic blocks */
+ m5::hash_map<BasicBlockRange, BBInfo> bbMap;
+ /** Currently executing basic block */
+ BasicBlockRange currentBBV;
+ /** inst count in current basic block */
+ uint64_t currentBBVInstCount;
+
+ /** @}
+ * End of data structures for SimPoints BBV generation
+ */
+
protected:
/** Return a reference to the data port. */