From: Xiongfei Date: Tue, 10 Nov 2020 04:03:30 +0000 (+0800) Subject: cpu-minor: this is a bug fix for MinorCPU for thread cloning. X-Git-Tag: develop-gem5-snapshot~451 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=088c8a224c7dfdaeceea4e860dfeccb6ec2f6072;p=gem5.git cpu-minor: this is a bug fix for MinorCPU for thread cloning. Inside the code of cloneFunc(…) //syscall_emul.hh cp->initState(); //line 1483 p->clone(tc, ctc, cp, flags); //line 1484 … ctc->clearArchRegs(); //line 1503 OS::archClone(flags, p, cp, tc, ctc, newStack, tlsPtr); //line 1505 … At line 1483, initState() is called and the activateContext() of the corresponding MinorCPU is eventually called. The actual architecture clone happens at line 1505 where PC of the new thread could have a correct value. In the existing implementation of MinorCPU::activateContext(ThreadID thread_id), the below line 275 is called pipeline->wakeupFetch(thread_id); to start fetching instruction with current value of PC, which is 0x0, leading to panic “Page table fault when accessing virtual address 0”. This is because the OS::archClone() is not yet called. So, the below bug fix handles the wakeup fetch for a thread for two scenarios: ... if (!threads[thread_id]->getUseForClone()) { //the thread is not cloned pipeline->wakeupFetch(thread_id); } else {//the thread from clone if (fetchEventWrapper != NULL) delete fetchEventWrapper; fetchEventWrapper = new EventFunctionWrapper([this, thread_id] {pipeline->wakeupFetch(thread_id);}, "wakeupFetch"); schedule(*fetchEventWrapper, clockEdge(Cycles(0))); } ... If a thread is not cloned, pipeline->wakeupFetch() is called immediately. For the cloned thread, the above bug fix delays the execution of pipeline->wakeupFetch() after the OS::archClone is done. ThreadContext::getUseForClone() return true if a thread is cloned. A member variable fetchEventWrapper is added to MinorCPU class for delayed fetch event. A member variable useForClone and its corresponding get/set methods are added to ThreadContext class. This approach allows future reuse of this useForClone variable by other CPU models if needed and also avoid lots of changes resulted by modifying parameters of activateContext () and activate() which are defined as override. Inside the syscall cloneFunc, the useForClone member of a ThreadContext object is set via its set method right before Process's initState() is called, shown as below. ctc->setUseForClone(true); cp->initState(); p->clone(tc, ctc, cp, flags); A few previously failed RISC-V ASM tests have been open in tests.py file after the bug fix works. JIRA issue: https://gem5.atlassian.net/browse/GEM5-374 Change-Id: Ibffe46522e2617443d29f49df180692c54830f14 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/37315 Reviewed-by: Bobby R. Bruce Maintainer: Bobby R. Bruce Tested-by: kokoro --- diff --git a/src/cpu/minor/cpu.cc b/src/cpu/minor/cpu.cc index 18a3feed9..de7425630 100644 --- a/src/cpu/minor/cpu.cc +++ b/src/cpu/minor/cpu.cc @@ -77,12 +77,17 @@ MinorCPU::MinorCPU(const MinorCPUParams ¶ms) : pipeline = new Minor::Pipeline(*this, params); activityRecorder = pipeline->getActivityRecorder(); + + fetchEventWrapper = NULL; } MinorCPU::~MinorCPU() { delete pipeline; + if (fetchEventWrapper != NULL) + delete fetchEventWrapper; + for (ThreadID thread_id = 0; thread_id < threads.size(); thread_id++) { delete threads[thread_id]; } @@ -266,7 +271,17 @@ MinorCPU::activateContext(ThreadID thread_id) /* Wake up the thread, wakeup the pipeline tick */ threads[thread_id]->activate(); wakeupOnEvent(Minor::Pipeline::CPUStageId); - pipeline->wakeupFetch(thread_id); + + if (!threads[thread_id]->getUseForClone())//the thread is not cloned + { + pipeline->wakeupFetch(thread_id); + } else { //the thread from clone + if (fetchEventWrapper != NULL) + delete fetchEventWrapper; + fetchEventWrapper = new EventFunctionWrapper([this, thread_id] + { pipeline->wakeupFetch(thread_id); }, "wakeupFetch"); + schedule(*fetchEventWrapper, clockEdge(Cycles(0))); + } BaseCPU::activateContext(thread_id); } diff --git a/src/cpu/minor/cpu.hh b/src/cpu/minor/cpu.hh index ac9831aee..0e919f332 100644 --- a/src/cpu/minor/cpu.hh +++ b/src/cpu/minor/cpu.hh @@ -186,6 +186,7 @@ class MinorCPU : public BaseCPU * already been idled. The stage argument should be from the * enumeration Pipeline::StageId */ void wakeupOnEvent(unsigned int stage_id); + EventFunctionWrapper *fetchEventWrapper; }; #endif /* __CPU_MINOR_CPU_HH__ */ diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index c50eb2693..2cad1ac77 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -91,9 +91,17 @@ class ThreadContext : public PCEventScope using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; using VecPredRegContainer = TheISA::VecPredRegContainer; + bool useForClone = false; public: + bool getUseForClone() { return useForClone; } + + void setUseForClone(bool newUseForClone) + { + useForClone = newUseForClone; + } + enum Status { /// Running. Instructions should be executed only when diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh index 7d7e2652b..491a309b4 100644 --- a/src/sim/syscall_emul.hh +++ b/src/sim/syscall_emul.hh @@ -1480,6 +1480,8 @@ cloneFunc(SyscallDesc *desc, ThreadContext *tc, RegVal flags, RegVal newStack, cp->pTable->shared = true; cp->useForClone = true; } + + ctc->setUseForClone(true); cp->initState(); p->clone(tc, ctc, cp, flags); diff --git a/tests/gem5/asmtest/tests.py b/tests/gem5/asmtest/tests.py index b2a92498a..026b2304f 100755 --- a/tests/gem5/asmtest/tests.py +++ b/tests/gem5/asmtest/tests.py @@ -41,7 +41,7 @@ def asm_test(test, #The full path of the test cpu_type, num_cpus=4, max_tick=10000000000, - ruby=True, + ruby=False, debug_flags=None, # Debug flags passed to gem5 full_system = False ): @@ -96,10 +96,10 @@ cpu_types = ('AtomicSimpleCPU', 'TimingSimpleCPU', 'MinorCPU', 'DerivO3CPU') # https://gem5.atlassian.net/browse/GEM5-496 # https://gem5.atlassian.net/browse/GEM5-497 binaries = ( -# 'rv64samt-ps-sysclone_d', -# 'rv64samt-ps-sysfutex1_d', + 'rv64samt-ps-sysclone_d', + 'rv64samt-ps-sysfutex1_d', # 'rv64samt-ps-sysfutex2_d', -# 'rv64samt-ps-sysfutex3_d', + 'rv64samt-ps-sysfutex3_d', # 'rv64samt-ps-sysfutex_d', 'rv64ua-ps-amoadd_d', 'rv64ua-ps-amoadd_w', @@ -120,16 +120,16 @@ binaries = ( 'rv64ua-ps-amoxor_d', 'rv64ua-ps-amoxor_w', 'rv64ua-ps-lrsc', -# 'rv64uamt-ps-amoadd_d', -# 'rv64uamt-ps-amoand_d', -# 'rv64uamt-ps-amomax_d', -# 'rv64uamt-ps-amomaxu_d', -# 'rv64uamt-ps-amomin_d', -# 'rv64uamt-ps-amominu_d', -# 'rv64uamt-ps-amoor_d', -# 'rv64uamt-ps-amoswap_d', -# 'rv64uamt-ps-amoxor_d', -# 'rv64uamt-ps-lrsc_d', + 'rv64uamt-ps-amoadd_d', + 'rv64uamt-ps-amoand_d', + 'rv64uamt-ps-amomax_d', + 'rv64uamt-ps-amomaxu_d', + 'rv64uamt-ps-amomin_d', + 'rv64uamt-ps-amominu_d', + 'rv64uamt-ps-amoor_d', + 'rv64uamt-ps-amoswap_d', + 'rv64uamt-ps-amoxor_d', + 'rv64uamt-ps-lrsc_d', 'rv64ud-ps-fadd', 'rv64ud-ps-fclass', 'rv64ud-ps-fcmp',