From 0782d92286ded450b7e615fefbd5d6d5e738c8cd Mon Sep 17 00:00:00 2001 From: Binh Pham Date: Sat, 21 Jun 2014 10:26:43 -0700 Subject: [PATCH] o3: split load & store queue full cases in rename Check for free entries in Load Queue and Store Queue separately to avoid cases when load cannot be renamed due to full Store Queue and vice versa. This work was done while Binh was an intern at AMD Research. --- src/cpu/o3/comm.hh | 7 ++- src/cpu/o3/cpu.cc | 10 +++- src/cpu/o3/iew_impl.hh | 44 +++++++++------ src/cpu/o3/lsq.hh | 15 ++++- src/cpu/o3/lsq_impl.hh | 37 +++++++++--- src/cpu/o3/lsq_unit.hh | 8 ++- src/cpu/o3/lsq_unit_impl.hh | 28 +++++---- src/cpu/o3/rename.hh | 30 ++++++++-- src/cpu/o3/rename_impl.hh | 109 ++++++++++++++++++++++++++---------- 9 files changed, 210 insertions(+), 78 deletions(-) diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index 587e1d9e7..ab0a9ff42 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2011 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * * The license below extends only to copyright in the software and shall @@ -144,13 +145,15 @@ struct TimeBufStruct { struct iewComm { // Also eventually include skid buffer space. 
unsigned freeIQEntries; - unsigned freeLSQEntries; + unsigned freeLQEntries; + unsigned freeSQEntries; + unsigned dispatchedToLQ; + unsigned dispatchedToSQ; unsigned iqCount; unsigned ldstqCount; unsigned dispatched; - unsigned dispatchedToLSQ; bool usedIQ; bool usedLSQ; }; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index ac003bc24..4f48e29d9 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1009,12 +1009,18 @@ FullO3CPU::activateWhenReady(ThreadID tid) "IQ entries.\n", tid); ready = false; - } else if (iew.ldstQueue.numFreeEntries() >= + } else if (iew.ldstQueue.numFreeLoadEntries() >= iew.ldstQueue.entryAmount(activeThreads.size() + 1)) { DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " - "LSQ entries.\n", + "LQ entries.\n", tid); ready = false; + } else if (iew.ldstQueue.numFreeStoreEntries() >= + iew.ldstQueue.entryAmount(activeThreads.size() + 1)) { + DPRINTF(O3CPU,"[tid:%i] Suspending thread due to not enough " + "SQ entries.\n", + tid); + ready = false; } if (ready) { diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 644366dfc..20d3a1c36 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2010-2013 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. 
* * The license below extends only to copyright in the software and shall @@ -315,8 +316,8 @@ DefaultIEW::startupStage() instQueue.numFreeEntries(tid); toRename->iewInfo[tid].usedLSQ = true; - toRename->iewInfo[tid].freeLSQEntries = - ldstQueue.numFreeEntries(tid); + toRename->iewInfo[tid].freeLQEntries = ldstQueue.numFreeLoadEntries(tid); + toRename->iewInfo[tid].freeSQEntries = ldstQueue.numFreeStoreEntries(tid); } // Initialize the checker's dcache port here @@ -467,9 +468,11 @@ DefaultIEW::squash(ThreadID tid) tid, fromCommit->commitInfo[tid].doneSeqNum); while (!skidBuffer[tid].empty()) { - if (skidBuffer[tid].front()->isLoad() || - skidBuffer[tid].front()->isStore() ) { - toRename->iewInfo[tid].dispatchedToLSQ++; + if (skidBuffer[tid].front()->isLoad()) { + toRename->iewInfo[tid].dispatchedToLQ++; + } + if (skidBuffer[tid].front()->isStore()) { + toRename->iewInfo[tid].dispatchedToSQ++; } toRename->iewInfo[tid].dispatched++; @@ -903,9 +906,11 @@ DefaultIEW::emptyRenameInsts(ThreadID tid) while (!insts[tid].empty()) { - if (insts[tid].front()->isLoad() || - insts[tid].front()->isStore() ) { - toRename->iewInfo[tid].dispatchedToLSQ++; + if (insts[tid].front()->isLoad()) { + toRename->iewInfo[tid].dispatchedToLQ++; + } + if (insts[tid].front()->isStore()) { + toRename->iewInfo[tid].dispatchedToSQ++; } toRename->iewInfo[tid].dispatched++; @@ -1043,9 +1048,13 @@ DefaultIEW::dispatchInsts(ThreadID tid) insts_to_dispatch.pop(); //Tell Rename That An Instruction has been processed - if (inst->isLoad() || inst->isStore()) { - toRename->iewInfo[tid].dispatchedToLSQ++; + if (inst->isLoad()) { + toRename->iewInfo[tid].dispatchedToLQ++; } + if (inst->isStore()) { + toRename->iewInfo[tid].dispatchedToSQ++; + } + toRename->iewInfo[tid].dispatched++; continue; @@ -1093,7 +1102,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) add_to_iq = true; - toRename->iewInfo[tid].dispatchedToLSQ++; + toRename->iewInfo[tid].dispatchedToLQ++; } else if (inst->isStore()) { DPRINTF(IEW, 
"[tid:%i]: Issue: Memory instruction " "encountered, adding to LSQ.\n", tid); @@ -1116,7 +1125,7 @@ DefaultIEW::dispatchInsts(ThreadID tid) add_to_iq = true; } - toRename->iewInfo[tid].dispatchedToLSQ++; + toRename->iewInfo[tid].dispatchedToSQ++; } else if (inst->isMemBarrier() || inst->isWriteBarrier()) { // Same as non-speculative stores. inst->setCanCommit(); @@ -1613,8 +1622,11 @@ DefaultIEW::tick() toRename->iewInfo[tid].freeIQEntries = instQueue.numFreeEntries(tid); toRename->iewInfo[tid].usedLSQ = true; - toRename->iewInfo[tid].freeLSQEntries = - ldstQueue.numFreeEntries(tid); + + toRename->iewInfo[tid].freeLQEntries = + ldstQueue.numFreeLoadEntries(tid); + toRename->iewInfo[tid].freeSQEntries = + ldstQueue.numFreeStoreEntries(tid); wroteToTimeBuffer = true; } @@ -1624,9 +1636,9 @@ DefaultIEW::tick() } DPRINTF(IEW, "IQ has %i free entries (Can schedule: %i). " - "LSQ has %i free entries.\n", + "LQ has %i free entries. SQ has %i free entries.\n", instQueue.numFreeEntries(), instQueue.hasReadyInsts(), - ldstQueue.numFreeEntries()); + ldstQueue.numFreeLoadEntries(), ldstQueue.numFreeStoreEntries()); updateStatus(); diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index 36ad75aed..e0ed05d7e 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * * The license below extends only to copyright in the software and shall @@ -204,11 +205,21 @@ class LSQ { int numStores(ThreadID tid) { return thread[tid].numStores(); } - /** Returns the number of free entries. */ - unsigned numFreeEntries(); + /** Returns the number of free load entries. */ + unsigned numFreeLoadEntries(); + + /** Returns the number of free store entries. */ + unsigned numFreeStoreEntries(); + /** Returns the number of free entries for a specific thread. */ unsigned numFreeEntries(ThreadID tid); + /** Returns the number of free entries in the LQ for a specific thread. 
*/ + unsigned numFreeLoadEntries(ThreadID tid); + + /** Returns the number of free entries in the SQ for a specific thread. */ + unsigned numFreeStoreEntries(ThreadID tid); + /** Returns if the LSQ is full (either LQ or SQ is full). */ bool isFull(); /** diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 7e89968a8..6e605b6a0 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -1,5 +1,6 @@ /* * Copyright (c) 2011-2012 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * * The license below extends only to copyright in the software and shall @@ -433,7 +434,7 @@ LSQ::numStores() template unsigned -LSQ::numFreeEntries() +LSQ::numFreeLoadEntries() { unsigned total = 0; @@ -443,7 +444,7 @@ LSQ::numFreeEntries() while (threads != end) { ThreadID tid = *threads++; - total += thread[tid].numFreeEntries(); + total += thread[tid].numFreeLoadEntries(); } return total; @@ -451,12 +452,34 @@ LSQ::numFreeEntries() template unsigned -LSQ::numFreeEntries(ThreadID tid) +LSQ::numFreeStoreEntries() { - //if (lsqPolicy == Dynamic) - //return numFreeEntries(); - //else - return thread[tid].numFreeEntries(); + unsigned total = 0; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + total += thread[tid].numFreeStoreEntries(); + } + + return total; +} + +template +unsigned +LSQ::numFreeLoadEntries(ThreadID tid) +{ + return thread[tid].numFreeLoadEntries(); +} + +template +unsigned +LSQ::numFreeStoreEntries(ThreadID tid) +{ + return thread[tid].numFreeStoreEntries(); } template diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 0978e721b..00469197d 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -12,6 +12,7 @@ * modified or unmodified, in source code or in binary form. 
* * Copyright (c) 2004-2006 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -198,8 +199,11 @@ class LSQUnit { void setLoadBlockedHandled() { loadBlockedHandled = true; } - /** Returns the number of free entries (min of free LQ and SQ entries). */ - unsigned numFreeEntries(); + /** Returns the number of free LQ entries. */ + unsigned numFreeLoadEntries(); + + /** Returns the number of free SQ entries. */ + unsigned numFreeStoreEntries(); /** Returns the number of loads in the LQ. */ int numLoads() { return loads; } diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index 547800b4c..e6bb560af 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -1,6 +1,7 @@ /* * Copyright (c) 2010-2013 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * * The license below extends only to copyright in the software and shall @@ -414,20 +415,25 @@ LSQUnit::getMemDepViolator() template unsigned -LSQUnit::numFreeEntries() +LSQUnit::numFreeLoadEntries() { - unsigned free_lq_entries = LQEntries - loads; - unsigned free_sq_entries = SQEntries - stores; - - // Both the LQ and SQ entries have an extra dummy entry to differentiate - // empty/full conditions. Subtract 1 from the free entries. - if (free_lq_entries < free_sq_entries) { - return free_lq_entries - 1; - } else { - return free_sq_entries - 1; - } + //LQ has an extra dummy entry to differentiate + //empty/full conditions. Subtract 1 from the free entries. + DPRINTF(LSQUnit, "LQ size: %d, #loads occupied: %d\n", LQEntries, loads); + return LQEntries - loads - 1; } +template +unsigned +LSQUnit::numFreeStoreEntries() +{ + //SQ has an extra dummy entry to differentiate + //empty/full conditions. Subtract 1 from the free entries. 
+ DPRINTF(LSQUnit, "SQ size: %d, #stores occupied: %d\n", SQEntries, stores); + return SQEntries - stores - 1; + + } + template void LSQUnit::checkSnoop(PacketPtr pkt) diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 606c3365e..89bf0608f 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -12,6 +12,7 @@ * modified or unmodified, in source code or in binary form. * * Copyright (c) 2004-2006 The Regents of The University of Michigan + * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -251,8 +252,11 @@ class DefaultRename /** Calculates the number of free IQ entries for a specific thread. */ inline int calcFreeIQEntries(ThreadID tid); - /** Calculates the number of free LSQ entries for a specific thread. */ - inline int calcFreeLSQEntries(ThreadID tid); + /** Calculates the number of free LQ entries for a specific thread. */ + inline int calcFreeLQEntries(ThreadID tid); + + /** Calculates the number of free SQ entries for a specific thread. */ + inline int calcFreeSQEntries(ThreadID tid); /** Returns the number of valid instructions coming from decode. */ unsigned validInsts(); @@ -355,6 +359,16 @@ class DefaultRename */ int instsInProgress[Impl::MaxThreads]; + /** Count of Load instructions that rename has sent on but IEW has not yet + * reported as dispatched, so the free LQ entry count does not include them. + */ + int loadsInProgress[Impl::MaxThreads]; + + /** Count of Store instructions that rename has sent on but IEW has not yet + * reported as dispatched, so the free SQ entry count does not include them. + */ + int storesInProgress[Impl::MaxThreads]; + /** Variable that tracks if decode has written to the time buffer this * cycle. Used to tell CPU if there is activity this cycle.
*/ @@ -365,8 +379,9 @@ class DefaultRename */ struct FreeEntries { unsigned iqEntries; - unsigned lsqEntries; unsigned robEntries; + unsigned lqEntries; + unsigned sqEntries; }; /** Per-thread tracking of the number of free entries of back-end @@ -444,7 +459,8 @@ class DefaultRename enum FullSource { ROB, IQ, - LSQ, + LQ, + SQ, NONE }; @@ -473,8 +489,10 @@ class DefaultRename Stats::Scalar renameROBFullEvents; /** Stat for total number of times that the IQ starts a stall in rename. */ Stats::Scalar renameIQFullEvents; - /** Stat for total number of times that the LSQ starts a stall in rename. */ - Stats::Scalar renameLSQFullEvents; + /** Stat for total number of times that the LQ starts a stall in rename. */ + Stats::Scalar renameLQFullEvents; + /** Stat for total number of times that the SQ starts a stall in rename. */ + Stats::Scalar renameSQFullEvents; /** Stat for total number of times that rename runs out of free registers * to use to rename. */ Stats::Scalar renameFullRegistersEvents; diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index dcf1d4c66..49abb0055 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -131,10 +131,14 @@ DefaultRename::regStats() .name(name() + ".IQFullEvents") .desc("Number of times rename has blocked due to IQ full") .prereq(renameIQFullEvents); - renameLSQFullEvents - .name(name() + ".LSQFullEvents") - .desc("Number of times rename has blocked due to LSQ full") - .prereq(renameLSQFullEvents); + renameLQFullEvents + .name(name() + ".LQFullEvents") + .desc("Number of times rename has blocked due to LQ full") + .prereq(renameLQFullEvents); + renameSQFullEvents + .name(name() + ".SQFullEvents") + .desc("Number of times rename has blocked due to SQ full") + .prereq(renameSQFullEvents); renameFullRegistersEvents .name(name() + ".FullRegisterEvents") .desc("Number of times there has been no free registers") @@ -237,7 +241,8 @@ DefaultRename::resetStage() renameStatus[tid] = Idle; 
freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid); - freeEntries[tid].lsqEntries = iew_ptr->ldstQueue.numFreeEntries(tid); + freeEntries[tid].lqEntries = iew_ptr->ldstQueue.numFreeLoadEntries(tid); + freeEntries[tid].sqEntries = iew_ptr->ldstQueue.numFreeStoreEntries(tid); freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid); emptyROB[tid] = true; @@ -246,6 +251,8 @@ DefaultRename::resetStage() serializeInst[tid] = NULL; instsInProgress[tid] = 0; + loadsInProgress[tid] = 0; + storesInProgress[tid] = 0; serializeOnNextInst[tid] = false; } @@ -420,7 +427,10 @@ DefaultRename::tick() // @todo: make into updateProgress function for (ThreadID tid = 0; tid < numThreads; tid++) { instsInProgress[tid] -= fromIEW->iewInfo[tid].dispatched; - + loadsInProgress[tid] -= fromIEW->iewInfo[tid].dispatchedToLQ; + storesInProgress[tid] -= fromIEW->iewInfo[tid].dispatchedToSQ; + assert(loadsInProgress[tid] >= 0); + assert(storesInProgress[tid] >= 0); assert(instsInProgress[tid] >=0); } @@ -509,7 +519,6 @@ DefaultRename::renameInsts(ThreadID tid) // entries. int free_rob_entries = calcFreeROBEntries(tid); int free_iq_entries = calcFreeIQEntries(tid); - int free_lsq_entries = calcFreeLSQEntries(tid); int min_free_entries = free_rob_entries; FullSource source = ROB; @@ -519,22 +528,15 @@ DefaultRename::renameInsts(ThreadID tid) source = IQ; } - if (free_lsq_entries < min_free_entries) { - min_free_entries = free_lsq_entries; - source = LSQ; - } - // Check if there's any space left. 
if (min_free_entries <= 0) { - DPRINTF(Rename, "[tid:%u]: Blocking due to no free ROB/IQ/LSQ " + DPRINTF(Rename, "[tid:%u]: Blocking due to no free ROB/IQ/ " "entries.\n" "ROB has %i free entries.\n" - "IQ has %i free entries.\n" - "LSQ has %i free entries.\n", + "IQ has %i free entries.\n", tid, free_rob_entries, - free_iq_entries, - free_lsq_entries); + free_iq_entries); blockThisCycle = true; @@ -585,6 +587,28 @@ DefaultRename::renameInsts(ThreadID tid) inst = insts_to_rename.front(); + //For all kinds of instructions, the ROB and IQ are checked first + //For a load instruction, check LQ size, taking in-flight loads into account + //For a store instruction, check SQ size, taking in-flight stores into account + + if (inst->isLoad()) { + if (calcFreeLQEntries(tid) <= 0) { + DPRINTF(Rename, "[tid:%u]: Cannot rename due to no free LQ\n", tid); + source = LQ; + incrFullStat(source); + break; + } + } + + if (inst->isStore()) { + if (calcFreeSQEntries(tid) <= 0) { + DPRINTF(Rename, "[tid:%u]: Cannot rename due to no free SQ\n", tid); + source = SQ; + incrFullStat(source); + break; + } + } + insts_to_rename.pop_front(); if (renameStatus[tid] == Unblocking) { @@ -665,6 +689,12 @@ DefaultRename::renameInsts(ThreadID tid) renameDestRegs(inst, inst->threadNumber); + if (inst->isLoad()) { + loadsInProgress[tid]++; + } + if (inst->isStore()) { + storesInProgress[tid]++; + } ++renamed_insts; @@ -1122,14 +1152,26 @@ DefaultRename::calcFreeIQEntries(ThreadID tid) template inline int -DefaultRename::calcFreeLSQEntries(ThreadID tid) +DefaultRename::calcFreeLQEntries(ThreadID tid) { - int num_free = freeEntries[tid].lsqEntries - - (instsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLSQ); - - //DPRINTF(Rename,"[tid:%i]: %i lsq free\n",tid,num_free); + int num_free = freeEntries[tid].lqEntries - + (loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ); + DPRINTF(Rename, "calcFreeLQEntries: free lqEntries: %d, loadsInProgress: %d, " + "loads dispatchedToLQ: %d\n",
freeEntries[tid].lqEntries, + loadsInProgress[tid], fromIEW->iewInfo[tid].dispatchedToLQ); + return num_free; +} - return num_free; +template +inline int +DefaultRename::calcFreeSQEntries(ThreadID tid) +{ + int num_free = freeEntries[tid].sqEntries - + (storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ); + DPRINTF(Rename, "calcFreeSQEntries: free sqEntries: %d, storesInProgress: %d, " + "stores dispatchedToSQ: %d\n", freeEntries[tid].sqEntries, + storesInProgress[tid], fromIEW->iewInfo[tid].dispatchedToSQ); + return num_free; } template @@ -1187,7 +1229,7 @@ DefaultRename::checkStall(ThreadID tid) } else if (calcFreeIQEntries(tid) <= 0) { DPRINTF(Rename,"[tid:%i]: Stall: IQ has 0 free entries.\n", tid); ret_val = true; - } else if (calcFreeLSQEntries(tid) <= 0) { + } else if (calcFreeLQEntries(tid) <= 0 && calcFreeSQEntries(tid) <= 0) { DPRINTF(Rename,"[tid:%i]: Stall: LSQ has 0 free entries.\n", tid); ret_val = true; } else if (renameMap[tid]->numFreeEntries() <= 0) { @@ -1211,8 +1253,10 @@ DefaultRename::readFreeEntries(ThreadID tid) if (fromIEW->iewInfo[tid].usedIQ) freeEntries[tid].iqEntries = fromIEW->iewInfo[tid].freeIQEntries; - if (fromIEW->iewInfo[tid].usedLSQ) - freeEntries[tid].lsqEntries = fromIEW->iewInfo[tid].freeLSQEntries; + if (fromIEW->iewInfo[tid].usedLSQ) { + freeEntries[tid].lqEntries = fromIEW->iewInfo[tid].freeLQEntries; + freeEntries[tid].sqEntries = fromIEW->iewInfo[tid].freeSQEntries; + } if (fromCommit->commitInfo[tid].usedROB) { freeEntries[tid].robEntries = @@ -1220,11 +1264,13 @@ DefaultRename::readFreeEntries(ThreadID tid) emptyROB[tid] = fromCommit->commitInfo[tid].emptyROB; } - DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, Free LSQ: %i\n", + DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, " + "Free LQ: %i, Free SQ: %i\n", tid, freeEntries[tid].iqEntries, freeEntries[tid].robEntries, - freeEntries[tid].lsqEntries); + freeEntries[tid].lqEntries, + freeEntries[tid].sqEntries); DPRINTF(Rename, "[tid:%i]: %i 
instructions not yet in ROB\n", tid, instsInProgress[tid]); @@ -1363,8 +1409,11 @@ DefaultRename::incrFullStat(const FullSource &source) case IQ: ++renameIQFullEvents; break; - case LSQ: - ++renameLSQFullEvents; + case LQ: + ++renameLQFullEvents; + break; + case SQ: + ++renameSQFullEvents; break; default: panic("Rename full stall stat should be incremented for a reason!"); -- 2.30.2