' m5_switchcpu pseudo-ops will toggle back and forth')
parser.add_option("--num-hw-queues", type="int", default=10,
help="number of hw queues in packet processor")
+parser.add_option("--reg-alloc-policy", type="string", default="simple",
+                  help="register allocation policy (simple/dynamic)")
Ruby.define_options(parser)
for k in range(shader.n_wf):
wavefronts.append(Wavefront(simdId = j, wf_slot_id = k,
wf_size = options.wf_size))
-        vrf_pool_mgrs.append(SimplePoolManager(pool_size = \
-                                               options.vreg_file_size,
-                                               min_alloc = \
-                                               options.vreg_min_alloc))
-
-        vrfs.append(VectorRegisterFile(simd_id=j, wf_size=options.wf_size,
-                                       num_regs=options.vreg_file_size))
-
-        srf_pool_mgrs.append(SimplePoolManager(pool_size = \
-                                               options.sreg_file_size,
-                                               min_alloc = \
-                                               options.vreg_min_alloc))
+        if options.reg_alloc_policy == "simple":
+            vrf_pool_mgrs.append(SimplePoolManager(pool_size = \
+                                                   options.vreg_file_size,
+                                                   min_alloc = \
+                                                   options.vreg_min_alloc))
+            srf_pool_mgrs.append(SimplePoolManager(pool_size = \
+                                                   options.sreg_file_size,
+                                                   min_alloc = \
+                                                   options.vreg_min_alloc))
+        elif options.reg_alloc_policy == "dynamic":
+            vrf_pool_mgrs.append(DynPoolManager(pool_size = \
+                                                options.vreg_file_size,
+                                                min_alloc = \
+                                                options.vreg_min_alloc))
+            srf_pool_mgrs.append(DynPoolManager(pool_size = \
+                                                options.sreg_file_size,
+                                                min_alloc = \
+                                                options.vreg_min_alloc))
+
+        vrfs.append(VectorRegisterFile(simd_id=j, wf_size=options.wf_size,
+                                       num_regs=options.vreg_file_size))
srfs.append(ScalarRegisterFile(simd_id=j, wf_size=options.wf_size,
num_regs=options.sreg_file_size))
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
-#
-# Authors: Steve Reinhardt
from m5.defines import buildEnv
from m5.params import *
cxx_class = 'SimplePoolManager'
cxx_header = "gpu-compute/simple_pool_manager.hh"
+# This pool manager allows multiple workgroups to share one CU's
+# register pool
+class DynPoolManager(PoolManager):
+ type = 'DynPoolManager'
+ cxx_class = 'DynPoolManager'
+ cxx_header = "gpu-compute/dyn_pool_manager.hh"
+
class RegisterFile(SimObject):
type = 'RegisterFile'
cxx_class = 'RegisterFile'
Source('scheduler.cc')
Source('scoreboard_check_stage.cc')
Source('shader.cc')
+Source('dyn_pool_manager.cc')
Source('simple_pool_manager.cc')
Source('static_register_manager_policy.cc')
Source('tlb_coalescer.cc')
injectGlobalMemFence(gpuDynInst, true);
}
+// Reset the SIMD register pools. This is needed once all WGs on the CU
+// have completed, before wavefronts from another kernel are dispatched.
+void
+ComputeUnit::resetRegisterPool()
+{
+    for (int i = 0; i < numVectorALUs; i++) {
+        registerManager->vrfPoolMgrs[i]->resetRegion(numVecRegsPerSimd);
+        registerManager->srfPoolMgrs[i]->resetRegion(numScalarRegsPerSimd);
+    }
+}
+
void
ComputeUnit::dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg)
{
int cacheLineSize() const { return _cacheLineSize; }
int getCacheLineBits() const { return cacheLineBits; }
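+    // Reset the per-SIMD register pools; called after the CU has been
+    // invalidated, before workgroups from a new kernel are dispatched.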
+ void resetRegisterPool();
+
private:
WFBarrier&
barrierSlot(int bar_id)
--- /dev/null
+/*
+ * Copyright (c) 2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "base/logging.hh"
+#include "base/trace.hh"
+#include "debug/GPUVRF.hh"
+#include "gpu-compute/dyn_pool_manager.hh"
+
+// return the min number of elements that the manager can reserve given
+// a request for "size" elements
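+// e.g., with minAllocation() == 4, a request for 7 registers is rounded
+// up to 8, the next multiple of the minimum allocation unit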
+uint32_t
+DynPoolManager::minAllocatedElements(uint32_t size)
+{
+ fatal_if(size <= 0 || size > poolSize(), "Illegal VGPR region size=%d\n",
+ size);
+
+ return size % minAllocation() > 0 ?
+ (minAllocation() - (size % minAllocation())) + size : size;
+}
+
+std::string
+DynPoolManager::printRegion()
+{
+ std::string _cout;
+ uint32_t reservedEntries = 0;
+
+    /*
+      Iterate over the chunks in freeSpaceRecord; the first element of
+      each pair indicates how much of that chunk has already been handed
+      out. This only covers the partially allocated regions, so the fully
+      allocated ones are counted separately via reservedSpaceRecord.
+    */
+ auto it_free = freeSpaceRecord.begin();
+ while (it_free != freeSpaceRecord.end()) {
+ reservedEntries += it_free->first;
+ ++it_free;
+ }
+ reservedEntries += (reservedSpaceRecord * totalRegSpace);
+
+    if (reservedEntries == 0) {
+        _cout = "VRF is empty\n";
+    } else {
+        _cout = "VRF reserves " + std::to_string(reservedEntries) +
+            " VGPRs\n";
+    }
+ return _cout;
+}
+
+// reset freeSpace and reservedSpace
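+// (called from ComputeUnit::resetRegisterPool() with the number of
+// registers per SIMD)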
+void
+DynPoolManager::resetRegion(const int &regsPerSimd)
+{
+ totalRegSpace = regsPerSimd;
+ reservedSpaceRecord = 0;
+ freeSpaceRecord.clear();
+
+    // reset available free space to a single chunk spanning the full pool
+    _totRegSpaceAvailable = regsPerSimd;
+    freeSpaceRecord.push_back(std::make_pair(0, regsPerSimd));
+}
+
+bool
+DynPoolManager::canAllocate(uint32_t numRegions, uint32_t size)
+{
+ uint32_t actualSize = minAllocatedElements(size);
+ DPRINTF(GPUVRF,"Can Allocate %d\n",actualSize);
+ return (_totRegSpaceAvailable >= actualSize);
+}
+
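+// First-fit allocation: walk freeSpaceRecord and carve the request out of
+// the first free chunk that is large enough. For example, a free chunk
+// (startIdx 0, size 128) serving a request that rounds to 64 registers
+// returns index 0 and shrinks to (64, 64).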
+uint32_t
+DynPoolManager::allocateRegion(const uint32_t size,
+ uint32_t *reservedPoolSize)
+{
+ uint32_t startIdx = (unsigned)-1;
+ uint32_t actualSize = minAllocatedElements(size);
+ auto it = freeSpaceRecord.begin();
+ while (it != freeSpaceRecord.end()) {
+ if (it->second >= actualSize) {
+ // assign the next block starting from here
+ startIdx = it->first;
+ _regionSize = actualSize;
+ *reservedPoolSize = actualSize;
+ _totRegSpaceAvailable -= actualSize;
+
+            // if this chunk is exactly the size of the request, it can't
+            // serve any future allocation and is removed from the free list
+ if (it->second == actualSize) {
+ it = freeSpaceRecord.erase(it);
+                // the chunk is now fully allocated; account for it in
+                // reservedSpaceRecord
+ ++reservedSpaceRecord;
+ } else {
+ it->first += actualSize;
+ it->second -= actualSize;
+ }
+ break;
+ }
+ it++;
+ }
+ DPRINTF(GPUVRF,"totRegSpace %d allocating Register at %d and"
+ " size %d\n",_totRegSpaceAvailable,startIdx,actualSize);
+ return startIdx;
+}
+
+void
+DynPoolManager::freeRegion(uint32_t firstIdx, uint32_t lastIdx)
+{
+    // lastIdx - firstIdx gives the size of the region being freed
+    DPRINTF(GPUVRF, "freeing Region at %d %d, size %d\n",
+            firstIdx, lastIdx, lastIdx - firstIdx);
+
+ // Current dynamic register allocation does not handle wraparound
+ assert(firstIdx < lastIdx);
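+    // note: freed regions are appended as new chunks; adjacent free
+    // chunks are not coalesced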
+    _totRegSpaceAvailable += lastIdx - firstIdx;
+    freeSpaceRecord.push_back(std::make_pair(firstIdx, lastIdx - firstIdx));
+ // remove corresponding entry from reservedSpaceRecord too
+ --reservedSpaceRecord;
+}
+
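+// size of an inclusive (first, last) index pair; the wraparound case is
+// retained for interface compatibility, although dynamic allocation never
+// creates wrapped regions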
+uint32_t
+DynPoolManager::regionSize(std::pair<uint32_t, uint32_t> &region)
+{
+ bool wrapAround = (region.first > region.second);
+ if (!wrapAround) {
+ return region.second - region.first + 1;
+ } else {
+ return region.second + poolSize() - region.first + 1;
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * For use for simulation and test purposes only
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __DYN_POOL_MANAGER_HH__
+#define __DYN_POOL_MANAGER_HH__
+
+#include <cassert>
+#include <cstdint>
+#include <list>
+#include <string>
+#include <utility>
+
+#include "gpu-compute/pool_manager.hh"
+#include "params/DynPoolManager.hh"
+
+// Dynamic Pool Manager: allows multiple WGs on the same pool
+class DynPoolManager : public PoolManager
+{
+ public:
+ DynPoolManager(const PoolManagerParams &p)
+ : PoolManager(p), _regionSize(0), _nxtFreeIdx(0)
+ {
+ _totRegSpaceAvailable = p.pool_size;
+ }
+
+    uint32_t allocateRegion(const uint32_t size,
+                            uint32_t *reservedPoolSize) override;
+ bool canAllocate(uint32_t numRegions, uint32_t size) override;
+ void freeRegion(uint32_t firstIdx, uint32_t lastIdx) override;
+ uint32_t minAllocatedElements(uint32_t size);
+ std::string printRegion() override;
+    uint32_t regionSize(std::pair<uint32_t, uint32_t> &region) override;
+    void resetRegion(const int &regsPerSimd) override;
+
+ private:
+ // actual size of a region (normalized to the minimum size that can
+ // be reserved)
+ uint32_t _regionSize;
+ // next index to allocate a region
+ int _nxtFreeIdx;
+ // total registers available - across chunks
+ uint32_t _totRegSpaceAvailable;
+    // free chunks in the pool, recorded as (start index, size) pairs
+    std::list<std::pair<int, int>> freeSpaceRecord;
+    // number of fully allocated chunks
+    int reservedSpaceRecord;
+ // total registers to be allocated -- treat as a const
+ int totalRegSpace;
+};
+
+#endif // __DYN_POOL_MANAGER_HH__
virtual void freeRegion(uint32_t firstIdx, uint32_t lastIdx) = 0;
uint32_t poolSize() { return _poolSize; }
+    // Reset the register pool. This is needed once all WGs on the
+    // compute unit have finished, before WGs from another kernel are
+    // launched. The base implementation is a no-op; pool managers that
+    // support resetting (e.g., DynPoolManager) override it.
+    virtual void resetRegion(const int &regsPerSimd) {}
private:
// minimum size that can be reserved per allocation
_dispatcher.updateInvCounter(kernId, +1);
// all necessary INV flags are all set now, call cu to execute
cuList[i_cu]->doInvalidate(req, task->dispatchId());
+
+        // reset this CU's register pools so WGs from the next kernel
+        // start from a clean allocation state
+        cuList[i_cu]->resetRegisterPool();
}
}
w->simdId,
w->computeUnit->scalarRegsReserved[w->simdId]);
- int endIndex = (w->startVgprIndex + w->reservedVectorRegs - 1) %
- w->computeUnit->vrf[w->simdId]->numRegs();
+ // Current dynamic register allocation does not handle wraparound
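+    // endIndex is one past the last reserved VGPR, so freeRegion computes
+    // the region size as endIndex - startVgprIndex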
+ int endIndex = w->startVgprIndex + w->reservedVectorRegs;
w->computeUnit->registerManager->vrfPoolMgrs[w->simdId]->
freeRegion(w->startVgprIndex, endIndex);
// mark/pre-mark all registers as not busy
for (int i = 0; i < w->reservedVectorRegs; i++) {
uint32_t physVgprIdx = mapVgpr(w, i);
w->computeUnit->vrf[w->simdId]->markReg(physVgprIdx, false);
w->reservedVectorRegs = 0;
w->startVgprIndex = 0;
- endIndex = (w->startSgprIndex + w->reservedScalarRegs - 1) %
- w->computeUnit->srf[w->simdId]->numRegs();
+ endIndex = w->startSgprIndex + w->reservedScalarRegs;
w->computeUnit->registerManager->srfPoolMgrs[w->simdId]->
freeRegion(w->startSgprIndex, endIndex);
// mark/pre-mark all registers as not busy
for (int i = 0; i < w->reservedScalarRegs; i++) {
uint32_t physSgprIdx = mapSgpr(w, i);
w->computeUnit->srf[w->simdId]->markReg(physSgprIdx, false);