While FastAlloc provides a small performance increase (~1.5%) over regular malloc, it isn't thread-safe.
After removing FastAlloc and using tcmalloc, I've seen a performance increase of 12% over libc malloc
when running twolf for ARM.
ListVariable('CPU_MODELS', 'CPU models',
sorted(n for n,m in CpuModel.dict.iteritems() if m.default),
sorted(CpuModel.list)),
- BoolVariable('NO_FAST_ALLOC', 'Disable fast object allocator', False),
- BoolVariable('FORCE_FAST_ALLOC',
- 'Enable fast object allocator, even for gem5.debug', False),
- BoolVariable('FAST_ALLOC_STATS', 'Enable fast object allocator statistics',
- False),
BoolVariable('EFENCE', 'Link with Electric Fence malloc debugger',
False),
BoolVariable('SS_COMPATIBLE_FP',
)
# These variables get exported to #defines in config/*.hh (see src/SConscript).
-export_vars += ['USE_FENV', 'NO_FAST_ALLOC', 'FORCE_FAST_ALLOC',
- 'FAST_ALLOC_STATS', 'SS_COMPATIBLE_FP',
+export_vars += ['USE_FENV', 'SS_COMPATIBLE_FP',
'TARGET_ISA', 'CP_ANNOTATE', 'USE_POSIX_CLOCK' ]
###################################################
#include "arch/x86/pagetable.hh"
#include "arch/x86/tlb.hh"
-#include "base/fast_alloc.hh"
#include "base/types.hh"
#include "mem/mem_object.hh"
#include "mem/packet.hh"
WalkerPort port;
// State to track each walk of the page table
- class WalkerState : public FastAlloc
+ class WalkerState
{
private:
enum State {
Source('circlebuf.cc')
Source('cprintf.cc')
Source('debug.cc')
-Source('fast_alloc.cc')
if env['USE_FENV']:
Source('fenv.c')
Source('hostinfo.cc')
+++ /dev/null
-/*
- * Copyright (c) 2000-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Steve Reinhardt
- */
-
-/*
- * This code was originally written by Steve Reinhardt as part of
- * the Wisconsin Wind Tunnel simulator. Relicensed as part of M5
- * by permission.
- */
-
-#include <cassert>
-
-#include "base/fast_alloc.hh"
-
-#if USE_FAST_ALLOC
-
-void *FastAlloc::freeLists[Num_Buckets];
-
-#if FAST_ALLOC_STATS
-unsigned FastAlloc::newCount[Num_Buckets];
-unsigned FastAlloc::deleteCount[Num_Buckets];
-unsigned FastAlloc::allocCount[Num_Buckets];
-#endif
-
-void *
-FastAlloc::moreStructs(int bucket)
-{
- assert(bucket > 0 && bucket < Num_Buckets);
-
- int sz = bucket * Alloc_Quantum;
- const int nstructs = Num_Structs_Per_New; // how many to allocate?
- char *p = ::new char[nstructs * sz];
-
-#if FAST_ALLOC_STATS
- ++allocCount[bucket];
-#endif
-
- freeLists[bucket] = p;
- for (int i = 0; i < (nstructs-2); ++i, p += sz)
- *(void **)p = p + sz;
- *(void **)p = 0;
-
- return (p + sz);
-}
-
-#endif // USE_FAST_ALLOC
+++ /dev/null
-/*
- * Copyright (c) 2000-2001, 2003-2005 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Authors: Steve Reinhardt
- */
-
-/*
- * This code was originally written by Steve Reinhardt as part of
- * the Wisconsin Wind Tunnel simulator. Relicensed as part of M5
- * by permission.
- */
-
-#ifndef __BASE_FAST_ALLOC_HH__
-#define __BASE_FAST_ALLOC_HH__
-
-#include <cstddef>
-
-// Fast structure allocator. Designed for small objects that are
-// frequently allocated and deallocated. This code is derived from the
-// 'alloc_struct' package used in WWT and Blizzard. C++ provides a
-// much nicer framework for the same optimization. The package is
-// implemented as a class, FastAlloc. Allocation and deletion are
-// performed using FastAlloc's new and delete operators. Any object
-// that derives from the FastAlloc class will transparently use this
-// allocation package.
-
-// The static allocate() and deallocate() methods can also be called
-// directly if desired.
-
-// In order for derived classes to call delete with the correct
-// structure size even when they are deallocated via a base-type
-// pointer, they must have a virtual destructor. It is sufficient for
-// FastAlloc to declare a virtual destructor (as it does); it is not
-// required for derived classes to declare their own destructor. The
-// compiler will automatically generate a virtual destructor for each
-// derived class. However, it is more efficient if each derived class
-// defines an inline destructor, so that the compiler can statically
-// collapse the destructor call chain back up the inheritance
-// hierarchy.
-
-#include "config/fast_alloc_stats.hh"
-#include "config/force_fast_alloc.hh"
-#include "config/no_fast_alloc.hh"
-
-// By default, we want to enable FastAlloc in any build other than
-// m5.debug. (FastAlloc's reuse policies can mask allocation bugs, so
-// we typically want it disabled when debugging.) Set
-// FORCE_FAST_ALLOC to enable even when debugging, and set
-// NO_FAST_ALLOC to disable even in non-debug builds.
-#define USE_FAST_ALLOC \
- (FORCE_FAST_ALLOC || (!defined(DEBUG) && !NO_FAST_ALLOC))
-
-#if !USE_FAST_ALLOC
-
-class FastAlloc
-{
-};
-
-#else
-
-class FastAlloc
-{
- public:
- static void *allocate(size_t);
- static void deallocate(void *, size_t);
-
- void *operator new(size_t);
- void operator delete(void *, size_t);
-
- virtual ~FastAlloc() {}
-
- private:
-
- // Max_Alloc_Size is the largest object that can be allocated with
- // this class. There's no fundamental limit, but this limits the
- // size of the freeLists array. Let's not make this really huge
- // like in Blizzard.
- static const size_t Max_Alloc_Size = 512;
-
- // Alloc_Quantum is the difference in size between adjacent
- // buckets in the free list array.
- static const int Log2_Alloc_Quantum = 3;
- static const int Alloc_Quantum = (1 << Log2_Alloc_Quantum);
-
- // Num_Buckets = bucketFor(Max_Alloc_Size) + 1
- static const int Num_Buckets =
- ((Max_Alloc_Size + Alloc_Quantum - 1) >> Log2_Alloc_Quantum) + 1;
-
- // when we call new() for more structures, how many should we get?
- static const int Num_Structs_Per_New = 20;
-
- static int bucketFor(size_t);
- static void *moreStructs(int bucket);
-
- static void *freeLists[Num_Buckets];
-
-#if FAST_ALLOC_STATS
- static unsigned newCount[Num_Buckets];
- static unsigned deleteCount[Num_Buckets];
- static unsigned allocCount[Num_Buckets];
-#endif
-};
-
-inline int
-FastAlloc::bucketFor(size_t sz)
-{
- return (sz + Alloc_Quantum - 1) >> Log2_Alloc_Quantum;
-}
-
-inline void *
-FastAlloc::allocate(size_t sz)
-{
- int b;
- void *p;
-
- if (sz > Max_Alloc_Size)
- return (void *)::new char[sz];
-
- b = bucketFor(sz);
- p = freeLists[b];
-
- if (p)
- freeLists[b] = *(void **)p;
- else
- p = moreStructs(b);
-
-#if FAST_ALLOC_STATS
- ++newCount[b];
-#endif
-
- return p;
-}
-
-inline void
-FastAlloc::deallocate(void *p, size_t sz)
-{
- int b;
-
- if (sz > Max_Alloc_Size) {
- ::delete [] (char *)p;
- return;
- }
-
- b = bucketFor(sz);
- *(void **)p = freeLists[b];
- freeLists[b] = p;
-#if FAST_ALLOC_STATS
- ++deleteCount[b];
-#endif
-}
-
-inline void *
-FastAlloc::operator new(size_t sz)
-{
- return allocate(sz);
-}
-
-inline void
-FastAlloc::operator delete(void *p, size_t sz)
-{
- deallocate(p, sz);
-}
-
-#endif // USE_FAST_ALLOC
-
-#endif // __BASE_FAST_ALLOC_HH__
#include <queue>
#include "arch/utility.hh"
-#include "base/fast_alloc.hh"
#include "base/trace.hh"
#include "config/the_isa.hh"
#include "cpu/checker/cpu.hh"
*/
template <class Impl>
-class BaseDynInst : public FastAlloc, public RefCounted
+class BaseDynInst : public RefCounted
{
public:
// Typedef for the CPU.
#include "arch/mt.hh"
#include "arch/types.hh"
#include "arch/utility.hh"
-#include "base/fast_alloc.hh"
#include "base/trace.hh"
#include "base/types.hh"
#include "config/the_isa.hh"
class ResourceRequest;
class Packet;
-class InOrderDynInst : public FastAlloc, public RefCounted
+class InOrderDynInst : public RefCounted
{
public:
// Binary machine instruction type.
#include "arch/isa_traits.hh"
#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
-#include "base/fast_alloc.hh"
#include "base/hashmap.hh"
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
MasterPort *dcachePort;
/** Derived class to hold any sender state the LSQ needs. */
- class LSQSenderState : public Packet::SenderState, public FastAlloc
+ class LSQSenderState : public Packet::SenderState
{
public:
/** Default constructor. */
#include <queue>
#include "arch/types.hh"
-#include "base/fast_alloc.hh"
#include "base/hashmap.hh"
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
};
/** Derived class to hold any sender state the LSQ needs. */
- class LSQSenderState : public Packet::SenderState, public FastAlloc
+ class LSQSenderState : public Packet::SenderState
{
public:
/** Default constructor. */
#include <set>
-#include "base/fast_alloc.hh"
#include "base/statistics.hh"
#include "mem/mem_object.hh"
#include "mem/port.hh"
CpuPort funcPort;
PortProxy funcProxy;
- class MemTestSenderState : public Packet::SenderState, public FastAlloc
+ class MemTestSenderState : public Packet::SenderState
{
public:
/** Constructor. */
#include <set>
-#include "base/fast_alloc.hh"
#include "base/statistics.hh"
#include "mem/mem_object.hh"
#include "mem/port.hh"
CpuPort cachePort;
- class NetworkTestSenderState : public Packet::SenderState, public FastAlloc
+ class NetworkTestSenderState : public Packet::SenderState
{
public:
/** Constructor. */
class DmaPort : public MasterPort
{
protected:
- struct DmaReqState : public Packet::SenderState, public FastAlloc
+ struct DmaReqState : public Packet::SenderState
{
/** Event to call on the device when this transaction (all packets)
* complete. */
#include <queue>
#include <string>
-#include "base/fast_alloc.hh"
#include "base/types.hh"
#include "mem/mem_object.hh"
#include "mem/packet.hh"
* state and original source. It has enough information to also
* restore the response once it comes back to the bridge.
*/
- class RequestState : public Packet::SenderState, public FastAlloc
+ class RequestState : public Packet::SenderState
{
public:
* Cache definitions.
*/
-#include "base/fast_alloc.hh"
#include "base/misc.hh"
#include "base/range.hh"
#include "base/types.hh"
}
-class ForwardResponseRecord : public Packet::SenderState, public FastAlloc
+class ForwardResponseRecord : public Packet::SenderState
{
Packet::SenderState *prevSenderState;
PortID prevSrc;
#include "base/cast.hh"
#include "base/compiler.hh"
-#include "base/fast_alloc.hh"
#include "base/flags.hh"
#include "base/misc.hh"
#include "base/printable.hh"
* ultimate destination and back, possibly being conveyed by several
* different Packets along the way.)
*/
-class Packet : public FastAlloc, public Printable
+class Packet : public Printable
{
public:
typedef uint32_t FlagsType;
* Object used to maintain state of a PrintReq. The senderState
* field of a PrintReq should always be of this type.
*/
- class PrintReqState : public SenderState, public FastAlloc
+ class PrintReqState : public SenderState
{
private:
/**
#include <cassert>
#include <climits>
-#include "base/fast_alloc.hh"
#include "base/flags.hh"
#include "base/misc.hh"
#include "base/types.hh"
typedef Request* RequestPtr;
typedef uint16_t MasterID;
-class Request : public FastAlloc
+class Request
{
public:
typedef uint32_t FlagsType;
setThreadContext(cid, tid);
}
- ~Request() {} // for FastAlloc
+ ~Request() {}
/**
* Set up CPU and thread numbers.
%include <std_string.i>
%include <stdint.i>
-%import "base/fast_alloc.hh"
%import "sim/serialize.hh"
%include "base/types.hh"
#include <iosfwd>
#include <string>
-#include "base/fast_alloc.hh"
#include "base/flags.hh"
#include "base/misc.hh"
#include "base/trace.hh"
*
* Caution, the order of members is chosen to maximize data packing.
*/
-class Event : public Serializable, public FastAlloc
+class Event : public Serializable
{
friend class EventQueue;