mem-cache: Use the compression factor to co-allocate
[gem5.git] / src / mem / stack_dist_calc.hh
1 /*
2 * Copyright (c) 2014-2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 #ifndef __MEM_STACK_DIST_CALC_HH__
39 #define __MEM_STACK_DIST_CALC_HH__
40
41 #include <limits>
42 #include <map>
43 #include <vector>
44
45 #include "base/types.hh"
46
47 /**
48 * The stack distance calculator is a passive object that merely
49 * observes the addresses pass to it. It calculates stack distances
50 * of incoming addresses based on the partial sum hierarchy tree
51 * algorithm described by Alamasi et al.
52 * http://doi.acm.org/10.1145/773039.773043.
53 *
54 * A tree structure is maintained and updated at each transaction
55 * (unique or non-unique). The tree is implemented as an STL vector
56 * with layers of the form <map> Each layer in this tree is an
57 * ordered map <uint64_t, Node*>. Nodes are structs which take form
58 * of leaf, intermediate and root nodes. For example, in a tree with 3
59 * layers, tree[0][5] gives a leaf node pointer for key=5 tree[1][1]
60 * gives an intermediate node pointer for key=1 tree[2][0] gives the
61 * root node in the tree.
62 *
63 * At every transaction a hash-map (aiMap) is looked up to check if
64 * the address was already encountered before. Based on this lookup a
65 * transaction can be termed as unique or non-unique.
66 *
67 * In addition to the normal stack distance calculation, a feature to
68 * mark an old node in the tree is added. This is useful if it is
69 * required to see the reuse pattern. For example, BackInvalidates
70 * from a lower level (e.g. membus to L2), can be marked (isMarked
71 * flag of Node set to True). Then later if this same address is
72 * accessed (by L1), the value of the isMarked flag would be
73 * True. This would give some insight on how the BackInvalidates
74 * policy of the lower level affect the read/write accesses in an
75 * application.
76 *
77 * There are two functions provided to interface with the calculator:
78 * 1. pair<uint64_t, bool> calcStackDistAndUpdate(Addr r_address,
79 * bool addNewNode)
80 * At every unique transaction a new leaf node is added at tree[0](leaf layer)
81 * and linked to the layer above (if addNewNode is True). The sums of all
82 * the intermediate nodes is updated till the root. The stack-distance is
83 * returned as a Constant representing INFINITY.
84 *
85 * At every non-unique transaction the tree is traversed from the
86 * leaf at the returned index to the root, the old node is deleted
87 * from the tree, and the sums (to the right are collected) and
88 * decremented. The collected sum represets the stack distance of the
89 * found node. If this node was marked then a bool flag set to True
90 * is returned with the stack_distance. During this operation a node
91 * is discarded at the leaf layer always. Moreover during the
92 * traversal upwards using the updateSum() method, if an intermediate
93 * node is found with no children connected to it, then that is
94 * discarded too.
95 *
96 * The return value of this function is a pair representing the
97 * stack_distance and the value of the marked flag.
98 *
99 * 2. pair<uint64_t , bool> calcStackDist(Addr r_address, bool mark)
100 * This is a stripped down version of the above function which is used to
101 * just inspect the tree, and mark a leaf node (if mark flag is set). The
102 * functionality to add a new node is removed.
103 *
104 * At every unique transaction the stack-distance is returned as a constant
105 * representing INFINITY.
106 *
107 * At every non-unique transaction the tree is traversed from the
108 * leaf at the returned index to the root, and the sums (to the right)
109 * are collected. The collected sum represets the stack distance of
110 * the found node.
111 *
112 * This function does NOT Modify the stack. (No node is added or
113 * deleted). It is just used to mark a node already created and get
114 * its stack distance.
115 *
116 * The return value of this function is a pair representing the stack
117 * distance and the value of the marked flag.
118 *
119 * The table below depicts the usage of the Algorithm using the functions:
120 * pair<uint64_t Stack_dist, bool isMarked> calcStackDistAndUpdate
121 * (Addr r_address, bool addNewNode)
122 * pair<uint64_t Stack_dist, bool isMarked> calcStackDist
123 * (Addr r_address, bool mark)
124 *
125 * | Function | Arguments |Return Val |Use For|
126 * |calcStackDistAndUpdate|r_address, True|I/SD,False |A,GD,GM|
127 * |calcStackDistAndUpdate|r_address,False|SD,prevMark|D,GD,GM|
128 * |calcStackDist |r_address,False|SD,prevMark| GD,GM|
129 * |calcStackDist |r_address, True|SD,prevMark| GD,GM|
130 *
131 * (*A: Allocate an address in stack, if old entry present then it is deleted,
132 * *U: Delete old-address from stack, no new entry is added
133 * *GD: Get-Stack distance of an address,
134 * *GM: Get value of Mark flag, indicates if that address has been touched
135 * before,
136 * *I: stack-distance = infinity,
137 * *SD: Stack Distance
138 * *r_address: address to be added, *prevMark: value of isMarked flag
139 * of the Node)
140 *
141 * Invalidates refer to a type of packet that removes something from
142 * a cache, either autonoumously (due-to cache's own replacement
143 * policy), or snoops from other caches which invalidate something
144 * inside our cache.
145 *
146 * Usage | Function to use |Typical Use |
147 * Add new entry |calcStackDistAndUpdate|Read/Write Allocate |
148 * Delete Old Entry |calcStackDistAndUpdate|Writebacks/Cleanevicts|
149 * Dist.of Old entry|calcStackDist |Cleanevicts/Invalidate|
150 *
151 * Node Balancing: The tree structure is maintained by an
152 * updateTree() operation called when an intermediate node is
153 * required. The update operation is roughly categorized as a root
154 * update or intermediate layer update. When number of leaf nodes
155 * grow over a power of 2 then a new layer is added at the top of the
156 * tree and a new root node is initialized. The old node at the lower
157 * layer is connected to this. In an intermediate node update
158 * operation a new intermediate node is added to the required layer.
159 *
160 * Debugging: Debugging can be enabled by setting the verifyStack flag
161 * true. Debugging is implemented using a dummy stack that behaves in
162 * a naive way, using STL vectors (i.e each unique address is pushed
163 * on the top of an STL vector stack, and SD is returned as
164 * Infinity. If a non unique address is encountered then the previous
165 * entry in the STL vector is removed, all the entities above it are
166 * pushed down, and the address is pushed at the top of the stack).
167 *
168 * A printStack(int numOfEntitiesToPrint) is provided to print top n entities
169 * in both (tree and STL based dummy stack).
170 */
171 class StackDistCalc
172 {
173
174 private:
175
176 struct Node;
177
178 typedef std::map<uint64_t, Node*> IndexNodeMap;
179 typedef std::map<Addr, uint64_t> AddressIndexMap;
180 typedef std::vector<IndexNodeMap> TreeType;
181
182 /**
183 * Gets sum from the node upwards recursively till the root. This
184 * function is called first by getSumsLeavesToRoot, and then
185 * recursively calls itself.
186 *
187 * @param node pointer to the node which is updated
188 * @param from_left variable which says that the request arrived
189 * from the left
190 * @param sum_from_below Sum of left and right children below
191 * @param level level in the tree the calling node is located
192 * @param stack_dist stack distance of the node below
193 * @return The stack distance of the current address.
194 *
195 */
196 uint64_t getSum(Node* node, bool from_left, uint64_t sum_from_below,
197 uint64_t stack_dist, uint64_t level) const;
198
199 /**
200 * Gets the sum from the leaf node specified. This function
201 * is called by calcStackDist.
202 *
203 * @param node pointer to the node which is updated
204 * @return The stack distance of the current address.
205 *
206 */
207 uint64_t getSumsLeavesToRoot(Node* node) const;
208
209 /**
210 * Updates the nodes upwards recursively till the root.
211 * This function is first called by updateSumsLeavesToRoot,
212 * and then it recursively calls itself.
213 *
214 * @param node pointer to the node which is updated
215 * @param from_left variable which says that the request arrived
216 * from the left
217 * @param sum_from_below Sum of left and right children below
218 * @param level level in the tree the calling node is located
219 * @param stack_dist stack distance of the node below
220 * @param discard_node whether the calling node was discarded or not
221 * @return The stack distance of the current address.
222 *
223 */
224 uint64_t updateSum(Node* node,
225 bool from_left, uint64_t sum_from_below, uint64_t level,
226 uint64_t stack_dist, bool discard_node);
227
228 /**
229 * Updates the leaf nodes and nodes above. This function is
230 * called by the calcStackDistAndUpdate.
231 *
232 * @param node pointer to the node which is updated
233 * @param is_new_leaf is true if this is a newly added node
234 * @return The stack distance of the current address.
235 *
236 */
237 uint64_t updateSumsLeavesToRoot(Node* node, bool is_new_leaf);
238
239 /**
240 * updateTree is a tree balancing operation, which maintains the
241 * binary tree structure.
242 * This method is called whenever index%2 == 0 (i.e. every
243 * alternate cycle) The two main operation are :
244 * OP1. Moving the root node one layer up if index counter
245 * crosses power of 2
246 * OP2. Addition of intermediate nodes as and when required
247 * and linking them to their parents in the layer above.
248 */
249 void updateTree();
250
251 /**
252 * This method is used for verification purposes
253 * It recursively traverses upwards from the given node till
254 * the root to check if the ultimate parent node (root-node) points
255 * to null.
256 *
257 * @param node pointer to the node whose sanity is being checked
258 * @param level the level at which this node is located in the tree
259 *
260 */
261 void sanityCheckTree(const Node* node, uint64_t level = 0) const;
262
263 /**
264 * Return the counter for address accesses (unique and
265 * non-unique). This is further used to dump stats at
266 * regular intervals.
267 *
268 * @return The stack distance of the current address.
269 */
270 uint64_t getIndex() const { return index; }
271
272 /**
273 * Query depth of the tree (tree[0] represents leaf layer while
274 * tree[treeDepth] represents the root layer, all layers in
275 * between contain intermediate nodes)
276 *
277 * @return Tree depth
278 */
279 uint64_t getTreeDepth() const { return tree.size() - 1; }
280
281 /**
282 * Print the last n items on the stack.
283 * This method prints top n entries in the tree based implementation as
284 * well as dummy stack.
285 * @param n Number of entries to print
286 */
287 void printStack(int n = 5) const;
288
289 /**
290 * This is an alternative implementation of the stack-distance
291 * in a naive way. It uses simple STL vector to represent the stack.
292 * It can be used in parallel for debugging purposes.
293 * It is 10x slower than the tree based implemenation.
294 *
295 * @param r_address The current address to process
296 * @param update_stack Flag to indicate if stack should be updated
297 * @return Stack distance which is calculated by this alternative
298 * implementation
299 *
300 */
301 uint64_t verifyStackDist(const Addr r_address,
302 bool update_stack = false);
303
304 public:
305 StackDistCalc(bool verify_stack = false);
306
307 ~StackDistCalc();
308
309 /**
310 * A convenient way of refering to infinity.
311 */
312 static constexpr uint64_t Infinity = std::numeric_limits<uint64_t>::max();
313
314
315 /**
316 * Process the given address. If Mark is true then set the
317 * mark flag of the leaf node.
318 * This function returns the stack distance of the incoming
319 * address and the previous status of the mark flag.
320 *
321 * @param r_address The current address to process
322 * @param mark set the mark flag for the address.
323 * @return The stack distance of the current address and the mark flag.
324 */
325 std::pair<uint64_t, bool> calcStackDist(const Addr r_address,
326 bool mark = false);
327
328 /**
329 * Process the given address:
330 * - Lookup the tree for the given address
331 * - delete old node if found in tree
332 * - add a new node (if addNewNode flag is set)
333 * This function returns the stack distance of the incoming
334 * address and the status of the mark flag.
335 *
336 * @param r_address The current address to process
337 * @param addNewNode If true, a new node is added to the tree
338 * @return The stack distance of the current address and the mark flag.
339 */
340 std::pair<uint64_t, bool> calcStackDistAndUpdate(const Addr r_address,
341 bool addNewNode = true);
342
343 private:
344
345 /**
346 * Node which takes form of Leaf, INode or Root
347 */
348 struct Node{
349 // Sum of the left children
350 uint64_t sumLeft;
351
352 // Sum of the right children
353 uint64_t sumRight;
354
355 // Flag to indicate that sumLeft has gone from non-zero value to 0
356 bool discardLeft;
357
358 // Flag to indicate that sumRight has gone from non-zero value to 0
359 bool discardRight;
360
361 // Index of the current element in the Map
362 uint64_t nodeIndex;
363
364 // Pointer to the parent
365 Node* parent;
366
367 // Flag to mark the node as the right/left child
368 bool isLeftNode;
369
370 /**
371 * Flag to indicate if this address is marked. Used in case
372 * where stack distance of a touched address is required.
373 */
374 bool isMarked;
375
376 /**
377 * The discard flags are false by default they become true if
378 * the node is reached again in a future lookup.
379 */
380 Node() : sumLeft(0), sumRight(0), discardLeft(false),
381 discardRight(false), nodeIndex(0),
382 parent(nullptr), isLeftNode(true), isMarked(false)
383 { }
384 };
385
386 /**
387 * Internal counter for address accesses (unique and non-unique)
388 * This counter increments everytime the calcStackDist() method is
389 * called. This counter is used as a key for the hash- map at the
390 * leaf layer. Practically at every call to the calculator this
391 * counter is incremented and a new leaf node is added in the tree
392 * at the leaf layer using this counter value as the key.
393 */
394 uint64_t index;
395
396 // Binary tree of partial sums
397 TreeType tree;
398
399 // Hash map which returns last seen index of each address
400 AddressIndexMap aiMap;
401
402 // Keeps count of number of the next unique index for each
403 // level in the tree
404 std::vector<uint64_t> nextIndex;
405
406 // Dummy Stack for verification
407 std::vector<uint64_t> stack;
408
409 // Flag to enable verification of stack. (Slows down the simulation)
410 const bool verifyStack;
411 };
412
413
414 #endif //__STACK_DIST_CALC_HH__