2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 machine(L1Cache, "MESI Directory L1 Cache CMP")
30 : Sequencer * sequencer,
31 CacheMemory * L1Icache,
32 CacheMemory * L1Dcache,
33 Prefetcher * prefetcher = 'NULL',
34 int l2_select_num_bits,
35 Cycles l1_request_latency = 2,
36 Cycles l1_response_latency = 2,
37 Cycles to_l2_latency = 1,
39 bool enable_prefetch = "False"
42 // From this node's L1 cache TO the network
43 // a local L1 -> this L2 bank, currently ordered with directory forwarded requests
44 MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false", vnet_type="request";
45 // a local L1 -> this L2 bank
46 MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false", vnet_type="response";
47 MessageBuffer unblockFromL1Cache, network="To", virtual_network="2", ordered="false", vnet_type="unblock";
50 // To this node's L1 cache FROM the network
51 // a L2 bank -> this L1
52 MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false", vnet_type="request";
53 // a L2 bank -> this L1
54 MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false", vnet_type="response";
55 // Request Buffer for prefetches
56 MessageBuffer optionalQueue, ordered="false";
60 state_declaration(State, desc="Cache states", default="L1Cache_State_I") {
62 NP, AccessPermission:Invalid, desc="Not present in either cache";
63 I, AccessPermission:Invalid, desc="a L1 cache entry Idle";
64 S, AccessPermission:Read_Only, desc="a L1 cache entry Shared";
65 E, AccessPermission:Read_Only, desc="a L1 cache entry Exclusive";
66 M, AccessPermission:Read_Write, desc="a L1 cache entry Modified", format="!b";
69 IS, AccessPermission:Busy, desc="L1 idle, issued GETS, have not seen response yet";
70 IM, AccessPermission:Busy, desc="L1 idle, issued GETX, have not seen response yet";
71 SM, AccessPermission:Read_Only, desc="L1 idle, issued GETX, have not seen response yet";
72 IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit";
74 M_I, AccessPermission:Busy, desc="L1 replacing, waiting for ACK";
75 SINK_WB_ACK, AccessPermission:Busy, desc="This is to sink WB_Acks from L2";
77 // Transient States in which block is being prefetched
78 PF_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet";
79 PF_IM, AccessPermission:Busy, desc="Issued GETX, have not seen response yet";
80 PF_SM, AccessPermission:Busy, desc="Issued GETX, received data, waiting for acks";
81 PF_IS_I, AccessPermission:Busy, desc="Issued GETs, saw inv before data";
85 enumeration(Event, desc="Cache events") {
87 Load, desc="Load request from the home processor";
88 Ifetch, desc="I-fetch request from the home processor";
89 Store, desc="Store request from the home processor";
91 Inv, desc="Invalidate request from L2 bank";
93 // internal generated request
94 L1_Replacement, desc="L1 Replacement", format="!r";
97 Fwd_GETX, desc="GETX from other processor";
98 Fwd_GETS, desc="GETS from other processor";
99 Fwd_GET_INSTR, desc="GET_INSTR from other processor";
101 Data, desc="Data for processor";
102 Data_Exclusive, desc="Data for processor";
103 DataS_fromL1, desc="data for GETS request, need to unblock directory";
104 Data_all_Acks, desc="Data for processor, all acks";
106 Ack, desc="Ack for processor";
107 Ack_all, desc="Last ack for processor";
109 WB_Ack, desc="Ack for replacement";
111 PF_Load, desc="load request from prefetcher";
112 PF_Ifetch, desc="instruction fetch request from prefetcher";
113 PF_Store, desc="exclusive load request from prefetcher";
119 structure(Entry, desc="...", interface="AbstractCacheEntry" ) {
120 State CacheState, desc="cache state";
121 DataBlock DataBlk, desc="data for the block";
122 bool Dirty, default="false", desc="data is dirty";
123 bool isPrefetch, desc="Set if this block was prefetched";
127 structure(TBE, desc="...") {
128 Address Addr, desc="Physical address for this TBE";
129 State TBEState, desc="Transient state";
130 DataBlock DataBlk, desc="Buffer for the data block";
131 bool Dirty, default="false", desc="data is dirty";
132 bool isPrefetch, desc="Set if this was caused by a prefetch";
133 int pendingAcks, default="0", desc="number of pending acks";
136 structure(TBETable, external="yes") {
138 void allocate(Address);
139 void deallocate(Address);
140 bool isPresent(Address);
143 TBETable L1_TBEs, template="<L1Cache_TBE>", constructor="m_number_of_TBEs";
145 MessageBuffer mandatoryQueue, ordered="false";
147 int l2_select_low_bit, default="RubySystem::getBlockSizeBits()";
149 void set_cache_entry(AbstractCacheEntry a);
150 void unset_cache_entry();
153 void wakeUpBuffers(Address a);
154 void profileMsgDelay(int virtualNetworkType, Cycles c);
156 // inclusive cache returns L1 entries only
// Look up a block in this node's L1s: the D-cache entry wins if valid,
// otherwise the I-cache entry is returned (which may itself be invalid).
// A block is never resident in both L1s at once (asserted in
// getState/setState).
157 Entry getCacheEntry(Address addr), return_by_pointer="yes" {
158 Entry L1Dcache_entry := static_cast(Entry, "pointer", L1Dcache[addr]);
159 if(is_valid(L1Dcache_entry)) {
160 return L1Dcache_entry;
163 Entry L1Icache_entry := static_cast(Entry, "pointer", L1Icache[addr]);
164 return L1Icache_entry;
167 Entry getL1DCacheEntry(Address addr), return_by_pointer="yes" {
168 Entry L1Dcache_entry := static_cast(Entry, "pointer", L1Dcache[addr]);
169 return L1Dcache_entry;
172 Entry getL1ICacheEntry(Address addr), return_by_pointer="yes" {
173 Entry L1Icache_entry := static_cast(Entry, "pointer", L1Icache[addr]);
174 return L1Icache_entry;
177 State getState(TBE tbe, Entry cache_entry, Address addr) {
178 assert((L1Dcache.isTagPresent(addr) && L1Icache.isTagPresent(addr)) == false);
182 } else if (is_valid(cache_entry)) {
183 return cache_entry.CacheState;
188 void setState(TBE tbe, Entry cache_entry, Address addr, State state) {
189 assert((L1Dcache.isTagPresent(addr) && L1Icache.isTagPresent(addr)) == false);
193 tbe.TBEState := state;
196 if (is_valid(cache_entry)) {
197 cache_entry.CacheState := state;
201 AccessPermission getAccessPermission(Address addr) {
202 TBE tbe := L1_TBEs[addr];
204 DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(tbe.TBEState));
205 return L1Cache_State_to_permission(tbe.TBEState);
208 Entry cache_entry := getCacheEntry(addr);
209 if(is_valid(cache_entry)) {
210 DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(cache_entry.CacheState));
211 return L1Cache_State_to_permission(cache_entry.CacheState);
214 DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent);
215 return AccessPermission:NotPresent;
218 DataBlock getDataBlock(Address addr), return_by_ref="yes" {
219 TBE tbe := L1_TBEs[addr];
224 return getCacheEntry(addr).DataBlk;
227 void setAccessPermission(Entry cache_entry, Address addr, State state) {
228 if (is_valid(cache_entry)) {
229 cache_entry.changePermission(L1Cache_State_to_permission(state));
233 Event mandatory_request_type_to_event(RubyRequestType type) {
234 if (type == RubyRequestType:LD) {
236 } else if (type == RubyRequestType:IFETCH) {
238 } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) {
241 error("Invalid RubyRequestType");
// Map a prefetcher-generated RubyRequestType onto the corresponding
// PF_* controller event; any other request type is a protocol error.
245 Event prefetch_request_type_to_event(RubyRequestType type) {
246 if (type == RubyRequestType:LD) {
247 return Event:PF_Load;
248 } else if (type == RubyRequestType:IFETCH) {
249 return Event:PF_Ifetch;
250 } else if ((type == RubyRequestType:ST) ||
251 (type == RubyRequestType:ATOMIC)) {
252 return Event:PF_Store;
254 error("Invalid RubyRequestType");
// Number of invalidation acks this TBE is still waiting for; used by the
// response in_port to decide between Ack/Ack_all and Data/Data_all_Acks.
258 int getPendingAcks(TBE tbe) {
259 return tbe.pendingAcks;
262 out_port(requestIntraChipL1Network_out, RequestMsg, requestFromL1Cache);
263 out_port(responseIntraChipL1Network_out, ResponseMsg, responseFromL1Cache);
264 out_port(unblockNetwork_out, ResponseMsg, unblockFromL1Cache);
265 out_port(optionalQueue_out, RubyRequest, optionalQueue);
268 // Prefetch queue between the controller and the prefetcher
269 // As per Spracklen et al. (HPCA 2005), the prefetch queue should be
270 // implemented as a LIFO structure. The structure would allow for fast
271 // searches of all entries in the queue, not just the head msg. All
272 // msgs in the structure can be invalidated if a demand miss matches.
273 in_port(optionalQueue_in, RubyRequest, optionalQueue, desc="...", rank = 3) {
274 if (optionalQueue_in.isReady()) {
275 peek(optionalQueue_in, RubyRequest) {
276 // Instruction Prefetch
277 if (in_msg.Type == RubyRequestType:IFETCH) {
278 Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
279 if (is_valid(L1Icache_entry)) {
280 // The block to be prefetched is already present in the
281 // cache. We should drop this request.
282 trigger(prefetch_request_type_to_event(in_msg.Type),
284 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
287 // Check to see if it is in the OTHER L1
288 Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
289 if (is_valid(L1Dcache_entry)) {
290 // The block is in the wrong L1 cache. We should drop
292 trigger(prefetch_request_type_to_event(in_msg.Type),
294 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
297 if (L1Icache.cacheAvail(in_msg.LineAddress)) {
298 // L1 doesn't have the line, but we have space for it
299 // in the L1 so let's see if the L2 has it
300 trigger(prefetch_request_type_to_event(in_msg.Type),
302 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
304 // No room in the L1, so we need to make room in the L1
305 trigger(Event:L1_Replacement,
306 L1Icache.cacheProbe(in_msg.LineAddress),
307 getL1ICacheEntry(L1Icache.cacheProbe(in_msg.LineAddress)),
308 L1_TBEs[L1Icache.cacheProbe(in_msg.LineAddress)]);
312 Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
313 if (is_valid(L1Dcache_entry)) {
314 // The block to be prefetched is already present in the
315 // cache. We should drop this request.
316 trigger(prefetch_request_type_to_event(in_msg.Type),
318 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
321 // Check to see if it is in the OTHER L1
322 Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
323 if (is_valid(L1Icache_entry)) {
324 // The block is in the wrong L1. Just drop the prefetch
326 trigger(prefetch_request_type_to_event(in_msg.Type),
328 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
331 if (L1Dcache.cacheAvail(in_msg.LineAddress)) {
332 // L1 doesn't have the line, but we have space for it in
333 // the L1 let's see if the L2 has it
334 trigger(prefetch_request_type_to_event(in_msg.Type),
336 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
338 // No room in the L1, so we need to make room in the L1
339 trigger(Event:L1_Replacement,
340 L1Dcache.cacheProbe(in_msg.LineAddress),
341 getL1DCacheEntry(L1Dcache.cacheProbe(in_msg.LineAddress)),
342 L1_TBEs[L1Dcache.cacheProbe(in_msg.LineAddress)]);
349 // Response IntraChip L1 Network - response msg to this L1 cache
350 in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache, rank = 2) {
351 if (responseIntraChipL1Network_in.isReady()) {
352 peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Addr") {
353 assert(in_msg.Destination.isElement(machineID));
355 Entry cache_entry := getCacheEntry(in_msg.Addr);
356 TBE tbe := L1_TBEs[in_msg.Addr];
358 if(in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
359 trigger(Event:Data_Exclusive, in_msg.Addr, cache_entry, tbe);
360 } else if(in_msg.Type == CoherenceResponseType:DATA) {
361 if ((getState(tbe, cache_entry, in_msg.Addr) == State:IS ||
362 getState(tbe, cache_entry, in_msg.Addr) == State:IS_I ||
363 getState(tbe, cache_entry, in_msg.Addr) == State:PF_IS ||
364 getState(tbe, cache_entry, in_msg.Addr) == State:PF_IS_I) &&
365 machineIDToMachineType(in_msg.Sender) == MachineType:L1Cache) {
367 trigger(Event:DataS_fromL1, in_msg.Addr, cache_entry, tbe);
369 } else if ( (getPendingAcks(tbe) - in_msg.AckCount) == 0 ) {
370 trigger(Event:Data_all_Acks, in_msg.Addr, cache_entry, tbe);
372 trigger(Event:Data, in_msg.Addr, cache_entry, tbe);
374 } else if (in_msg.Type == CoherenceResponseType:ACK) {
375 if ( (getPendingAcks(tbe) - in_msg.AckCount) == 0 ) {
376 trigger(Event:Ack_all, in_msg.Addr, cache_entry, tbe);
378 trigger(Event:Ack, in_msg.Addr, cache_entry, tbe);
380 } else if (in_msg.Type == CoherenceResponseType:WB_ACK) {
381 trigger(Event:WB_Ack, in_msg.Addr, cache_entry, tbe);
383 error("Invalid L1 response type");
389 // Request InterChip network - request from this L1 cache to the shared L2
390 in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache, rank = 1) {
391 if(requestIntraChipL1Network_in.isReady()) {
392 peek(requestIntraChipL1Network_in, RequestMsg, block_on="Addr") {
393 assert(in_msg.Destination.isElement(machineID));
395 Entry cache_entry := getCacheEntry(in_msg.Addr);
396 TBE tbe := L1_TBEs[in_msg.Addr];
398 if (in_msg.Type == CoherenceRequestType:INV) {
399 trigger(Event:Inv, in_msg.Addr, cache_entry, tbe);
400 } else if (in_msg.Type == CoherenceRequestType:GETX ||
401 in_msg.Type == CoherenceRequestType:UPGRADE) {
402 // upgrade transforms to GETX due to race
403 trigger(Event:Fwd_GETX, in_msg.Addr, cache_entry, tbe);
404 } else if (in_msg.Type == CoherenceRequestType:GETS) {
405 trigger(Event:Fwd_GETS, in_msg.Addr, cache_entry, tbe);
406 } else if (in_msg.Type == CoherenceRequestType:GET_INSTR) {
407 trigger(Event:Fwd_GET_INSTR, in_msg.Addr, cache_entry, tbe);
409 error("Invalid forwarded request type");
415 // Mandatory Queue between the node's CPU and its L1 caches
416 in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) {
417 if (mandatoryQueue_in.isReady()) {
418 peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
420 // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache
422 if (in_msg.Type == RubyRequestType:IFETCH) {
423 // ** INSTRUCTION ACCESS ***
425 Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
426 if (is_valid(L1Icache_entry)) {
427 // The tag matches for the L1, so the L1 asks the L2 for it.
428 trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
429 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
432 // Check to see if it is in the OTHER L1
433 Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
434 if (is_valid(L1Dcache_entry)) {
435 // The block is in the wrong L1, put the request on the queue to the shared L2
436 trigger(Event:L1_Replacement, in_msg.LineAddress,
437 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
440 if (L1Icache.cacheAvail(in_msg.LineAddress)) {
441 // L1 doesn't have the line, but we have space for it
442 // in the L1 so let's see if the L2 has it.
443 trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
444 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
446 // No room in the L1, so we need to make room in the L1
447 trigger(Event:L1_Replacement, L1Icache.cacheProbe(in_msg.LineAddress),
448 getL1ICacheEntry(L1Icache.cacheProbe(in_msg.LineAddress)),
449 L1_TBEs[L1Icache.cacheProbe(in_msg.LineAddress)]);
454 // *** DATA ACCESS ***
455 Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
456 if (is_valid(L1Dcache_entry)) {
457 // The tag matches for the L1, so the L1 asks the L2 for it
458 trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
459 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
462 // Check to see if it is in the OTHER L1
463 Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
464 if (is_valid(L1Icache_entry)) {
465 // The block is in the wrong L1, put the request on the queue to the shared L2
466 trigger(Event:L1_Replacement, in_msg.LineAddress,
467 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
470 if (L1Dcache.cacheAvail(in_msg.LineAddress)) {
471 // L1 doesn't have the line, but we have space for it
472 // in the L1 let's see if the L2 has it.
473 trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
474 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
476 // No room in the L1, so we need to make room in the L1
477 trigger(Event:L1_Replacement, L1Dcache.cacheProbe(in_msg.LineAddress),
478 getL1DCacheEntry(L1Dcache.cacheProbe(in_msg.LineAddress)),
479 L1_TBEs[L1Dcache.cacheProbe(in_msg.LineAddress)]);
// Queue a prefetch request on the optional queue; optionalQueue_in later
// triggers the PF_* event if the line is absent from both L1s.
// NOTE(review): AccessMode is hard-wired to Supervisor here — confirm that
// prefetch requests are intentionally not tagged with the original mode.
487 void enqueuePrefetch(Address address, RubyRequestType type) {
488 enqueue(optionalQueue_out, RubyRequest, latency=1) {
489 out_msg.LineAddress := address;
490 out_msg.Type := type;
491 out_msg.AccessMode := RubyAccessMode:Supervisor;
496 action(a_issueGETS, "a", desc="Issue GETS") {
497 peek(mandatoryQueue_in, RubyRequest) {
498 enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
499 out_msg.Addr := address;
500 out_msg.Type := CoherenceRequestType:GETS;
501 out_msg.Requestor := machineID;
502 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
503 l2_select_low_bit, l2_select_num_bits, intToID(0)));
504 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
505 address, out_msg.Destination);
506 out_msg.MessageSize := MessageSizeType:Control;
507 out_msg.Prefetch := in_msg.Prefetch;
508 out_msg.AccessMode := in_msg.AccessMode;
513 action(pa_issuePfGETS, "pa", desc="Issue prefetch GETS") {
514 peek(optionalQueue_in, RubyRequest) {
515 enqueue(requestIntraChipL1Network_out, RequestMsg,
516 latency=l1_request_latency) {
517 out_msg.Addr := address;
518 out_msg.Type := CoherenceRequestType:GETS;
519 out_msg.Requestor := machineID;
520 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
521 l2_select_low_bit, l2_select_num_bits, intToID(0)));
522 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
523 address, out_msg.Destination);
524 out_msg.MessageSize := MessageSizeType:Control;
525 out_msg.Prefetch := in_msg.Prefetch;
526 out_msg.AccessMode := in_msg.AccessMode;
531 action(ai_issueGETINSTR, "ai", desc="Issue GETINSTR") {
532 peek(mandatoryQueue_in, RubyRequest) {
533 enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
534 out_msg.Addr := address;
535 out_msg.Type := CoherenceRequestType:GET_INSTR;
536 out_msg.Requestor := machineID;
537 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
538 l2_select_low_bit, l2_select_num_bits, intToID(0)));
539 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
540 address, out_msg.Destination);
541 out_msg.MessageSize := MessageSizeType:Control;
542 out_msg.Prefetch := in_msg.Prefetch;
543 out_msg.AccessMode := in_msg.AccessMode;
548 action(pai_issuePfGETINSTR, "pai",
549 desc="Issue GETINSTR for prefetch request") {
550 peek(optionalQueue_in, RubyRequest) {
551 enqueue(requestIntraChipL1Network_out, RequestMsg,
552 latency=l1_request_latency) {
553 out_msg.Addr := address;
554 out_msg.Type := CoherenceRequestType:GET_INSTR;
555 out_msg.Requestor := machineID;
556 out_msg.Destination.add(
557 mapAddressToRange(address, MachineType:L2Cache,
558 l2_select_low_bit, l2_select_num_bits, intToID(0)));
559 out_msg.MessageSize := MessageSizeType:Control;
560 out_msg.Prefetch := in_msg.Prefetch;
561 out_msg.AccessMode := in_msg.AccessMode;
563 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
564 address, out_msg.Destination);
569 action(b_issueGETX, "b", desc="Issue GETX") {
570 peek(mandatoryQueue_in, RubyRequest) {
571 enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
572 out_msg.Addr := address;
573 out_msg.Type := CoherenceRequestType:GETX;
574 out_msg.Requestor := machineID;
575 DPRINTF(RubySlicc, "%s\n", machineID);
576 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
577 l2_select_low_bit, l2_select_num_bits, intToID(0)));
578 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
579 address, out_msg.Destination);
580 out_msg.MessageSize := MessageSizeType:Control;
581 out_msg.Prefetch := in_msg.Prefetch;
582 out_msg.AccessMode := in_msg.AccessMode;
587 action(pb_issuePfGETX, "pb", desc="Issue prefetch GETX") {
588 peek(optionalQueue_in, RubyRequest) {
589 enqueue(requestIntraChipL1Network_out, RequestMsg,
590 latency=l1_request_latency) {
591 out_msg.Addr := address;
592 out_msg.Type := CoherenceRequestType:GETX;
593 out_msg.Requestor := machineID;
594 DPRINTF(RubySlicc, "%s\n", machineID);
596 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
597 l2_select_low_bit, l2_select_num_bits, intToID(0)));
599 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
600 address, out_msg.Destination);
601 out_msg.MessageSize := MessageSizeType:Control;
602 out_msg.Prefetch := in_msg.Prefetch;
603 out_msg.AccessMode := in_msg.AccessMode;
// Issue an UPGRADE to the owning L2 bank: the line is held in S and only
// write permission is needed, so no data re-fetch is requested.
// Fix: the desc string said "Issue GETX" (copy-paste from b_issueGETX)
// even though the action enqueues CoherenceRequestType:UPGRADE.
608 action(c_issueUPGRADE, "c", desc="Issue UPGRADE") {
609 peek(mandatoryQueue_in, RubyRequest) {
610 enqueue(requestIntraChipL1Network_out, RequestMsg, latency= l1_request_latency) {
611 out_msg.Addr := address;
612 out_msg.Type := CoherenceRequestType:UPGRADE;
613 out_msg.Requestor := machineID;
614 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
615 l2_select_low_bit, l2_select_num_bits, intToID(0)));
616 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
617 address, out_msg.Destination);
618 out_msg.MessageSize := MessageSizeType:Control;
619 out_msg.Prefetch := in_msg.Prefetch;
620 out_msg.AccessMode := in_msg.AccessMode;
625 action(d_sendDataToRequestor, "d", desc="send data to requestor") {
626 peek(requestIntraChipL1Network_in, RequestMsg) {
627 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
628 assert(is_valid(cache_entry));
629 out_msg.Addr := address;
630 out_msg.Type := CoherenceResponseType:DATA;
631 out_msg.DataBlk := cache_entry.DataBlk;
632 out_msg.Dirty := cache_entry.Dirty;
633 out_msg.Sender := machineID;
634 out_msg.Destination.add(in_msg.Requestor);
635 out_msg.MessageSize := MessageSizeType:Response_Data;
640 action(d2_sendDataToL2, "d2", desc="send data to the L2 cache because of M downgrade") {
641 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
642 assert(is_valid(cache_entry));
643 out_msg.Addr := address;
644 out_msg.Type := CoherenceResponseType:DATA;
645 out_msg.DataBlk := cache_entry.DataBlk;
646 out_msg.Dirty := cache_entry.Dirty;
647 out_msg.Sender := machineID;
648 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
649 l2_select_low_bit, l2_select_num_bits, intToID(0)));
650 out_msg.MessageSize := MessageSizeType:Response_Data;
654 action(dt_sendDataToRequestor_fromTBE, "dt", desc="send data to requestor") {
655 peek(requestIntraChipL1Network_in, RequestMsg) {
656 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
657 assert(is_valid(tbe));
658 out_msg.Addr := address;
659 out_msg.Type := CoherenceResponseType:DATA;
660 out_msg.DataBlk := tbe.DataBlk;
661 out_msg.Dirty := tbe.Dirty;
662 out_msg.Sender := machineID;
663 out_msg.Destination.add(in_msg.Requestor);
664 out_msg.MessageSize := MessageSizeType:Response_Data;
669 action(d2t_sendDataToL2_fromTBE, "d2t", desc="send data to the L2 cache") {
670 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
671 assert(is_valid(tbe));
672 out_msg.Addr := address;
673 out_msg.Type := CoherenceResponseType:DATA;
674 out_msg.DataBlk := tbe.DataBlk;
675 out_msg.Dirty := tbe.Dirty;
676 out_msg.Sender := machineID;
677 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
678 l2_select_low_bit, l2_select_num_bits, intToID(0)));
679 out_msg.MessageSize := MessageSizeType:Response_Data;
683 action(e_sendAckToRequestor, "e", desc="send invalidate ack to requestor (could be L2 or L1)") {
684 peek(requestIntraChipL1Network_in, RequestMsg) {
685 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
686 out_msg.Addr := address;
687 out_msg.Type := CoherenceResponseType:ACK;
688 out_msg.Sender := machineID;
689 out_msg.Destination.add(in_msg.Requestor);
690 out_msg.MessageSize := MessageSizeType:Response_Control;
695 action(f_sendDataToL2, "f", desc="send data to the L2 cache") {
696 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
697 assert(is_valid(cache_entry));
698 out_msg.Addr := address;
699 out_msg.Type := CoherenceResponseType:DATA;
700 out_msg.DataBlk := cache_entry.DataBlk;
701 out_msg.Dirty := cache_entry.Dirty;
702 out_msg.Sender := machineID;
703 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
704 l2_select_low_bit, l2_select_num_bits, intToID(0)));
705 out_msg.MessageSize := MessageSizeType:Writeback_Data;
709 action(ft_sendDataToL2_fromTBE, "ft", desc="send data to the L2 cache") {
710 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
711 assert(is_valid(tbe));
712 out_msg.Addr := address;
713 out_msg.Type := CoherenceResponseType:DATA;
714 out_msg.DataBlk := tbe.DataBlk;
715 out_msg.Dirty := tbe.Dirty;
716 out_msg.Sender := machineID;
717 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
718 l2_select_low_bit, l2_select_num_bits, intToID(0)));
719 out_msg.MessageSize := MessageSizeType:Writeback_Data;
// Respond to an invalidation with a control-only ACK (AckCount := 1) so the
// requestor can count collected invalidation acks; no data is transferred.
// Fix: the desc string said "send data to the L2 cache" (copy-paste from
// the f_sendDataToL2 family) even though this action sends an ACK, not data.
723 action(fi_sendInvAck, "fi", desc="send invalidation ack to the requestor") {
724 peek(requestIntraChipL1Network_in, RequestMsg) {
725 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
726 out_msg.Addr := address;
727 out_msg.Type := CoherenceResponseType:ACK;
728 out_msg.Sender := machineID;
729 out_msg.Destination.add(in_msg.Requestor);
730 out_msg.MessageSize := MessageSizeType:Response_Control;
731 out_msg.AckCount := 1;
736 action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
737 if (send_evictions) {
738 DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
739 sequencer.evictionCallback(address);
743 action(g_issuePUTX, "g", desc="send data to the L2 cache") {
744 enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_response_latency) {
745 assert(is_valid(cache_entry));
746 out_msg.Addr := address;
747 out_msg.Type := CoherenceRequestType:PUTX;
748 out_msg.DataBlk := cache_entry.DataBlk;
749 out_msg.Dirty := cache_entry.Dirty;
750 out_msg.Requestor:= machineID;
751 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
752 l2_select_low_bit, l2_select_num_bits, intToID(0)));
753 if (cache_entry.Dirty) {
754 out_msg.MessageSize := MessageSizeType:Writeback_Data;
756 out_msg.MessageSize := MessageSizeType:Writeback_Control;
761 action(j_sendUnblock, "j", desc="send unblock to the L2 cache") {
762 enqueue(unblockNetwork_out, ResponseMsg, latency=to_l2_latency) {
763 out_msg.Addr := address;
764 out_msg.Type := CoherenceResponseType:UNBLOCK;
765 out_msg.Sender := machineID;
766 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
767 l2_select_low_bit, l2_select_num_bits, intToID(0)));
768 out_msg.MessageSize := MessageSizeType:Response_Control;
769 DPRINTF(RubySlicc, "%s\n", address);
773 action(jj_sendExclusiveUnblock, "\j", desc="send unblock to the L2 cache") {
774 enqueue(unblockNetwork_out, ResponseMsg, latency=to_l2_latency) {
775 out_msg.Addr := address;
776 out_msg.Type := CoherenceResponseType:EXCLUSIVE_UNBLOCK;
777 out_msg.Sender := machineID;
778 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
779 l2_select_low_bit, l2_select_num_bits, intToID(0)));
780 out_msg.MessageSize := MessageSizeType:Response_Control;
781 DPRINTF(RubySlicc, "%s\n", address);
786 action(dg_invalidate_sc, "dg",
787 desc="Invalidate store conditional as the cache lost permissions") {
788 sequencer.invalidateSC(address);
791 action(h_load_hit, "h",
792 desc="If not prefetch, notify sequencer the load completed.")
794 assert(is_valid(cache_entry));
795 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
796 sequencer.readCallback(address, cache_entry.DataBlk);
799 action(hx_load_hit, "hx",
800 desc="If not prefetch, notify sequencer the load completed.")
802 assert(is_valid(cache_entry));
803 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
804 sequencer.readCallback(address, cache_entry.DataBlk, true);
807 action(hh_store_hit, "\h",
808 desc="If not prefetch, notify sequencer that store completed.")
810 assert(is_valid(cache_entry));
811 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
812 sequencer.writeCallback(address, cache_entry.DataBlk);
813 cache_entry.Dirty := true;
816 action(hhx_store_hit, "\hx",
817 desc="If not prefetch, notify sequencer that store completed.")
819 assert(is_valid(cache_entry));
820 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
821 sequencer.writeCallback(address, cache_entry.DataBlk, true);
822 cache_entry.Dirty := true;
825 action(i_allocateTBE, "i", desc="Allocate TBE (isPrefetch=0, number of invalidates=0)") {
826 check_allocate(L1_TBEs);
827 assert(is_valid(cache_entry));
828 L1_TBEs.allocate(address);
829 set_tbe(L1_TBEs[address]);
830 tbe.isPrefetch := false;
831 tbe.Dirty := cache_entry.Dirty;
832 tbe.DataBlk := cache_entry.DataBlk;
835 action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") {
836 mandatoryQueue_in.dequeue();
839 action(l_popRequestQueue, "l", desc="Pop incoming request queue and profile the delay within this virtual network") {
840 profileMsgDelay(2, requestIntraChipL1Network_in.dequeue_getDelayCycles());
843 action(o_popIncomingResponseQueue, "o", desc="Pop Incoming Response queue and profile the delay within this virtual network") {
844 profileMsgDelay(1, responseIntraChipL1Network_in.dequeue_getDelayCycles());
847 action(s_deallocateTBE, "s", desc="Deallocate TBE") {
848 L1_TBEs.deallocate(address);
852 action(u_writeDataToL1Cache, "u", desc="Write data to cache") {
853 peek(responseIntraChipL1Network_in, ResponseMsg) {
854 assert(is_valid(cache_entry));
855 cache_entry.DataBlk := in_msg.DataBlk;
856 cache_entry.Dirty := in_msg.Dirty;
// Decrement the outstanding-ack counter by the AckCount carried in the
// incoming response, and append the running tally to the transition trace.
// NOTE(review): data responses appear to carry the expected ack total via
// AckCount as well (see the getPendingAcks checks in the response in_port)
// — confirm the sign convention against the L2/directory senders.
860 action(q_updateAckCount, "q", desc="Update ack count") {
861 peek(responseIntraChipL1Network_in, ResponseMsg) {
862 assert(is_valid(tbe));
863 tbe.pendingAcks := tbe.pendingAcks - in_msg.AckCount;
864 APPEND_TRANSITION_COMMENT(in_msg.AckCount);
865 APPEND_TRANSITION_COMMENT(" p: ");
866 APPEND_TRANSITION_COMMENT(tbe.pendingAcks);
870 action(ff_deallocateL1CacheBlock, "\f", desc="Deallocate L1 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") {
871 if (L1Dcache.isTagPresent(address)) {
872 L1Dcache.deallocate(address);
874 L1Icache.deallocate(address);
879 action(oo_allocateL1DCacheBlock, "\o", desc="Set L1 D-cache tag equal to tag of block B.") {
880 if (is_invalid(cache_entry)) {
881 set_cache_entry(L1Dcache.allocate(address, new Entry));
// Allocate a new entry in the L1 I-cache for this address if no entry is
// already bound to the transition (idempotent on re-entry).
885 action(pp_allocateL1ICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") {
886 if (is_invalid(cache_entry)) {
887 set_cache_entry(L1Icache.allocate(address, new Entry));
// Park the head mandatory-queue message on this address until a later
// kd_wakeUpDependents for the same address re-enqueues it.
891 action(z_stallAndWaitMandatoryQueue, "\z", desc="recycle L1 request queue") {
892 stall_and_wait(mandatoryQueue_in, address);
// Re-enqueue any requests previously stalled on this address
// (counterpart of z_stallAndWaitMandatoryQueue).
895 action(kd_wakeUpDependents, "kd", desc="wake-up dependents") {
896 wakeUpBuffers(address);
// Demand hit/miss statistics counters, split by I-cache vs. D-cache.
899 action(uu_profileInstMiss, "\uim", desc="Profile the demand miss") {
900 ++L1Icache.demand_misses;
903 action(uu_profileInstHit, "\uih", desc="Profile the demand hit") {
904 ++L1Icache.demand_hits;
907 action(uu_profileDataMiss, "\udm", desc="Profile the demand miss") {
908 ++L1Dcache.demand_misses;
911 action(uu_profileDataHit, "\udh", desc="Profile the demand hit") {
912 ++L1Dcache.demand_hits;
// Notify the prefetcher of a demand miss (line address + request type),
// gated on the enable_prefetch controller parameter.
915 action(po_observeMiss, "\po", desc="Inform the prefetcher about the miss") {
916 peek(mandatoryQueue_in, RubyRequest) {
917 if (enable_prefetch) {
918 prefetcher.observeMiss(in_msg.LineAddress, in_msg.Type);
// Notify the prefetcher that a demand request hit a block whose prefetch is
// still in flight (a "partial miss"). Not gated on enable_prefetch here —
// presumably only reachable from PF_* states; verify against transitions.
923 action(ppm_observePfMiss, "\ppm",
924 desc="Inform the prefetcher about the partial miss") {
925 peek(mandatoryQueue_in, RubyRequest) {
926 prefetcher.observePfMiss(in_msg.LineAddress);
// Consume the head of the optional (prefetch) request queue.
930 action(pq_popPrefetchQueue, "\pq", desc="Pop the prefetch request queue") {
931 optionalQueue_in.dequeue();
// Flag the cache entry as filled by a prefetch so a later demand access can
// be recognized as a prefetch hit.
934 action(mp_markPrefetched, "mp", desc="Write data from response queue to cache") {
935 assert(is_valid(cache_entry));
936 cache_entry.isPrefetch := true;
940 //*****************************************************
// TRANSITIONS
942 //*****************************************************
944 // Transitions for Load/Store/Replacement/WriteBack from transient states
// While a miss or writeback is in flight, park new CPU requests on the
// address; kd_wakeUpDependents replays them when the transient resolves.
945 transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Ifetch, Store, L1_Replacement}) {
946 z_stallAndWaitMandatoryQueue;
// A read prefetch in flight cannot service a Store or be replaced yet.
949 transition({PF_IS, PF_IS_I}, {Store, L1_Replacement}) {
950 z_stallAndWaitMandatoryQueue;
// A write prefetch in flight cannot service a Load/Ifetch or be replaced yet.
953 transition({PF_IM, PF_SM}, {Load, Ifetch, L1_Replacement}) {
954 z_stallAndWaitMandatoryQueue;
957 // Transitions from Idle
// Replacing a block we don't hold is a no-op beyond freeing the tag.
958 transition({NP,I}, L1_Replacement) {
959 ff_deallocateL1CacheBlock;
// Prefetch requests for blocks already present (or already in flight) are
// dropped — the body's drop action appears elided in this excerpt.
962 transition({S,E,M,IS,IM,SM,IS_I,M_I,SINK_WB_ACK,PF_IS,PF_IM},
963 {PF_Load, PF_Store, PF_Ifetch}) {
// Demand load miss: allocate a D-cache entry and go to IS awaiting data.
// NOTE(review): issue/profile/pop action lines appear elided here
// (numbering skips 969-975) — verify against the full file.
967 transition({NP,I}, Load, IS) {
968 oo_allocateL1DCacheBlock;
// Prefetch load miss: same allocation, but tracked in the PF_IS state.
976 transition({NP,I}, PF_Load, PF_IS) {
977 oo_allocateL1DCacheBlock;
// Demand load arrives while its prefetch is still in flight: convert the
// prefetch into a demand miss (partial miss).
983 transition(PF_IS, Load, IS) {
989 transition(PF_IS_I, Load, IS_I) {
// Demand instruction fetch miss: allocate an I-cache entry, go to IS.
995 transition({NP,I}, Ifetch, IS) {
996 pp_allocateL1ICacheBlock;
1001 k_popMandatoryQueue;
// Instruction prefetch miss: allocate and issue a prefetch GET_INSTR.
1004 transition({NP,I}, PF_Ifetch, PF_IS) {
1005 pp_allocateL1ICacheBlock;
1007 pai_issuePfGETINSTR;
1008 pq_popPrefetchQueue;
1011 // We proactively assume that the prefetch is in to
1012 // the instruction cache
1013 transition(PF_IS, Ifetch, IS) {
1016 k_popMandatoryQueue;
// Demand store miss: allocate a D-cache entry, go to IM awaiting data+acks.
1019 transition({NP,I}, Store, IM) {
1020 oo_allocateL1DCacheBlock;
1025 k_popMandatoryQueue;
// Store prefetch miss: allocate, tracked in PF_IM.
1028 transition({NP,I}, PF_Store, PF_IM) {
1029 oo_allocateL1DCacheBlock;
1032 pq_popPrefetchQueue;
// Demand store while its write prefetch is in flight: becomes a demand miss.
1035 transition(PF_IM, Store, IM) {
1038 k_popMandatoryQueue;
1041 transition(PF_SM, Store, SM) {
1044 k_popMandatoryQueue;
// Invalidation for a block we don't hold: acknowledge (action elided here).
1047 transition({NP, I}, Inv) {
1052 // Transitions from Shared
// Loads and instruction fetches hit in S/E/M (hit/profile actions elided
// in this excerpt — original numbering skips lines).
1053 transition({S,E,M}, Load) {
1056 k_popMandatoryQueue;
1059 transition({S,E,M}, Ifetch) {
1062 k_popMandatoryQueue;
// Store to a Shared block: upgrade request, wait for data/acks in SM.
1065 transition(S, Store, SM) {
1069 k_popMandatoryQueue;
// Clean replacement: notify the CPU of the eviction and free the tag.
1072 transition(S, L1_Replacement, I) {
1073 forward_eviction_to_cpu;
1074 ff_deallocateL1CacheBlock;
// Invalidation from L2: drop the block (ack action elided here).
1077 transition(S, Inv, I) {
1078 forward_eviction_to_cpu;
1083 // Transitions from Exclusive
// Store to E or M completes locally; E silently upgrades to M.
1085 transition({E,M}, Store, M) {
1088 k_popMandatoryQueue;
// Replacement from E issues a PUTX and holds data in the TBE in case a
// forwarded request arrives before the WB_Ack (hence M_I, not I).
1091 transition(E, L1_Replacement, M_I) {
1092 // silent E replacement??
1093 forward_eviction_to_cpu;
1095 g_issuePUTX; // send data, but hold in case forwarded request
1096 ff_deallocateL1CacheBlock;
// Invalidation from L2: drop the block (ack action elided here).
1099 transition(E, Inv, I) {
1101 forward_eviction_to_cpu;
// Forwarded exclusive request: supply data to the requestor and invalidate.
1106 transition(E, Fwd_GETX, I) {
1107 forward_eviction_to_cpu;
1108 d_sendDataToRequestor;
// Forwarded read request: supply data, fall back to Shared.
1112 transition(E, {Fwd_GETS, Fwd_GET_INSTR}, S) {
1113 d_sendDataToRequestor;
1118 // Transitions from Modified
// Dirty replacement: PUTX the data, hold a copy in the TBE while in M_I
// in case a forwarded request races with the writeback.
1120 transition(M, L1_Replacement, M_I) {
1121 forward_eviction_to_cpu;
1123 g_issuePUTX; // send data, but hold in case forwarded request
1124 ff_deallocateL1CacheBlock;
// Writeback acknowledged: transaction complete, free the TBE's dependents.
1127 transition(M_I, WB_Ack, I) {
1129 o_popIncomingResponseQueue;
1130 kd_wakeUpDependents;
// Invalidation of a Modified block (data-send/ack actions elided here).
1133 transition(M, Inv, I) {
1134 forward_eviction_to_cpu;
// Invalidation races with our in-flight writeback: send the TBE data to L2
// and sink the eventual WB_Ack.
1139 transition(M_I, Inv, SINK_WB_ACK) {
1140 ft_sendDataToL2_fromTBE;
// Forwarded exclusive request hits a Modified block: supply data, invalidate.
1144 transition(M, Fwd_GETX, I) {
1145 forward_eviction_to_cpu;
1146 d_sendDataToRequestor;
// Forwarded read request: supply data, fall back to Shared.
1150 transition(M, {Fwd_GETS, Fwd_GET_INSTR}, S) {
1151 d_sendDataToRequestor;
// Forwarded requests racing with our writeback are served from the TBE;
// the stale WB_Ack will be sunk in SINK_WB_ACK.
1156 transition(M_I, Fwd_GETX, SINK_WB_ACK) {
1157 dt_sendDataToRequestor_fromTBE;
1161 transition(M_I, {Fwd_GETS, Fwd_GET_INSTR}, SINK_WB_ACK) {
1162 dt_sendDataToRequestor_fromTBE;
1163 d2t_sendDataToL2_fromTBE;
1167 // Transitions from IS
// An Inv arriving while our GETS is in flight means the data we will
// receive is already stale: remember that via the *_I variant.
1168 transition({IS, IS_I}, Inv, IS_I) {
1173 transition({PF_IS, PF_IS_I}, Inv, PF_IS_I) {
// Read data arrives with all acks: install as Shared and wake dependents.
1178 transition(IS, Data_all_Acks, S) {
1179 u_writeDataToL1Cache;
1182 o_popIncomingResponseQueue;
1183 kd_wakeUpDependents;
1186 transition(PF_IS, Data_all_Acks, S) {
1187 u_writeDataToL1Cache;
1190 o_popIncomingResponseQueue;
1191 kd_wakeUpDependents;
// Data arrives after an Inv (IS_I): the load still completes with this
// data, but the block ends Invalid.
1194 transition(IS_I, Data_all_Acks, I) {
1195 u_writeDataToL1Cache;
1198 o_popIncomingResponseQueue;
1199 kd_wakeUpDependents;
// Invalidated prefetch: the data is simply dropped (no cache write).
1202 transition(PF_IS_I, Data_all_Acks, I) {
1204 o_popIncomingResponseQueue;
1205 kd_wakeUpDependents;
// Data supplied by another L1 (via L2) rather than memory.
1208 transition(IS, DataS_fromL1, S) {
1209 u_writeDataToL1Cache;
1213 o_popIncomingResponseQueue;
1214 kd_wakeUpDependents;
1217 transition(PF_IS, DataS_fromL1, S) {
1218 u_writeDataToL1Cache;
1221 o_popIncomingResponseQueue;
1222 kd_wakeUpDependents;
1225 transition(IS_I, DataS_fromL1, I) {
1226 u_writeDataToL1Cache;
1230 o_popIncomingResponseQueue;
1231 kd_wakeUpDependents;
1234 transition(PF_IS_I, DataS_fromL1, I) {
1237 o_popIncomingResponseQueue;
1238 kd_wakeUpDependents;
1241 // directory is blocked when sending exclusive data
// Exclusive data supersedes the earlier Inv: we may safely take E, but must
// send an exclusive unblock so the blocked directory can proceed.
1242 transition(IS_I, Data_Exclusive, E) {
1243 u_writeDataToL1Cache;
1245 jj_sendExclusiveUnblock;
1247 o_popIncomingResponseQueue;
1248 kd_wakeUpDependents;
1251 // directory is blocked when sending exclusive data
1252 transition(PF_IS_I, Data_Exclusive, E) {
1253 u_writeDataToL1Cache;
1254 jj_sendExclusiveUnblock;
1256 o_popIncomingResponseQueue;
1257 kd_wakeUpDependents;
// GETS answered with exclusive data: install as Exclusive and unblock.
1260 transition(IS, Data_Exclusive, E) {
1261 u_writeDataToL1Cache;
1263 jj_sendExclusiveUnblock;
1265 o_popIncomingResponseQueue;
1266 kd_wakeUpDependents;
1269 transition(PF_IS, Data_Exclusive, E) {
1270 u_writeDataToL1Cache;
1271 jj_sendExclusiveUnblock;
1274 o_popIncomingResponseQueue;
1275 kd_wakeUpDependents;
1278 // Transitions from IM
// An Inv during our GETX does not change state — we never held the block,
// so there is nothing to invalidate (ack action elided here).
1279 transition(IM, Inv, IM) {
1284 transition({PF_IM, PF_SM}, Inv, PF_IM) {
// Data arrives but invalidation acks are still outstanding: move to SM
// (PF_SM for prefetches) and keep counting acks.
1289 transition(IM, Data, SM) {
1290 u_writeDataToL1Cache;
1292 o_popIncomingResponseQueue;
1295 transition(PF_IM, Data, PF_SM) {
1296 u_writeDataToL1Cache;
1298 o_popIncomingResponseQueue;
// Data arrives with all acks already collected: the store can complete,
// install as Modified and unblock the directory.
1301 transition(IM, Data_all_Acks, M) {
1302 u_writeDataToL1Cache;
1304 jj_sendExclusiveUnblock;
1306 o_popIncomingResponseQueue;
1307 kd_wakeUpDependents;
1310 transition(PF_IM, Data_all_Acks, M) {
1311 u_writeDataToL1Cache;
1312 jj_sendExclusiveUnblock;
1315 o_popIncomingResponseQueue;
1316 kd_wakeUpDependents;
1319 // transitions from SM
// Our Shared copy is invalidated while the upgrade is in flight: fall back
// to IM and wait for fresh data instead of just acks.
1320 transition(SM, Inv, IM) {
// Each invalidation ack decrements the pending count (counting action
// elided in this excerpt — numbering skips a line).
1326 transition({SM, IM, PF_SM, PF_IM}, Ack) {
1328 o_popIncomingResponseQueue;
// Last ack collected: the store completes, install as Modified and
// send the exclusive unblock to release the directory.
1331 transition(SM, Ack_all, M) {
1332 jj_sendExclusiveUnblock;
1335 o_popIncomingResponseQueue;
1336 kd_wakeUpDependents;
1339 transition(PF_SM, Ack_all, M) {
1340 jj_sendExclusiveUnblock;
1343 o_popIncomingResponseQueue;
1344 kd_wakeUpDependents;
// SINK_WB_ACK: our writeback data was superseded by a forwarded request or
// Inv; absorb stray messages until the stale WB_Ack arrives, then go Invalid.
1347 transition(SINK_WB_ACK, Inv){
1352 transition(SINK_WB_ACK, WB_Ack, I){
1354 o_popIncomingResponseQueue;
1355 kd_wakeUpDependents;