3 * Copyright (c) 1999-2005 Mark D. Hill and David A. Wood
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met: redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer;
10 * redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution;
13 * neither the name of the copyright holders nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// L1 cache controller for the two-level MESI CMP directory protocol.
// Demand requests arrive on the mandatoryQueue from the Sequencer;
// optional prefetch requests arrive from the Prefetcher.
// NOTE(review): this extract is missing interleaved lines from the original
// file — e.g. the end of the parameter list (a send_evictions flag is used
// later by forward_eviction_to_cpu but is not visible here) and the closing
// punctuation of several declarations. Confirm against the full source.
30 machine(L1Cache, "MESI Directory L1 Cache CMP")
31 : Sequencer * sequencer,
32 CacheMemory * L1IcacheMemory,
33 CacheMemory * L1DcacheMemory,
34 Prefetcher * prefetcher = 'NULL',
// Number of address bits used to select the destination L2 bank.
35 int l2_select_num_bits,
36 int l1_request_latency = 2,
37 int l1_response_latency = 2,
38 int to_l2_latency = 1,
40 bool enable_prefetch = "False"
// Virtual networks 0/1/2 carry requests, responses, and unblocks
// respectively, in both directions.
43 // From this node's L1 cache TO the network
44 // a local L1 -> this L2 bank, currently ordered with directory forwarded requests
45 MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false", vnet_type="request";
46 // a local L1 -> this L2 bank
47 MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false", vnet_type="response";
48 MessageBuffer unblockFromL1Cache, network="To", virtual_network="2", ordered="false", vnet_type="unblock";
51 // To this node's L1 cache FROM the network
52 // a L2 bank -> this L1
53 MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false", vnet_type="request";
54 // a L2 bank -> this L1
55 MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false", vnet_type="response";
56 // Request Buffer for prefetches
// Local (non-network) queue between the prefetcher and this controller.
57 MessageBuffer optionalQueue, ordered="false";
// Controller states. Stable MESI states first, then the transient states
// held in the TBE while a request is outstanding, then the PF_* variants
// used when the block is being brought in by a prefetch rather than a
// demand access.
61 state_declaration(State, desc="Cache states", default="L1Cache_State_I") {
// Base (stable) states
63 NP, AccessPermission:Invalid, desc="Not present in either cache";
64 I, AccessPermission:Invalid, desc="a L1 cache entry Idle";
65 S, AccessPermission:Read_Only, desc="a L1 cache entry Shared";
66 E, AccessPermission:Read_Only, desc="a L1 cache entry Exclusive";
67 M, AccessPermission:Read_Write, desc="a L1 cache entry Modified", format="!b";
// Transient states
70 IS, AccessPermission:Busy, desc="L1 idle, issued GETS, have not seen response yet";
71 IM, AccessPermission:Busy, desc="L1 idle, issued GETX, have not seen response yet";
// SM keeps Read_Only permission: the old S copy may still be read while
// the upgrade is in flight.
72 SM, AccessPermission:Read_Only, desc="L1 idle, issued GETX, have not seen response yet";
73 IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit";
75 M_I, AccessPermission:Busy, desc="L1 replacing, waiting for ACK";
76 SINK_WB_ACK, AccessPermission:Busy, desc="This is to sink WB_Acks from L2";
78 // Transient States in which block is being prefetched
79 PF_IS, AccessPermission:Busy, desc="Issued GETS, have not seen response yet";
80 PF_IM, AccessPermission:Busy, desc="Issued GETX, have not seen response yet";
81 PF_SM, AccessPermission:Busy, desc="Issued GETX, received data, waiting for acks";
82 PF_IS_I, AccessPermission:Busy, desc="Issued GETs, saw inv before data";
// Events that drive the transition table. They originate from the core
// (mandatoryQueue), the L2 bank (request/response networks), or the
// prefetcher (optionalQueue).
86 enumeration(Event, desc="Cache events") {
// Demand requests from the attached core
88 Load, desc="Load request from the home processor";
89 Ifetch, desc="I-fetch request from the home processor";
90 Store, desc="Store request from the home processor";
// External coherence traffic from the L2 bank
92 Inv, desc="Invalidate request from L2 bank";
94 // internal generated request
95 L1_Replacement, desc="L1 Replacement", format="!r";
// Requests from another L1 forwarded through the L2
98 Fwd_GETX, desc="GETX from other processor";
99 Fwd_GETS, desc="GETS from other processor";
100 Fwd_GET_INSTR, desc="GET_INSTR from other processor";
// Data responses; the variant chosen depends on sender and pending acks
102 Data, desc="Data for processor";
103 Data_Exclusive, desc="Data for processor";
104 DataS_fromL1, desc="data for GETS request, need to unblock directory";
105 Data_all_Acks, desc="Data for processor, all acks";
107 Ack, desc="Ack for processor";
108 Ack_all, desc="Last ack for processor";
110 WB_Ack, desc="Ack for replacement";
// Prefetch-initiated counterparts of Load/Ifetch/Store
112 PF_Load, desc="load request from prefetcher";
113 PF_Ifetch, desc="instruction fetch request from prefetcher";
114 PF_Store, desc="exclusive load request from prefetcher";
// Per-block L1 cache entry (shared by the I- and D-cache CacheMemory).
120 structure(Entry, desc="...", interface="AbstractCacheEntry" ) {
121 State CacheState, desc="cache state";
122 DataBlock DataBlk, desc="data for the block";
123 bool Dirty, default="false", desc="data is dirty";
124 bool isPrefetch, desc="Set if this block was prefetched";
// Transaction Buffer Entry: per-address bookkeeping for an in-flight
// request (miss, writeback, or prefetch).
128 structure(TBE, desc="...") {
129 Address Address, desc="Physical address for this TBE";
130 State TBEState, desc="Transient state";
131 DataBlock DataBlk, desc="Buffer for the data block";
132 bool Dirty, default="false", desc="data is dirty";
133 bool isPrefetch, desc="Set if this was caused by a prefetch";
134 int pendingAcks, default="0", desc="number of pending acks";
// Externally-implemented (C++) table of TBEs, indexed by address.
137 structure(TBETable, external="yes") {
139 void allocate(Address);
140 void deallocate(Address);
141 bool isPresent(Address);
144 TBETable L1_TBEs, template="<L1Cache_TBE>", constructor="m_number_of_TBEs";
// Queue carrying demand requests from the Sequencer into this controller.
146 MessageBuffer mandatoryQueue, ordered="false";
// First address bit used for L2 bank interleaving (block-offset bits skipped).
148 int l2_select_low_bit, default="RubySystem::getBlockSizeBits()";
// Prototypes provided by the SLICC runtime.
// NOTE(review): the set_tbe()/unset_tbe() prototypes used by the actions
// below appear to have been dropped from this extract.
150 void set_cache_entry(AbstractCacheEntry a);
151 void unset_cache_entry();
154 void wakeUpBuffers(Address a);
156 // inclusive cache returns L1 entries only
157 Entry getCacheEntry(Address addr), return_by_pointer="yes" {
158 Entry L1Dcache_entry := static_cast(Entry, "pointer", L1DcacheMemory[addr]);
159 if(is_valid(L1Dcache_entry)) {
160 return L1Dcache_entry;
163 Entry L1Icache_entry := static_cast(Entry, "pointer", L1IcacheMemory[addr]);
164 return L1Icache_entry;
167 Entry getL1DCacheEntry(Address addr), return_by_pointer="yes" {
168 Entry L1Dcache_entry := static_cast(Entry, "pointer", L1DcacheMemory[addr]);
169 return L1Dcache_entry;
172 Entry getL1ICacheEntry(Address addr), return_by_pointer="yes" {
173 Entry L1Icache_entry := static_cast(Entry, "pointer", L1IcacheMemory[addr]);
174 return L1Icache_entry;
177 State getState(TBE tbe, Entry cache_entry, Address addr) {
178 assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false);
182 } else if (is_valid(cache_entry)) {
183 return cache_entry.CacheState;
188 void setState(TBE tbe, Entry cache_entry, Address addr, State state) {
189 assert((L1DcacheMemory.isTagPresent(addr) && L1IcacheMemory.isTagPresent(addr)) == false);
193 tbe.TBEState := state;
196 if (is_valid(cache_entry)) {
197 cache_entry.CacheState := state;
201 AccessPermission getAccessPermission(Address addr) {
202 TBE tbe := L1_TBEs[addr];
204 DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(tbe.TBEState));
205 return L1Cache_State_to_permission(tbe.TBEState);
208 Entry cache_entry := getCacheEntry(addr);
209 if(is_valid(cache_entry)) {
210 DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(cache_entry.CacheState));
211 return L1Cache_State_to_permission(cache_entry.CacheState);
214 DPRINTF(RubySlicc, "%s\n", AccessPermission:NotPresent);
215 return AccessPermission:NotPresent;
218 DataBlock getDataBlock(Address addr), return_by_ref="yes" {
219 TBE tbe := L1_TBEs[addr];
224 return getCacheEntry(addr).DataBlk;
227 void setAccessPermission(Entry cache_entry, Address addr, State state) {
228 if (is_valid(cache_entry)) {
229 cache_entry.changePermission(L1Cache_State_to_permission(state));
233 Event mandatory_request_type_to_event(RubyRequestType type) {
234 if (type == RubyRequestType:LD) {
236 } else if (type == RubyRequestType:IFETCH) {
238 } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) {
241 error("Invalid RubyRequestType");
245 Event prefetch_request_type_to_event(RubyRequestType type) {
246 if (type == RubyRequestType:LD) {
247 return Event:PF_Load;
248 } else if (type == RubyRequestType:IFETCH) {
249 return Event:PF_Ifetch;
250 } else if ((type == RubyRequestType:ST) ||
251 (type == RubyRequestType:ATOMIC)) {
252 return Event:PF_Store;
254 error("Invalid RubyRequestType");
258 int getPendingAcks(TBE tbe) {
259 return tbe.pendingAcks;
262 out_port(requestIntraChipL1Network_out, RequestMsg, requestFromL1Cache);
263 out_port(responseIntraChipL1Network_out, ResponseMsg, responseFromL1Cache);
264 out_port(unblockNetwork_out, ResponseMsg, unblockFromL1Cache);
265 out_port(optionalQueue_out, RubyRequest, optionalQueue);
// In port (rank 3 = lowest trigger priority): prefetch requests.
// A prefetch is dropped if the block is already in either L1; otherwise it
// is issued if the target cache has room, else a replacement is triggered.
// NOTE(review): this extract is missing interleaved lines — the
// `in_msg.LineAddress,` argument of several trigger() calls, `}` /
// `} else {` lines, and the data-side else arm header — so the structure
// below is incomplete as shown. Confirm against the full source.
268 // Prefetch queue between the controller and the prefetcher
269 // As per Spracklen et al. (HPCA 2005), the prefetch queue should be
270 // implemented as a LIFO structure. The structure would allow for fast
271 // searches of all entries in the queue, not just the head msg. All
272 // msgs in the structure can be invalidated if a demand miss matches.
273 in_port(optionalQueue_in, RubyRequest, optionalQueue, desc="...", rank = 3) {
274 if (optionalQueue_in.isReady()) {
275 peek(optionalQueue_in, RubyRequest) {
276 // Instruction Prefetch
277 if (in_msg.Type == RubyRequestType:IFETCH) {
278 Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
279 if (is_valid(L1Icache_entry)) {
280 // The block to be prefetched is already present in the
281 // cache. We should drop this request.
282 trigger(prefetch_request_type_to_event(in_msg.Type),
284 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
287 // Check to see if it is in the OTHER L1
288 Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
289 if (is_valid(L1Dcache_entry)) {
290 // The block is in the wrong L1 cache. We should drop
292 trigger(prefetch_request_type_to_event(in_msg.Type),
294 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
297 if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
298 // L1 does't have the line, but we have space for it
299 // in the L1 so let's see if the L2 has it
300 trigger(prefetch_request_type_to_event(in_msg.Type),
302 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
304 // No room in the L1, so we need to make room in the L1
// cacheProbe returns the victim address chosen by the replacement policy.
305 trigger(Event:L1_Replacement,
306 L1IcacheMemory.cacheProbe(in_msg.LineAddress),
307 getL1ICacheEntry(L1IcacheMemory.cacheProbe(in_msg.LineAddress)),
308 L1_TBEs[L1IcacheMemory.cacheProbe(in_msg.LineAddress)]);
// Data prefetch (same decision tree, with the roles of the caches swapped)
312 Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
313 if (is_valid(L1Dcache_entry)) {
314 // The block to be prefetched is already present in the
315 // cache. We should drop this request.
316 trigger(prefetch_request_type_to_event(in_msg.Type),
318 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
321 // Check to see if it is in the OTHER L1
322 Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
323 if (is_valid(L1Icache_entry)) {
324 // The block is in the wrong L1. Just drop the prefetch
326 trigger(prefetch_request_type_to_event(in_msg.Type),
328 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
331 if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
332 // L1 does't have the line, but we have space for it in
333 // the L1 let's see if the L2 has it
334 trigger(prefetch_request_type_to_event(in_msg.Type),
336 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
338 // No room in the L1, so we need to make room in the L1
339 trigger(Event:L1_Replacement,
340 L1DcacheMemory.cacheProbe(in_msg.LineAddress),
341 getL1DCacheEntry(L1DcacheMemory.cacheProbe(in_msg.LineAddress)),
342 L1_TBEs[L1DcacheMemory.cacheProbe(in_msg.LineAddress)]);
// In port (rank 2): responses from the L2 (or a peer L1) to this L1.
// block_on="Address" stalls other messages for the same address while one
// is being processed.
// NOTE(review): `} else {` lines between several trigger() pairs are
// missing from this extract.
349 // Response IntraChip L1 Network - response msg to this L1 cache
350 in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache, rank = 2) {
351 if (responseIntraChipL1Network_in.isReady()) {
352 peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Address") {
353 assert(in_msg.Destination.isElement(machineID));
355 Entry cache_entry := getCacheEntry(in_msg.Address);
356 TBE tbe := L1_TBEs[in_msg.Address];
358 if(in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
359 trigger(Event:Data_Exclusive, in_msg.Address, cache_entry, tbe);
360 } else if(in_msg.Type == CoherenceResponseType:DATA) {
// Data supplied directly by another L1 while we were in a GETS-pending
// state means the directory must be unblocked separately.
361 if ((getState(tbe, cache_entry, in_msg.Address) == State:IS ||
362 getState(tbe, cache_entry, in_msg.Address) == State:IS_I ||
363 getState(tbe, cache_entry, in_msg.Address) == State:PF_IS ||
364 getState(tbe, cache_entry, in_msg.Address) == State:PF_IS_I) &&
365 machineIDToMachineType(in_msg.Sender) == MachineType:L1Cache) {
367 trigger(Event:DataS_fromL1, in_msg.Address, cache_entry, tbe);
// Data may carry an ack count; if it zeroes the pending count the
// transaction completes in one step.
369 } else if ( (getPendingAcks(tbe) - in_msg.AckCount) == 0 ) {
370 trigger(Event:Data_all_Acks, in_msg.Address, cache_entry, tbe);
372 trigger(Event:Data, in_msg.Address, cache_entry, tbe);
374 } else if (in_msg.Type == CoherenceResponseType:ACK) {
375 if ( (getPendingAcks(tbe) - in_msg.AckCount) == 0 ) {
376 trigger(Event:Ack_all, in_msg.Address, cache_entry, tbe);
378 trigger(Event:Ack, in_msg.Address, cache_entry, tbe);
380 } else if (in_msg.Type == CoherenceResponseType:WB_ACK) {
381 trigger(Event:WB_Ack, in_msg.Address, cache_entry, tbe);
383 error("Invalid L1 response type");
// In port (rank 1): requests forwarded to this L1 by the L2 bank
// (invalidations and forwarded GETX/GETS/GET_INSTR from other L1s).
389 // Request InterChip network - request from this L1 cache to the shared L2
390 in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache, rank = 1) {
391 if(requestIntraChipL1Network_in.isReady()) {
392 peek(requestIntraChipL1Network_in, RequestMsg, block_on="Address") {
393 assert(in_msg.Destination.isElement(machineID));
395 Entry cache_entry := getCacheEntry(in_msg.Address);
396 TBE tbe := L1_TBEs[in_msg.Address];
398 if (in_msg.Type == CoherenceRequestType:INV) {
399 trigger(Event:Inv, in_msg.Address, cache_entry, tbe);
400 } else if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestType:UPGRADE) {
401 // upgrade transforms to GETX due to race
402 trigger(Event:Fwd_GETX, in_msg.Address, cache_entry, tbe);
403 } else if (in_msg.Type == CoherenceRequestType:GETS) {
404 trigger(Event:Fwd_GETS, in_msg.Address, cache_entry, tbe);
405 } else if (in_msg.Type == CoherenceRequestType:GET_INSTR) {
406 trigger(Event:Fwd_GET_INSTR, in_msg.Address, cache_entry, tbe);
408 error("Invalid forwarded request type");
// In port (rank 0 = highest trigger priority): demand requests from the
// core via the Sequencer. For each request: hit in the right L1 -> demand
// event; block in the wrong L1 -> replacement there first; otherwise
// allocate-and-fetch if space allows, else evict a victim.
// NOTE(review): `} else {` lines and closing braces are missing from this
// extract; the two arms (IFETCH vs data access) mirror each other.
414 // Mandatory Queue betweens Node's CPU and it's L1 caches
415 in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...", rank = 0) {
416 if (mandatoryQueue_in.isReady()) {
417 peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
419 // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache
421 if (in_msg.Type == RubyRequestType:IFETCH) {
422 // ** INSTRUCTION ACCESS ***
424 Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
425 if (is_valid(L1Icache_entry)) {
426 // The tag matches for the L1, so the L1 asks the L2 for it.
427 trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
428 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
431 // Check to see if it is in the OTHER L1
432 Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
433 if (is_valid(L1Dcache_entry)) {
434 // The block is in the wrong L1, put the request on the queue to the shared L2
435 trigger(Event:L1_Replacement, in_msg.LineAddress,
436 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
439 if (L1IcacheMemory.cacheAvail(in_msg.LineAddress)) {
440 // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it
441 trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
442 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
444 // No room in the L1, so we need to make room in the L1
// cacheProbe selects the victim; the replacement is triggered on the
// victim's address, and this request is retried after it completes.
445 trigger(Event:L1_Replacement, L1IcacheMemory.cacheProbe(in_msg.LineAddress),
446 getL1ICacheEntry(L1IcacheMemory.cacheProbe(in_msg.LineAddress)),
447 L1_TBEs[L1IcacheMemory.cacheProbe(in_msg.LineAddress)]);
452 // *** DATA ACCESS ***
453 Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
454 if (is_valid(L1Dcache_entry)) {
455 // The tag matches for the L1, so the L1 ask the L2 for it
456 trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
457 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
460 // Check to see if it is in the OTHER L1
461 Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
462 if (is_valid(L1Icache_entry)) {
463 // The block is in the wrong L1, put the request on the queue to the shared L2
464 trigger(Event:L1_Replacement, in_msg.LineAddress,
465 L1Icache_entry, L1_TBEs[in_msg.LineAddress]);
468 if (L1DcacheMemory.cacheAvail(in_msg.LineAddress)) {
469 // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it
470 trigger(mandatory_request_type_to_event(in_msg.Type), in_msg.LineAddress,
471 L1Dcache_entry, L1_TBEs[in_msg.LineAddress]);
473 // No room in the L1, so we need to make room in the L1
474 trigger(Event:L1_Replacement, L1DcacheMemory.cacheProbe(in_msg.LineAddress),
475 getL1DCacheEntry(L1DcacheMemory.cacheProbe(in_msg.LineAddress)),
476 L1_TBEs[L1DcacheMemory.cacheProbe(in_msg.LineAddress)]);
484 void enqueuePrefetch(Address address, RubyRequestType type) {
485 enqueue(optionalQueue_out, RubyRequest, latency=1) {
486 out_msg.LineAddress := address;
487 out_msg.Type := type;
488 out_msg.AccessMode := RubyAccessMode:Supervisor;
493 action(a_issueGETS, "a", desc="Issue GETS") {
494 peek(mandatoryQueue_in, RubyRequest) {
495 enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
496 out_msg.Address := address;
497 out_msg.Type := CoherenceRequestType:GETS;
498 out_msg.Requestor := machineID;
499 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
500 l2_select_low_bit, l2_select_num_bits));
501 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
502 address, out_msg.Destination);
503 out_msg.MessageSize := MessageSizeType:Control;
504 out_msg.Prefetch := in_msg.Prefetch;
505 out_msg.AccessMode := in_msg.AccessMode;
510 action(pa_issuePfGETS, "pa", desc="Issue prefetch GETS") {
511 peek(optionalQueue_in, RubyRequest) {
512 enqueue(requestIntraChipL1Network_out, RequestMsg,
513 latency=l1_request_latency) {
514 out_msg.Address := address;
515 out_msg.Type := CoherenceRequestType:GETS;
516 out_msg.Requestor := machineID;
517 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
518 l2_select_low_bit, l2_select_num_bits));
519 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
520 address, out_msg.Destination);
521 out_msg.MessageSize := MessageSizeType:Control;
522 out_msg.Prefetch := in_msg.Prefetch;
523 out_msg.AccessMode := in_msg.AccessMode;
528 action(ai_issueGETINSTR, "ai", desc="Issue GETINSTR") {
529 peek(mandatoryQueue_in, RubyRequest) {
530 enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
531 out_msg.Address := address;
532 out_msg.Type := CoherenceRequestType:GET_INSTR;
533 out_msg.Requestor := machineID;
534 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
535 l2_select_low_bit, l2_select_num_bits));
536 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
537 address, out_msg.Destination);
538 out_msg.MessageSize := MessageSizeType:Control;
539 out_msg.Prefetch := in_msg.Prefetch;
540 out_msg.AccessMode := in_msg.AccessMode;
545 action(pai_issuePfGETINSTR, "pai",
546 desc="Issue GETINSTR for prefetch request") {
547 peek(optionalQueue_in, RubyRequest) {
548 enqueue(requestIntraChipL1Network_out, RequestMsg,
549 latency=l1_request_latency) {
550 out_msg.Address := address;
551 out_msg.Type := CoherenceRequestType:GET_INSTR;
552 out_msg.Requestor := machineID;
553 out_msg.Destination.add(
554 mapAddressToRange(address, MachineType:L2Cache,
555 l2_select_low_bit, l2_select_num_bits));
556 out_msg.MessageSize := MessageSizeType:Control;
557 out_msg.Prefetch := in_msg.Prefetch;
558 out_msg.AccessMode := in_msg.AccessMode;
560 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
561 address, out_msg.Destination);
566 action(b_issueGETX, "b", desc="Issue GETX") {
567 peek(mandatoryQueue_in, RubyRequest) {
568 enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_request_latency) {
569 out_msg.Address := address;
570 out_msg.Type := CoherenceRequestType:GETX;
571 out_msg.Requestor := machineID;
572 DPRINTF(RubySlicc, "%s\n", machineID);
573 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
574 l2_select_low_bit, l2_select_num_bits));
575 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
576 address, out_msg.Destination);
577 out_msg.MessageSize := MessageSizeType:Control;
578 out_msg.Prefetch := in_msg.Prefetch;
579 out_msg.AccessMode := in_msg.AccessMode;
584 action(pb_issuePfGETX, "pb", desc="Issue prefetch GETX") {
585 peek(optionalQueue_in, RubyRequest) {
586 enqueue(requestIntraChipL1Network_out, RequestMsg,
587 latency=l1_request_latency) {
588 out_msg.Address := address;
589 out_msg.Type := CoherenceRequestType:GETX;
590 out_msg.Requestor := machineID;
591 DPRINTF(RubySlicc, "%s\n", machineID);
593 out_msg.Destination.add(mapAddressToRange(address,
596 l2_select_num_bits));
598 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
599 address, out_msg.Destination);
600 out_msg.MessageSize := MessageSizeType:Control;
601 out_msg.Prefetch := in_msg.Prefetch;
602 out_msg.AccessMode := in_msg.AccessMode;
607 action(c_issueUPGRADE, "c", desc="Issue GETX") {
608 peek(mandatoryQueue_in, RubyRequest) {
609 enqueue(requestIntraChipL1Network_out, RequestMsg, latency= l1_request_latency) {
610 out_msg.Address := address;
611 out_msg.Type := CoherenceRequestType:UPGRADE;
612 out_msg.Requestor := machineID;
613 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
614 l2_select_low_bit, l2_select_num_bits));
615 DPRINTF(RubySlicc, "address: %s, destination: %s\n",
616 address, out_msg.Destination);
617 out_msg.MessageSize := MessageSizeType:Control;
618 out_msg.Prefetch := in_msg.Prefetch;
619 out_msg.AccessMode := in_msg.AccessMode;
624 action(d_sendDataToRequestor, "d", desc="send data to requestor") {
625 peek(requestIntraChipL1Network_in, RequestMsg) {
626 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
627 assert(is_valid(cache_entry));
628 out_msg.Address := address;
629 out_msg.Type := CoherenceResponseType:DATA;
630 out_msg.DataBlk := cache_entry.DataBlk;
631 out_msg.Dirty := cache_entry.Dirty;
632 out_msg.Sender := machineID;
633 out_msg.Destination.add(in_msg.Requestor);
634 out_msg.MessageSize := MessageSizeType:Response_Data;
639 action(d2_sendDataToL2, "d2", desc="send data to the L2 cache because of M downgrade") {
640 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
641 assert(is_valid(cache_entry));
642 out_msg.Address := address;
643 out_msg.Type := CoherenceResponseType:DATA;
644 out_msg.DataBlk := cache_entry.DataBlk;
645 out_msg.Dirty := cache_entry.Dirty;
646 out_msg.Sender := machineID;
647 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
648 l2_select_low_bit, l2_select_num_bits));
649 out_msg.MessageSize := MessageSizeType:Response_Data;
653 action(dt_sendDataToRequestor_fromTBE, "dt", desc="send data to requestor") {
654 peek(requestIntraChipL1Network_in, RequestMsg) {
655 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
656 assert(is_valid(tbe));
657 out_msg.Address := address;
658 out_msg.Type := CoherenceResponseType:DATA;
659 out_msg.DataBlk := tbe.DataBlk;
660 out_msg.Dirty := tbe.Dirty;
661 out_msg.Sender := machineID;
662 out_msg.Destination.add(in_msg.Requestor);
663 out_msg.MessageSize := MessageSizeType:Response_Data;
668 action(d2t_sendDataToL2_fromTBE, "d2t", desc="send data to the L2 cache") {
669 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
670 assert(is_valid(tbe));
671 out_msg.Address := address;
672 out_msg.Type := CoherenceResponseType:DATA;
673 out_msg.DataBlk := tbe.DataBlk;
674 out_msg.Dirty := tbe.Dirty;
675 out_msg.Sender := machineID;
676 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
677 l2_select_low_bit, l2_select_num_bits));
678 out_msg.MessageSize := MessageSizeType:Response_Data;
682 action(e_sendAckToRequestor, "e", desc="send invalidate ack to requestor (could be L2 or L1)") {
683 peek(requestIntraChipL1Network_in, RequestMsg) {
684 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
685 out_msg.Address := address;
686 out_msg.Type := CoherenceResponseType:ACK;
687 out_msg.Sender := machineID;
688 out_msg.Destination.add(in_msg.Requestor);
689 out_msg.MessageSize := MessageSizeType:Response_Control;
694 action(f_sendDataToL2, "f", desc="send data to the L2 cache") {
695 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
696 assert(is_valid(cache_entry));
697 out_msg.Address := address;
698 out_msg.Type := CoherenceResponseType:DATA;
699 out_msg.DataBlk := cache_entry.DataBlk;
700 out_msg.Dirty := cache_entry.Dirty;
701 out_msg.Sender := machineID;
702 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
703 l2_select_low_bit, l2_select_num_bits));
704 out_msg.MessageSize := MessageSizeType:Writeback_Data;
708 action(ft_sendDataToL2_fromTBE, "ft", desc="send data to the L2 cache") {
709 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
710 assert(is_valid(tbe));
711 out_msg.Address := address;
712 out_msg.Type := CoherenceResponseType:DATA;
713 out_msg.DataBlk := tbe.DataBlk;
714 out_msg.Dirty := tbe.Dirty;
715 out_msg.Sender := machineID;
716 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
717 l2_select_low_bit, l2_select_num_bits));
718 out_msg.MessageSize := MessageSizeType:Writeback_Data;
722 action(fi_sendInvAck, "fi", desc="send data to the L2 cache") {
723 peek(requestIntraChipL1Network_in, RequestMsg) {
724 enqueue(responseIntraChipL1Network_out, ResponseMsg, latency=l1_response_latency) {
725 out_msg.Address := address;
726 out_msg.Type := CoherenceResponseType:ACK;
727 out_msg.Sender := machineID;
728 out_msg.Destination.add(in_msg.Requestor);
729 out_msg.MessageSize := MessageSizeType:Response_Control;
730 out_msg.AckCount := 1;
735 action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
736 if (send_evictions) {
737 DPRINTF(RubySlicc, "Sending invalidation for %s to the CPU\n", address);
738 sequencer.evictionCallback(address);
742 action(g_issuePUTX, "g", desc="send data to the L2 cache") {
743 enqueue(requestIntraChipL1Network_out, RequestMsg, latency=l1_response_latency) {
744 assert(is_valid(cache_entry));
745 out_msg.Address := address;
746 out_msg.Type := CoherenceRequestType:PUTX;
747 out_msg.DataBlk := cache_entry.DataBlk;
748 out_msg.Dirty := cache_entry.Dirty;
749 out_msg.Requestor:= machineID;
750 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
751 l2_select_low_bit, l2_select_num_bits));
752 if (cache_entry.Dirty) {
753 out_msg.MessageSize := MessageSizeType:Writeback_Data;
755 out_msg.MessageSize := MessageSizeType:Writeback_Control;
760 action(j_sendUnblock, "j", desc="send unblock to the L2 cache") {
761 enqueue(unblockNetwork_out, ResponseMsg, latency=to_l2_latency) {
762 out_msg.Address := address;
763 out_msg.Type := CoherenceResponseType:UNBLOCK;
764 out_msg.Sender := machineID;
765 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
766 l2_select_low_bit, l2_select_num_bits));
767 out_msg.MessageSize := MessageSizeType:Response_Control;
768 DPRINTF(RubySlicc, "%s\n", address);
772 action(jj_sendExclusiveUnblock, "\j", desc="send unblock to the L2 cache") {
773 enqueue(unblockNetwork_out, ResponseMsg, latency=to_l2_latency) {
774 out_msg.Address := address;
775 out_msg.Type := CoherenceResponseType:EXCLUSIVE_UNBLOCK;
776 out_msg.Sender := machineID;
777 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
778 l2_select_low_bit, l2_select_num_bits));
779 out_msg.MessageSize := MessageSizeType:Response_Control;
780 DPRINTF(RubySlicc, "%s\n", address);
785 action(h_load_hit, "h", desc="If not prefetch, notify sequencer the load completed.") {
786 assert(is_valid(cache_entry));
787 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
788 sequencer.readCallback(address, cache_entry.DataBlk);
791 action(hh_store_hit, "\h", desc="If not prefetch, notify sequencer that store completed.") {
792 assert(is_valid(cache_entry));
793 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
794 sequencer.writeCallback(address, cache_entry.DataBlk);
795 cache_entry.Dirty := true;
798 action(i_allocateTBE, "i", desc="Allocate TBE (isPrefetch=0, number of invalidates=0)") {
799 check_allocate(L1_TBEs);
800 assert(is_valid(cache_entry));
801 L1_TBEs.allocate(address);
802 set_tbe(L1_TBEs[address]);
803 tbe.isPrefetch := false;
804 tbe.Dirty := cache_entry.Dirty;
805 tbe.DataBlk := cache_entry.DataBlk;
808 action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") {
809 mandatoryQueue_in.dequeue();
812 action(l_popRequestQueue, "l", desc="Pop incoming request queue and profile the delay within this virtual network") {
813 profileMsgDelay(2, requestIntraChipL1Network_in.dequeue_getDelayCycles());
816 action(o_popIncomingResponseQueue, "o", desc="Pop Incoming Response queue and profile the delay within this virtual network") {
817 profileMsgDelay(1, responseIntraChipL1Network_in.dequeue_getDelayCycles());
820 action(s_deallocateTBE, "s", desc="Deallocate TBE") {
821 L1_TBEs.deallocate(address);
825 action(u_writeDataToL1Cache, "u", desc="Write data to cache") {
826 peek(responseIntraChipL1Network_in, ResponseMsg) {
827 assert(is_valid(cache_entry));
828 cache_entry.DataBlk := in_msg.DataBlk;
829 cache_entry.Dirty := in_msg.Dirty;
833 action(q_updateAckCount, "q", desc="Update ack count") {
834 peek(responseIntraChipL1Network_in, ResponseMsg) {
835 assert(is_valid(tbe));
836 tbe.pendingAcks := tbe.pendingAcks - in_msg.AckCount;
837 APPEND_TRANSITION_COMMENT(in_msg.AckCount);
838 APPEND_TRANSITION_COMMENT(" p: ");
839 APPEND_TRANSITION_COMMENT(tbe.pendingAcks);
843 action(ff_deallocateL1CacheBlock, "\f", desc="Deallocate L1 cache block. Sets the cache to not present, allowing a replacement in parallel with a fetch.") {
844 if (L1DcacheMemory.isTagPresent(address)) {
845 L1DcacheMemory.deallocate(address);
847 L1IcacheMemory.deallocate(address);
852 action(oo_allocateL1DCacheBlock, "\o", desc="Set L1 D-cache tag equal to tag of block B.") {
853 if (is_invalid(cache_entry)) {
854 set_cache_entry(L1DcacheMemory.allocate(address, new Entry));
858 action(pp_allocateL1ICacheBlock, "\p", desc="Set L1 I-cache tag equal to tag of block B.") {
859 if (is_invalid(cache_entry)) {
860 set_cache_entry(L1IcacheMemory.allocate(address, new Entry));
864 action(z_stallAndWaitMandatoryQueue, "\z", desc="recycle L1 request queue") {
865 stall_and_wait(mandatoryQueue_in, address);
868 action(kd_wakeUpDependents, "kd", desc="wake-up dependents") {
869 wakeUpBuffers(address);
872 action(uu_profileInstMiss, "\ui", desc="Profile the demand miss") {
873 peek(mandatoryQueue_in, RubyRequest) {
874 L1IcacheMemory.profileMiss(in_msg);
878 action(uu_profileDataMiss, "\ud", desc="Profile the demand miss") {
879 peek(mandatoryQueue_in, RubyRequest) {
880 L1DcacheMemory.profileMiss(in_msg);
// Feed the demand miss (line address + request type) to the prefetcher so it
// can train; gated on the enable_prefetch machine parameter.
884 action(po_observeMiss, "\po", desc="Inform the prefetcher about the miss") {
885 peek(mandatoryQueue_in, RubyRequest) {
886 if (enable_prefetch) {
887 prefetcher.observeMiss(in_msg.LineAddress, in_msg.Type);
// Tell the prefetcher that a demand request hit a block whose prefetch is
// still in flight (a "partial miss"), so it can credit the prefetch.
892 action(ppm_observePfMiss, "\ppm",
893 desc="Inform the prefetcher about the partial miss") {
894 peek(mandatoryQueue_in, RubyRequest) {
895 prefetcher.observePfMiss(in_msg.LineAddress);
// Consume the prefetch request at the head of the optional (prefetch) queue.
899 action(pq_popPrefetchQueue, "\pq", desc="Pop the prefetch request queue") {
900 optionalQueue_in.dequeue();
// Flag the cache entry as having been brought in by a prefetch.
// NOTE(review): the desc string says "Write data from response queue to
// cache", but the visible body only sets the isPrefetch flag — the desc
// appears stale/copy-pasted; confirm against the full source.
903 action(mp_markPrefetched, "mp", desc="Write data from response queue to cache") {
904 assert(is_valid(cache_entry));
905 cache_entry.isPrefetch := true;
909 //*****************************************************
// TRANSITIONS
911 //*****************************************************
// NOTE(review): original line numbers in this section are non-contiguous —
// some action lines inside transitions are elided in this excerpt; comments
// below describe only the visible actions.
913 // Transitions for Load/Store/Replacement/WriteBack from transient states
// While a demand transaction is outstanding, stall any new CPU request or
// replacement for the same block until the transaction resolves.
914 transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Ifetch, Store, L1_Replacement}) {
915 z_stallAndWaitMandatoryQueue;
// Prefetch-read in flight: a Store (needs write permission) or replacement
// must wait; Loads are handled by the dedicated PF_IS transitions below.
918 transition({PF_IS, PF_IS_I}, {Store, L1_Replacement}) {
919 z_stallAndWaitMandatoryQueue;
// Prefetch-write in flight: reads and replacements wait; a Store can be
// merged (see the PF_IM/PF_SM Store transitions below).
922 transition({PF_IM, PF_SM}, {Load, Ifetch, L1_Replacement}) {
923 z_stallAndWaitMandatoryQueue;
926 // Transitions from Idle
// NOTE(review): original line numbers are non-contiguous — some action lines
// are elided in this excerpt; comments describe only the visible actions.
// Replacing a not-present/invalid block just frees the tag.
927 transition({NP,I}, L1_Replacement) {
928 ff_deallocateL1CacheBlock;
// Prefetch requests arriving while the block is already present or already
// in flight are dropped (visible body is empty here — presumably just pops
// the prefetch queue; elided).
931 transition({S,E,M,IS,IM,SM,IS_I,M_I,SINK_WB_ACK,PF_IS,PF_IM},
932 {PF_Load, PF_Store}) {
// Demand read miss: allocate a D-cache entry and go to IS (issue GETS elided).
936 transition({NP,I}, Load, IS) {
937 oo_allocateL1DCacheBlock;
// Prefetch read miss: same allocation, but tracked in the PF_IS state.
945 transition({NP,I}, PF_Load, PF_IS) {
946 oo_allocateL1DCacheBlock;
// Demand Load catches up with an in-flight prefetch read: converts the
// prefetch transaction into a demand one (actions elided).
952 transition(PF_IS, Load, IS) {
958 transition(PF_IS_I, Load, IS_I) {
// Instruction fetch miss: allocate in the I-cache, go to IS.
964 transition({NP,I}, Ifetch, IS) {
965 pp_allocateL1ICacheBlock;
// Prefetched instruction fetch: allocate in the I-cache, track as PF_IS.
973 transition({NP,I}, PF_Ifetch, PF_IS) {
974 pp_allocateL1ICacheBlock;
980 // We proactively assume that the prefetch is in to
981 // the instruction cache
982 transition(PF_IS, Ifetch, IS) {
// Demand write miss: allocate a D-cache entry and go to IM (issue GETX elided).
988 transition({NP,I}, Store, IM) {
989 oo_allocateL1DCacheBlock;
// Prefetch write miss: same allocation, tracked as PF_IM; consume the
// prefetch request.
997 transition({NP,I}, PF_Store, PF_IM) {
998 oo_allocateL1DCacheBlock;
1001 pq_popPrefetchQueue;
// Demand Store catches up with an in-flight prefetch write: consume the
// demand request and continue as an ordinary IM/SM transaction.
1004 transition(PF_IM, Store, IM) {
1007 k_popMandatoryQueue;
1010 transition(PF_SM, Store, SM) {
1013 k_popMandatoryQueue;
// Invalidation for a block we do not hold: acknowledge/ignore (actions elided).
1016 transition({NP, I}, Inv) {
1021 // Transitions from Shared
// NOTE(review): some action lines are elided in this excerpt (non-contiguous
// original line numbers); comments describe only the visible actions.
// Read hit in S: service the CPU (load hit action elided) and pop the request.
1022 transition(S, {Load,Ifetch}) {
1024 k_popMandatoryQueue;
// Store to a shared block: upgrade in place — go to SM awaiting data/acks
// (upgrade issue action elided).
1027 transition(S, Store, SM) {
1031 k_popMandatoryQueue;
// Evict a shared block: notify the CPU of the eviction and free the entry;
// clean data, so no writeback transaction is needed.
1034 transition(S, L1_Replacement, I) {
1035 forward_eviction_to_cpu;
1036 ff_deallocateL1CacheBlock;
// External invalidation of a shared block: notify the CPU and invalidate
// (ack send elided).
1039 transition(S, Inv, I) {
1040 forward_eviction_to_cpu;
1045 // Transitions from Exclusive
// NOTE(review): some action lines are elided in this excerpt; comments
// describe only the visible actions.
// Read hit in E.
1047 transition(E, {Load, Ifetch}) {
1049 k_popMandatoryQueue;
// Store hit in E: silent upgrade to M — no coherence traffic needed since we
// hold the only copy.
1052 transition(E, Store, M) {
1054 k_popMandatoryQueue;
// Evict an exclusive block: issue a PUTX and hold the data in the TBE (state
// M_I) until the writeback is acknowledged, in case a forwarded request
// arrives in the window.
1057 transition(E, L1_Replacement, M_I) {
1058 // silent E replacement??
1059 forward_eviction_to_cpu;
1061 g_issuePUTX; // send data, but hold in case forwarded request
1062 ff_deallocateL1CacheBlock;
// External invalidation of E: notify the CPU (data/ack send elided).
1065 transition(E, Inv, I) {
1067 forward_eviction_to_cpu;
// Another core wants exclusive access: supply data and invalidate.
1072 transition(E, Fwd_GETX, I) {
1073 forward_eviction_to_cpu;
1074 d_sendDataToRequestor;
// Another core wants a read copy: supply data and downgrade to S.
1078 transition(E, {Fwd_GETS, Fwd_GET_INSTR}, S) {
1079 d_sendDataToRequestor;
1084 // Transitions from Modified
// NOTE(review): some action lines are elided in this excerpt; comments
// describe only the visible actions.
// Read/write hits in M complete locally.
1085 transition(M, {Load, Ifetch}) {
1087 k_popMandatoryQueue;
1090 transition(M, Store) {
1092 k_popMandatoryQueue;
// Evict a dirty block: PUTX the data, keep it in the TBE (M_I) until the
// writeback is acknowledged.
1095 transition(M, L1_Replacement, M_I) {
1096 forward_eviction_to_cpu;
1098 g_issuePUTX; // send data, but hold in case forwarded request
1099 ff_deallocateL1CacheBlock;
// Writeback acknowledged: transaction done; wake any stalled requests.
1102 transition(M_I, WB_Ack, I) {
1104 o_popIncomingResponseQueue;
1105 kd_wakeUpDependents;
// External invalidation of M: notify the CPU (dirty-data send elided).
1108 transition(M, Inv, I) {
1109 forward_eviction_to_cpu;
// Invalidation races with our in-flight writeback: send the data from the
// TBE and wait only to sink the eventual WB_Ack.
1114 transition(M_I, Inv, SINK_WB_ACK) {
1115 ft_sendDataToL2_fromTBE;
// Forwarded exclusive request hits a modified block: supply data, invalidate.
1119 transition(M, Fwd_GETX, I) {
1120 forward_eviction_to_cpu;
1121 d_sendDataToRequestor;
// Forwarded read request: supply data, downgrade to S.
1125 transition(M, {Fwd_GETS, Fwd_GET_INSTR}, S) {
1126 d_sendDataToRequestor;
// Forwarded requests racing with our writeback are served from the TBE; for
// reads the L2 also gets a copy. In both cases we then just sink the WB_Ack.
1131 transition(M_I, Fwd_GETX, SINK_WB_ACK) {
1132 dt_sendDataToRequestor_fromTBE;
1136 transition(M_I, {Fwd_GETS, Fwd_GET_INSTR}, SINK_WB_ACK) {
1137 dt_sendDataToRequestor_fromTBE;
1138 d2t_sendDataToL2_fromTBE;
1142 // Transitions from IS
// NOTE(review): some action lines are elided in this excerpt; comments
// describe only the visible actions.
// An Inv that races with our GETS means the data we receive will already be
// stale: remember that by moving to the *_I variant so the fill is not kept.
1143 transition({IS, IS_I}, Inv, IS_I) {
1148 transition({PF_IS, PF_IS_I}, Inv, PF_IS_I) {
// Fill with all acks already accounted for: write the data, settle in S.
1153 transition(IS, Data_all_Acks, S) {
1154 u_writeDataToL1Cache;
1157 o_popIncomingResponseQueue;
1158 kd_wakeUpDependents;
1161 transition(PF_IS, Data_all_Acks, S) {
1162 u_writeDataToL1Cache;
1165 o_popIncomingResponseQueue;
1166 kd_wakeUpDependents;
// Invalidated while waiting: consume the (stale) data and end Invalid.
1169 transition(IS_I, Data_all_Acks, I) {
1170 u_writeDataToL1Cache;
1173 o_popIncomingResponseQueue;
1174 kd_wakeUpDependents;
// Stale prefetch fill: data is not even written (no demand consumer).
1177 transition(PF_IS_I, Data_all_Acks, I) {
1179 o_popIncomingResponseQueue;
1180 kd_wakeUpDependents;
// Data supplied by a peer L1 (sharing): same settling logic as above.
1183 transition(IS, DataS_fromL1, S) {
1184 u_writeDataToL1Cache;
1188 o_popIncomingResponseQueue;
1189 kd_wakeUpDependents;
1192 transition(PF_IS, DataS_fromL1, S) {
1193 u_writeDataToL1Cache;
1196 o_popIncomingResponseQueue;
1197 kd_wakeUpDependents;
1200 transition(IS_I, DataS_fromL1, I) {
1201 u_writeDataToL1Cache;
1205 o_popIncomingResponseQueue;
1206 kd_wakeUpDependents;
1209 transition(PF_IS_I, DataS_fromL1, I) {
1212 o_popIncomingResponseQueue;
1213 kd_wakeUpDependents;
1216 // directory is blocked when sending exclusive data
// Exclusive data wins even over a racing Inv (the directory serialized us
// after the invalidation): take the block in E and send the exclusive
// unblock so the directory can proceed.
1217 transition(IS_I, Data_Exclusive, E) {
1218 u_writeDataToL1Cache;
1220 jj_sendExclusiveUnblock;
1222 o_popIncomingResponseQueue;
1223 kd_wakeUpDependents;
1226 // directory is blocked when sending exclusive data
1227 transition(PF_IS_I, Data_Exclusive, E) {
1228 u_writeDataToL1Cache;
1229 jj_sendExclusiveUnblock;
1231 o_popIncomingResponseQueue;
1232 kd_wakeUpDependents;
1235 transition(IS, Data_Exclusive, E) {
1236 u_writeDataToL1Cache;
1238 jj_sendExclusiveUnblock;
1240 o_popIncomingResponseQueue;
1241 kd_wakeUpDependents;
1244 transition(PF_IS, Data_Exclusive, E) {
1245 u_writeDataToL1Cache;
1246 jj_sendExclusiveUnblock;
1249 o_popIncomingResponseQueue;
1250 kd_wakeUpDependents;
1253 // Transitions from IM
// NOTE(review): some action lines are elided in this excerpt; comments
// describe only the visible actions.
// An Inv racing with our GETX does not cancel the write intent, but any
// shared-state progress is lost: fall back to the IM flavor.
1254 transition({IM, SM}, Inv, IM) {
1259 transition({PF_IM, PF_SM}, Inv, PF_IM) {
// Data arrives but invalidation acks are still outstanding: write the data
// and wait in SM/PF_SM for the remaining acks.
1264 transition(IM, Data, SM) {
1265 u_writeDataToL1Cache;
1267 o_popIncomingResponseQueue;
1270 transition(PF_IM, Data, PF_SM) {
1271 u_writeDataToL1Cache;
1273 o_popIncomingResponseQueue;
// Data with all acks already collected: go straight to M and send the
// exclusive unblock to release the directory.
1276 transition(IM, Data_all_Acks, M) {
1277 u_writeDataToL1Cache;
1279 jj_sendExclusiveUnblock;
1281 o_popIncomingResponseQueue;
1282 kd_wakeUpDependents;
1285 transition(PF_IM, Data_all_Acks, M) {
1286 u_writeDataToL1Cache;
1287 jj_sendExclusiveUnblock;
1290 o_popIncomingResponseQueue;
1291 kd_wakeUpDependents;
1294 // transitions from SM
// NOTE(review): some action lines are elided in this excerpt; comments
// describe only the visible actions.
// Count each invalidation ack as it arrives (q_updateAckCount elided here).
1295 transition({SM, IM, PF_SM, PF_IM}, Ack) {
1297 o_popIncomingResponseQueue;
// Last ack received: the store can complete — go to M and unblock the
// directory.
1300 transition(SM, Ack_all, M) {
1301 jj_sendExclusiveUnblock;
1304 o_popIncomingResponseQueue;
1305 kd_wakeUpDependents;
1308 transition(PF_SM, Ack_all, M) {
1309 jj_sendExclusiveUnblock;
1312 o_popIncomingResponseQueue;
1313 kd_wakeUpDependents;
// Waiting only to sink a writeback ack: ignore invalidations (actions elided).
1316 transition(SINK_WB_ACK, Inv){
// The awaited WB_Ack arrives: transaction complete, wake stalled requests.
1321 transition(SINK_WB_ACK, WB_Ack, I){
1323 o_popIncomingResponseQueue;
1324 kd_wakeUpDependents;