2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// SLICC machine declaration: L1 cache controller for the MOESI
// directory protocol.
// NOTE(review): this listing carries baked-in original line numbers and
// is missing many lines (blank lines, closing braces, some statements);
// code is left byte-identical.
29 machine(MachineType:L1Cache, "Directory protocol")
// CPU-side sequencer and the split L1 instruction/data caches.
30 : Sequencer * sequencer;
31 CacheMemory * L1Icache;
32 CacheMemory * L1Dcache;
// Number of address bits used to select the destination L2 bank.
33 int l2_select_num_bits;
// Latency (cycles) charged to messages this controller enqueues.
34 Cycles request_latency := 2;
// Lockout period after receiving exclusive data (see Use_Timeout and
// o_scheduleUseTimeout below).
35 Cycles use_timeout_latency := 50;
39 // From this node's L1 cache TO the network
40 // a local L1 -> this L2 bank, currently ordered with directory forwarded requests
41 MessageBuffer * requestFromL1Cache, network="To", virtual_network="0",
43 // a local L1 -> this L2 bank
44 MessageBuffer * responseFromL1Cache, network="To", virtual_network="2",
47 // To this node's L1 cache FROM the network
48 // a L2 bank -> this L1
49 MessageBuffer * requestToL1Cache, network="From", virtual_network="0",
51 // a L2 bank -> this L1
52 MessageBuffer * responseToL1Cache, network="From", virtual_network="2",
// Controller-internal queue used to self-schedule All_acks triggers.
55 MessageBuffer * triggerQueue;
// CPU requests from the sequencer enter through the mandatory queue.
57 MessageBuffer * mandatoryQueue;
// Cache states.  The optional quoted string is the short name used in
// trace/debug output; states without one default to the identifier.
60 state_declaration(State, desc="Cache states", default="L1Cache_State_I") {
// Stable states.
62 I, AccessPermission:Invalid, desc="Idle";
63 S, AccessPermission:Read_Only, desc="Shared";
64 O, AccessPermission:Read_Only, desc="Owned";
// M and M_W are Read_Only: a store in M must transition to MM first.
65 M, AccessPermission:Read_Only, desc="Modified (dirty)";
66 M_W, AccessPermission:Read_Only, desc="Modified (dirty)";
67 MM, AccessPermission:Read_Write, desc="Modified (dirty and locally modified)";
68 MM_W, AccessPermission:Read_Write, desc="Modified (dirty and locally modified)";
// Transient states.
71 IM, AccessPermission:Busy, "IM", desc="Issued GetX";
72 SM, AccessPermission:Read_Only, "SM", desc="Issued GetX, we still have an old copy of the line";
// Fixed trace short names: OM was labelled "SM" and SI was labelled
// "OI" (copy-paste typos), which made traces of these states ambiguous.
73 OM, AccessPermission:Read_Only, "OM", desc="Issued GetX, received data";
74 IS, AccessPermission:Busy, "IS", desc="Issued GetS";
75 SI, AccessPermission:Busy, "SI", desc="Issued PutS, waiting for ack";
76 OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack";
77 MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack";
78 II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Fwd_GETS or Fwd_GETX, waiting for ack";
// Events that drive the transition table.
82 enumeration(Event, desc="Cache events") {
// Events from the processor via the mandatory queue.
83 Load, desc="Load request from the processor";
84 Ifetch, desc="I-fetch request from the processor";
85 Store, desc="Store request from the processor";
86 L1_Replacement, desc="Replacement";
// Requests forwarded to us from the directory/L2.
89 Own_GETX, desc="We observe our own GetX forwarded back to us";
90 Fwd_GETX, desc="A GetX from another processor";
91 Fwd_GETS, desc="A GetS from another processor";
// Fixed desc: was "A GetS from another processor", copy-pasted from
// Fwd_GETS; this event is triggered by CoherenceRequestType:DMA_READ.
92 Fwd_DMA, desc="A DMA request forwarded from the directory";
93 Inv, desc="Invalidations from the directory";
// Responses.
96 Ack, desc="Received an ack message";
97 Data, desc="Received a data message, responder has a shared copy";
98 Exclusive_Data, desc="Received a data message";
// Writeback handshake from the directory.
100 Writeback_Ack, desc="Writeback O.K. from directory";
101 Writeback_Ack_Data, desc="Writeback O.K. from directory";
102 Writeback_Nack, desc="Writeback not O.K. from directory";
// Self-scheduled triggers.
105 All_acks, desc="Received all required data and message acks";
108 Use_Timeout, desc="lockout period ended";
// Per-block cache entry stored in the L1 tag arrays.
114 structure(Entry, desc="...", interface="AbstractCacheEntry") {
115 State CacheState, desc="cache state";
116 bool Dirty, desc="Is the data dirty (different than memory)?";
117 DataBlock DataBlk, desc="data for the block";
// Transaction Buffer Entry: per-address state for in-flight requests
// and concurrent writebacks.
121 structure(TBE, desc="...") {
122 Addr addr, desc="Physical address for this TBE";
123 State TBEState, desc="Transient state";
124 DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
125 bool Dirty, desc="Is the data dirty (different than memory)?";
126 int NumPendingMsgs, default="0", desc="Number of acks/data messages that this processor is waiting for";
// TBE table; implementation is external C++ (see template below).
// NOTE(review): the lookup/allocate declarations are missing from this
// listing — presumably dropped during extraction; confirm against the
// original file.
129 structure(TBETable, external ="yes") {
132 void deallocate(Addr);
133 bool isPresent(Addr);
// Helpers provided externally by the SLICC/Ruby runtime.
137 Tick cyclesToTicks(Cycles c);
138 void set_cache_entry(AbstractCacheEntry b);
139 void unset_cache_entry();
// Controller-local storage.
143 TBETable TBEs, template="<L1Cache_TBE>", constructor="m_number_of_TBEs";
// Timer table used to enforce the use-timeout lockout on owned data.
144 TimerTable useTimerTable;
// Low-order bit of the L2-bank select field (block-offset width).
145 int l2_select_low_bit, default="RubySystem::getBlockSizeBits()";
// Look up a block in either L1; the D-cache entry takes precedence.
// Returns an invalid pointer when the block is in neither cache.
147 Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
148 Entry L1Dcache_entry := static_cast(Entry, "pointer", L1Dcache.lookup(addr));
149 if(is_valid(L1Dcache_entry)) {
150 return L1Dcache_entry;
// Fall back to the I-cache lookup (may itself be invalid).
153 Entry L1Icache_entry := static_cast(Entry, "pointer", L1Icache.lookup(addr));
154 return L1Icache_entry;
// Direct lookup into the D-cache only.
157 Entry getL1DCacheEntry(Addr addr), return_by_pointer="yes" {
158 return static_cast(Entry, "pointer", L1Dcache.lookup(addr));
// Direct lookup into the I-cache only.
161 Entry getL1ICacheEntry(Addr addr), return_by_pointer="yes" {
162 return static_cast(Entry, "pointer", L1Icache.lookup(addr));
// Resolve the current protocol state for an address.
// NOTE(review): the leading TBE branch (and the final default return)
// are missing from this listing; the visible else-if implies a valid
// TBE's TBEState takes precedence over the cache entry's state.
165 State getState(TBE tbe, Entry cache_entry, Addr addr) {
168 } else if (is_valid(cache_entry)) {
169 return cache_entry.CacheState;
// Update protocol state in both the TBE (if any) and the cache entry.
174 void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
// A block must never be resident in both L1s simultaneously.
175 assert((L1Dcache.isTagPresent(addr) && L1Icache.isTagPresent(addr)) == false);
178 tbe.TBEState := state;
181 if (is_valid(cache_entry)) {
// On entry to a stable readable state, run the sequencer's coherence
// checker after the state is written.
182 if ( ((cache_entry.CacheState != State:M) && (state == State:M)) ||
183 ((cache_entry.CacheState != State:MM) && (state == State:MM)) ||
184 ((cache_entry.CacheState != State:S) && (state == State:S)) ||
185 ((cache_entry.CacheState != State:O) && (state == State:O)) ) {
187 cache_entry.CacheState := state;
188 sequencer.checkCoherence(addr);
// Other target states: just record the new state.
191 cache_entry.CacheState := state;
// Map the address's current state to an access permission, preferring
// the TBE's transient state, then the cache entry, else NotPresent.
196 AccessPermission getAccessPermission(Addr addr) {
197 TBE tbe := TBEs[addr];
199 DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(tbe.TBEState));
200 return L1Cache_State_to_permission(tbe.TBEState);
203 Entry cache_entry := getCacheEntry(addr);
204 if(is_valid(cache_entry)) {
205 DPRINTF(RubySlicc, "%s\n", L1Cache_State_to_permission(cache_entry.CacheState));
206 return L1Cache_State_to_permission(cache_entry.CacheState);
209 DPRINTF(RubySlicc, "AccessPermission_NotPresent\n");
210 return AccessPermission:NotPresent;
// Push the permission implied by the new state into the cache entry.
213 void setAccessPermission(Entry cache_entry, Addr addr, State state) {
214 if (is_valid(cache_entry)) {
215 cache_entry.changePermission(L1Cache_State_to_permission(state));
// Functional (debug) read path: prefer the cache entry's data, fall
// back to the TBE; error if neither holds the block.
219 void functionalRead(Addr addr, Packet *pkt) {
220 Entry cache_entry := getCacheEntry(addr);
221 if(is_valid(cache_entry)) {
222 testAndRead(addr, cache_entry.DataBlk, pkt);
224 TBE tbe := TBEs[addr];
226 testAndRead(addr, tbe.DataBlk, pkt);
228 error("Data block missing!");
// Functional (debug) write path; returns the number of successful
// writes.  Writes the cache entry if present, otherwise the TBE copy.
233 int functionalWrite(Addr addr, Packet *pkt) {
234 int num_functional_writes := 0;
236 Entry cache_entry := getCacheEntry(addr);
237 if(is_valid(cache_entry)) {
238 num_functional_writes := num_functional_writes +
239 testAndWrite(addr, cache_entry.DataBlk, pkt);
240 return num_functional_writes;
243 TBE tbe := TBEs[addr];
244 num_functional_writes := num_functional_writes +
245 testAndWrite(addr, tbe.DataBlk, pkt);
246 return num_functional_writes;
// Translate a CPU request type into a protocol event.
// NOTE(review): the return statements (Event:Load / Event:Ifetch /
// Event:Store) are missing from this listing.
249 Event mandatory_request_type_to_event(RubyRequestType type) {
250 if (type == RubyRequestType:LD) {
252 } else if (type == RubyRequestType:IFETCH) {
254 } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) {
// Anything else is a protocol error.
257 error("Invalid RubyRequestType");
// Outgoing ports: requests and responses to the network, plus the
// internal trigger queue.
263 out_port(requestNetwork_out, RequestMsg, requestFromL1Cache);
264 out_port(responseNetwork_out, ResponseMsg, responseFromL1Cache);
265 out_port(triggerQueue_out, TriggerMsg, triggerQueue);
// Use-timeout timer expirations: fire Use_Timeout for the ready address.
270 in_port(useTimerTable_in, Addr, useTimerTable) {
271 if (useTimerTable_in.isReady(clockEdge())) {
272 Addr readyAddress := useTimerTable.nextAddress();
273 trigger(Event:Use_Timeout, readyAddress, getCacheEntry(readyAddress),
274 TBEs.lookup(readyAddress));
// Internal trigger queue: currently only ALL_ACKS -> All_acks.
279 in_port(triggerQueue_in, TriggerMsg, triggerQueue) {
280 if (triggerQueue_in.isReady(clockEdge())) {
281 peek(triggerQueue_in, TriggerMsg) {
282 if (in_msg.Type == TriggerType:ALL_ACKS) {
283 trigger(Event:All_acks, in_msg.addr,
284 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
286 error("Unexpected message");
292 // Nothing from the request network
// Forwarded requests from the directory/L2, demultiplexed to events.
295 in_port(requestNetwork_in, RequestMsg, requestToL1Cache) {
296 if (requestNetwork_in.isReady(clockEdge())) {
297 peek(requestNetwork_in, RequestMsg, block_on="addr") {
298 assert(in_msg.Destination.isElement(machineID));
299 DPRINTF(RubySlicc, "L1 received: %s\n", in_msg.Type);
// A GETX/DMA_WRITE from ourselves is our own request echoed back.
301 if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestType:DMA_WRITE) {
302 if (in_msg.Requestor == machineID && in_msg.RequestorMachine == MachineType:L1Cache) {
303 trigger(Event:Own_GETX, in_msg.addr,
304 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
306 trigger(Event:Fwd_GETX, in_msg.addr,
307 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
309 } else if (in_msg.Type == CoherenceRequestType:GETS) {
310 trigger(Event:Fwd_GETS, in_msg.addr,
311 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
312 } else if (in_msg.Type == CoherenceRequestType:DMA_READ) {
313 trigger(Event:Fwd_DMA, in_msg.addr,
314 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
// Writeback handshake responses arrive on this network too.
315 } else if (in_msg.Type == CoherenceRequestType:WB_ACK) {
316 trigger(Event:Writeback_Ack, in_msg.addr,
317 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
318 } else if (in_msg.Type == CoherenceRequestType:WB_ACK_DATA) {
319 trigger(Event:Writeback_Ack_Data, in_msg.addr,
320 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
321 } else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
322 trigger(Event:Writeback_Nack, in_msg.addr,
323 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
324 } else if (in_msg.Type == CoherenceRequestType:INV) {
325 trigger(Event:Inv, in_msg.addr,
326 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
328 error("Unexpected message");
// Responses (acks and data) destined for this L1.
335 in_port(responseToL1Cache_in, ResponseMsg, responseToL1Cache) {
336 if (responseToL1Cache_in.isReady(clockEdge())) {
337 peek(responseToL1Cache_in, ResponseMsg, block_on="addr") {
338 if (in_msg.Type == CoherenceResponseType:ACK) {
339 trigger(Event:Ack, in_msg.addr,
340 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
341 } else if (in_msg.Type == CoherenceResponseType:DATA) {
342 trigger(Event:Data, in_msg.addr,
343 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
344 } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
345 trigger(Event:Exclusive_Data, in_msg.addr,
346 getCacheEntry(in_msg.addr), TBEs[in_msg.addr]);
348 error("Unexpected message");
354 // Nothing from the unblock network
355 // Mandatory Queue betweens Node's CPU and it's L1 caches
// CPU requests: route to the proper L1, evicting a block from the
// "wrong" L1 or making room via L1_Replacement as needed.
356 in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
357 if (mandatoryQueue_in.isReady(clockEdge())) {
358 peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
360 // Check for data access to blocks in I-cache and ifetchs to blocks in D-cache
362 if (in_msg.Type == RubyRequestType:IFETCH) {
363 // ** INSTRUCTION ACCESS ***
365 Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
366 if (is_valid(L1Icache_entry)) {
367 // The tag matches for the L1, so the L1 asks the L2 for it.
368 trigger(mandatory_request_type_to_event(in_msg.Type),
369 in_msg.LineAddress, L1Icache_entry,
370 TBEs[in_msg.LineAddress]);
373 Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
374 // Check to see if it is in the OTHER L1
375 if (is_valid(L1Dcache_entry)) {
376 // The block is in the wrong L1, put the request on the queue to the shared L2
377 trigger(Event:L1_Replacement, in_msg.LineAddress, L1Dcache_entry,
378 TBEs[in_msg.LineAddress]);
380 if (L1Icache.cacheAvail(in_msg.LineAddress)) {
381 // L1 does't have the line, but we have space for it in the L1 so let's see if the L2 has it
382 trigger(mandatory_request_type_to_event(in_msg.Type),
383 in_msg.LineAddress, L1Icache_entry,
384 TBEs[in_msg.LineAddress]);
386 // No room in the L1, so we need to make room in the L1
// Evict the victim chosen by cacheProbe to free an I-cache way.
387 trigger(Event:L1_Replacement,
388 L1Icache.cacheProbe(in_msg.LineAddress),
389 getL1ICacheEntry(L1Icache.cacheProbe(in_msg.LineAddress)),
390 TBEs[L1Icache.cacheProbe(in_msg.LineAddress)]);
394 // *** DATA ACCESS ***
// Mirror of the instruction path, with the roles of the two L1s swapped.
396 Entry L1Dcache_entry := getL1DCacheEntry(in_msg.LineAddress);
397 if (is_valid(L1Dcache_entry)) {
398 // The tag matches for the L1, so the L1 ask the L2 for it
399 trigger(mandatory_request_type_to_event(in_msg.Type),
400 in_msg.LineAddress, L1Dcache_entry,
401 TBEs[in_msg.LineAddress]);
404 Entry L1Icache_entry := getL1ICacheEntry(in_msg.LineAddress);
405 // Check to see if it is in the OTHER L1
406 if (is_valid(L1Icache_entry)) {
407 // The block is in the wrong L1, put the request on the queue to the shared L2
408 trigger(Event:L1_Replacement, in_msg.LineAddress,
409 L1Icache_entry, TBEs[in_msg.LineAddress]);
411 if (L1Dcache.cacheAvail(in_msg.LineAddress)) {
412 // L1 does't have the line, but we have space for it in the L1 let's see if the L2 has it
413 trigger(mandatory_request_type_to_event(in_msg.Type),
414 in_msg.LineAddress, L1Dcache_entry,
415 TBEs[in_msg.LineAddress]);
417 // No room in the L1, so we need to make room in the L1
418 trigger(Event:L1_Replacement,
419 L1Dcache.cacheProbe(in_msg.LineAddress),
420 getL1DCacheEntry(L1Dcache.cacheProbe(in_msg.LineAddress)),
421 TBEs[L1Dcache.cacheProbe(in_msg.LineAddress)]);
// Issue a GETS to the home L2 bank for this address.
432 action(a_issueGETS, "a", desc="Issue GETS") {
433 peek(mandatoryQueue_in, RubyRequest) {
434 enqueue(requestNetwork_out, RequestMsg, request_latency) {
435 out_msg.addr := address;
436 out_msg.Type := CoherenceRequestType:GETS;
437 out_msg.Requestor := machineID;
438 out_msg.RequestorMachine := MachineType:L1Cache;
// Destination: the L2 bank selected by the address interleaving bits.
439 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
440 l2_select_low_bit, l2_select_num_bits, intToID(0)));
441 out_msg.MessageSize := MessageSizeType:Request_Control;
// Propagate access-mode/prefetch hints from the CPU request.
442 out_msg.AccessMode := in_msg.AccessMode;
443 out_msg.Prefetch := in_msg.Prefetch;
// Issue a GETX (exclusive) to the home L2 bank.
448 action(b_issueGETX, "b", desc="Issue GETX") {
449 peek(mandatoryQueue_in, RubyRequest) {
450 enqueue(requestNetwork_out, RequestMsg, request_latency) {
451 out_msg.addr := address;
452 out_msg.Type := CoherenceRequestType:GETX;
453 out_msg.Requestor := machineID;
454 out_msg.RequestorMachine := MachineType:L1Cache;
455 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
456 l2_select_low_bit, l2_select_num_bits, intToID(0)));
457 out_msg.MessageSize := MessageSizeType:Request_Control;
458 out_msg.AccessMode := in_msg.AccessMode;
459 out_msg.Prefetch := in_msg.Prefetch;
// Writeback request for a modified block.
464 action(d_issuePUTX, "d", desc="Issue PUTX") {
465 enqueue(requestNetwork_out, RequestMsg, request_latency) {
466 out_msg.addr := address;
467 out_msg.Type := CoherenceRequestType:PUTX;
468 out_msg.Requestor := machineID;
469 out_msg.RequestorMachine := MachineType:L1Cache;
470 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
471 l2_select_low_bit, l2_select_num_bits, intToID(0)));
472 out_msg.MessageSize := MessageSizeType:Writeback_Control;
// Writeback request for an owned block.
476 action(dd_issuePUTO, "\d", desc="Issue PUTO") {
477 enqueue(requestNetwork_out, RequestMsg, request_latency) {
478 out_msg.addr := address;
479 out_msg.Type := CoherenceRequestType:PUTO;
480 out_msg.Requestor := machineID;
481 out_msg.RequestorMachine := MachineType:L1Cache;
482 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
483 l2_select_low_bit, l2_select_num_bits, intToID(0)));
484 out_msg.MessageSize := MessageSizeType:Writeback_Control;
// Notify the directory that a shared block is being dropped.
488 action(dd_issuePUTS, "\ds", desc="Issue PUTS") {
489 enqueue(requestNetwork_out, RequestMsg, request_latency) {
490 out_msg.addr := address;
491 out_msg.Type := CoherenceRequestType:PUTS;
492 out_msg.Requestor := machineID;
493 out_msg.RequestorMachine := MachineType:L1Cache;
494 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
495 l2_select_low_bit, l2_select_num_bits, intToID(0)));
496 out_msg.MessageSize := MessageSizeType:Writeback_Control;
// Answer a forwarded GETS with shared data; route to the L2 bank when
// the requestor is an L2, otherwise directly to the requesting L1.
500 action(e_sendData, "e", desc="Send data from cache to requestor") {
501 peek(requestNetwork_in, RequestMsg) {
502 assert(is_valid(cache_entry));
503 if (in_msg.RequestorMachine == MachineType:L2Cache) {
504 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
505 out_msg.addr := address;
506 out_msg.Type := CoherenceResponseType:DATA;
507 out_msg.Sender := machineID;
508 out_msg.SenderMachine := MachineType:L1Cache;
509 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
510 l2_select_low_bit, l2_select_num_bits, intToID(0)));
511 out_msg.DataBlk := cache_entry.DataBlk;
// Dirty deliberately reported as false on shared-data responses; the
// owner retains responsibility for the writeback.
512 // out_msg.Dirty := cache_entry.Dirty;
513 out_msg.Dirty := false;
514 out_msg.Acks := in_msg.Acks;
515 out_msg.MessageSize := MessageSizeType:Response_Data;
517 DPRINTF(RubySlicc, "Sending data to L2: %#x\n", in_msg.addr);
520 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
521 out_msg.addr := address;
522 out_msg.Type := CoherenceResponseType:DATA;
523 out_msg.Sender := machineID;
524 out_msg.SenderMachine := MachineType:L1Cache;
525 out_msg.Destination.add(in_msg.Requestor);
526 out_msg.DataBlk := cache_entry.DataBlk;
527 // out_msg.Dirty := cache_entry.Dirty;
528 out_msg.Dirty := false;
529 out_msg.Acks := in_msg.Acks;
530 out_msg.MessageSize := MessageSizeType:ResponseLocal_Data;
532 DPRINTF(RubySlicc, "Sending data to L1\n");
// Send data (with real Dirty bit) straight to the home L2 bank.
537 action(e_sendDataToL2, "ee", desc="Send data from cache to requestor") {
538 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
539 assert(is_valid(cache_entry));
540 out_msg.addr := address;
541 out_msg.Type := CoherenceResponseType:DATA;
542 out_msg.Sender := machineID;
543 out_msg.SenderMachine := MachineType:L1Cache;
544 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
545 l2_select_low_bit, l2_select_num_bits, intToID(0)));
546 out_msg.DataBlk := cache_entry.DataBlk;
547 out_msg.Dirty := cache_entry.Dirty;
548 out_msg.Acks := 0; // irrelevant
549 out_msg.MessageSize := MessageSizeType:Response_Data;
// Answer a forwarded GETX: hand over the block exclusively and give
// up our copy.  Same L2-vs-L1 routing as e_sendData.
553 action(ee_sendDataExclusive, "\e", desc="Send data from cache to requestor, don't keep a shared copy") {
554 peek(requestNetwork_in, RequestMsg) {
555 assert(is_valid(cache_entry));
556 if (in_msg.RequestorMachine == MachineType:L2Cache) {
557 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
558 out_msg.addr := address;
559 out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
560 out_msg.Sender := machineID;
561 out_msg.SenderMachine := MachineType:L1Cache;
562 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
563 l2_select_low_bit, l2_select_num_bits, intToID(0)));
564 out_msg.DataBlk := cache_entry.DataBlk;
565 out_msg.Dirty := cache_entry.Dirty;
566 out_msg.Acks := in_msg.Acks;
567 out_msg.MessageSize := MessageSizeType:Response_Data;
569 DPRINTF(RubySlicc, "Sending exclusive data to L2\n");
572 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
573 out_msg.addr := address;
574 out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
575 out_msg.Sender := machineID;
576 out_msg.SenderMachine := MachineType:L1Cache;
577 out_msg.Destination.add(in_msg.Requestor);
578 out_msg.DataBlk := cache_entry.DataBlk;
579 out_msg.Dirty := cache_entry.Dirty;
580 out_msg.Acks := in_msg.Acks;
581 out_msg.MessageSize := MessageSizeType:ResponseLocal_Data;
583 DPRINTF(RubySlicc, "Sending exclusive data to L1\n");
// Send an invalidation ack to the requestor (L1 directly, otherwise
// via the home L2 bank).  Acks := -1 decrements the requester's
// pending-message count (see m_decrementNumberOfMessages).
588 action(f_sendAck, "f", desc="Send ack from cache to requestor") {
589 peek(requestNetwork_in, RequestMsg) {
590 if (in_msg.RequestorMachine == MachineType:L1Cache) {
591 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
592 out_msg.addr := address;
593 out_msg.Type := CoherenceResponseType:ACK;
594 out_msg.Sender := machineID;
595 out_msg.SenderMachine := MachineType:L1Cache;
596 out_msg.Destination.add(in_msg.Requestor);
597 out_msg.Acks := 0 - 1; // -1
598 out_msg.MessageSize := MessageSizeType:Response_Control;
602 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
603 out_msg.addr := address;
604 out_msg.Type := CoherenceResponseType:ACK;
605 out_msg.Sender := machineID;
606 out_msg.SenderMachine := MachineType:L1Cache;
607 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
608 l2_select_low_bit, l2_select_num_bits, intToID(0)));
609 out_msg.Acks := 0 - 1; // -1
610 out_msg.MessageSize := MessageSizeType:Response_Control;
// Unblock the home L2 bank after a transaction completes.
616 action(g_sendUnblock, "g", desc="Send unblock to memory") {
617 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
618 out_msg.addr := address;
619 out_msg.Type := CoherenceResponseType:UNBLOCK;
620 out_msg.Sender := machineID;
621 out_msg.SenderMachine := MachineType:L1Cache;
622 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
623 l2_select_low_bit, l2_select_num_bits, intToID(0)));
624 out_msg.MessageSize := MessageSizeType:Unblock_Control;
// Unblock variant indicating we now hold the block exclusively.
628 action(gg_sendUnblockExclusive, "\g", desc="Send unblock exclusive to memory") {
629 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
630 out_msg.addr := address;
631 out_msg.Type := CoherenceResponseType:UNBLOCK_EXCLUSIVE;
632 out_msg.Sender := machineID;
633 out_msg.SenderMachine := MachineType:L1Cache;
634 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
635 l2_select_low_bit, l2_select_num_bits, intToID(0)));
636 out_msg.MessageSize := MessageSizeType:Unblock_Control;
// Complete a load hit in the D-cache: touch LRU state, notify sequencer.
640 action(h_load_hit, "hd", desc="Notify sequencer the load completed.") {
641 assert(is_valid(cache_entry));
642 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
643 L1Dcache.setMRU(cache_entry);
644 sequencer.readCallback(address, cache_entry.DataBlk);
// Complete an ifetch hit in the I-cache.
647 action(h_ifetch_hit, "hi", desc="Notify the sequencer about ifetch completion.") {
648 assert(is_valid(cache_entry));
649 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
650 L1Icache.setMRU(cache_entry);
651 sequencer.readCallback(address, cache_entry.DataBlk);
// Load completion after an external fill (touches MRU in both caches;
// the extra 'true' flags the callback as an external hit).
654 action(hx_load_hit, "hx", desc="Notify sequencer the load completed.") {
655 assert(is_valid(cache_entry));
656 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
657 L1Icache.setMRU(address);
658 L1Dcache.setMRU(address);
659 sequencer.readCallback(address, cache_entry.DataBlk, true);
// Store completion on a local hit; the write dirties the block.
662 action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") {
663 assert(is_valid(cache_entry));
664 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
665 L1Dcache.setMRU(cache_entry);
666 sequencer.writeCallback(address, cache_entry.DataBlk);
667 cache_entry.Dirty := true;
// Store completion after an external fill (see hx_load_hit).
670 action(xx_store_hit, "\xx", desc="Notify sequencer that store completed.") {
671 assert(is_valid(cache_entry));
672 DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
673 L1Icache.setMRU(address);
674 L1Dcache.setMRU(address);
675 sequencer.writeCallback(address, cache_entry.DataBlk, true);
676 cache_entry.Dirty := true;
// Allocate a TBE and snapshot the cache entry's data/dirty bit so a
// concurrent writeback can proceed after the entry is deallocated.
679 action(i_allocateTBE, "i", desc="Allocate TBE") {
680 check_allocate(TBEs);
681 TBEs.allocate(address);
682 set_tbe(TBEs[address]);
683 assert(is_valid(cache_entry));
684 tbe.DataBlk := cache_entry.DataBlk; // Data only used for writebacks
685 tbe.Dirty := cache_entry.Dirty;
688 action(j_popTriggerQueue, "j", desc="Pop trigger queue.") {
689 triggerQueue_in.dequeue(clockEdge());
// Cancel the pending use-timeout for this address.
692 action(jj_unsetUseTimer, "\jj", desc="Unset use timer.") {
693 useTimerTable.unset(address);
696 action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") {
697 mandatoryQueue_in.dequeue(clockEdge());
700 action(l_popForwardQueue, "l", desc="Pop forwareded request queue.") {
701 requestNetwork_in.dequeue(clockEdge());
// Ack bookkeeping: responses carry an Acks delta (acks are -1, data
// from the directory carries the positive count of expected acks).
704 action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") {
705 peek(responseToL1Cache_in, ResponseMsg) {
706 assert(is_valid(tbe));
707 DPRINTF(RubySlicc, "L1 decrementNumberOfMessages: %d\n", in_msg.Acks);
708 tbe.NumPendingMsgs := tbe.NumPendingMsgs - in_msg.Acks;
// Same bookkeeping, but driven by a forwarded request message.
712 action(mm_decrementNumberOfMessages, "\m", desc="Decrement the number of messages for which we're waiting") {
713 peek(requestNetwork_in, RequestMsg) {
714 assert(is_valid(tbe));
715 tbe.NumPendingMsgs := tbe.NumPendingMsgs - in_msg.Acks;
719 action(n_popResponseQueue, "n", desc="Pop response queue") {
720 responseToL1Cache_in.dequeue(clockEdge());
// When the pending count reaches zero, self-schedule All_acks.
723 action(o_checkForCompletion, "o", desc="Check if we have received all the messages required for completion") {
724 assert(is_valid(tbe));
725 if (tbe.NumPendingMsgs == 0) {
726 enqueue(triggerQueue_out, TriggerMsg) {
727 out_msg.addr := address;
728 out_msg.Type := TriggerType:ALL_ACKS;
// Arm the use-timeout lockout for freshly received exclusive data.
733 action(o_scheduleUseTimeout, "oo", desc="Schedule a use timeout.") {
734 useTimerTable.set(address,
735 clockEdge() + cyclesToTicks(use_timeout_latency));
// Acknowledge a forwarded DMA request back to the home L2 bank.
738 action(ub_dmaUnblockL2Cache, "ub", desc="Send dma ack to l2 cache") {
739 peek(requestNetwork_in, RequestMsg) {
740 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
741 out_msg.addr := address;
742 out_msg.Type := CoherenceResponseType:DMA_ACK;
743 out_msg.Sender := machineID;
744 out_msg.SenderMachine := MachineType:L1Cache;
745 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
746 l2_select_low_bit, l2_select_num_bits, intToID(0)));
747 out_msg.Dirty := false;
749 out_msg.MessageSize := MessageSizeType:Response_Control;
// Service a forwarded request from the TBE's data snapshot (the cache
// entry may already be gone); L1/DMA requestors get the data directly,
// L2 requestors via the home bank.
754 action(q_sendDataFromTBEToCache, "q", desc="Send data from TBE to cache") {
755 peek(requestNetwork_in, RequestMsg) {
756 assert(is_valid(tbe));
757 if (in_msg.RequestorMachine == MachineType:L1Cache ||
758 in_msg.RequestorMachine == MachineType:DMA) {
759 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
760 out_msg.addr := address;
761 out_msg.Type := CoherenceResponseType:DATA;
762 out_msg.Sender := machineID;
763 out_msg.SenderMachine := MachineType:L1Cache;
764 out_msg.Destination.add(in_msg.Requestor);
765 out_msg.DataBlk := tbe.DataBlk;
// Dirty reported false on shared-data responses (see e_sendData).
766 // out_msg.Dirty := tbe.Dirty;
767 out_msg.Dirty := false;
768 out_msg.Acks := in_msg.Acks;
769 out_msg.MessageSize := MessageSizeType:ResponseLocal_Data;
773 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
774 out_msg.addr := address;
775 out_msg.Type := CoherenceResponseType:DATA;
776 out_msg.Sender := machineID;
777 out_msg.SenderMachine := MachineType:L1Cache;
778 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
779 l2_select_low_bit, l2_select_num_bits, intToID(0)));
780 out_msg.DataBlk := tbe.DataBlk;
781 // out_msg.Dirty := tbe.Dirty;
782 out_msg.Dirty := false;
783 out_msg.Acks := in_msg.Acks;
784 out_msg.MessageSize := MessageSizeType:Response_Data;
// Exclusive-data variant of the above; the Dirty bit is forwarded.
790 action(q_sendExclusiveDataFromTBEToCache, "qq", desc="Send data from TBE to cache") {
791 peek(requestNetwork_in, RequestMsg) {
792 assert(is_valid(tbe));
793 if (in_msg.RequestorMachine == MachineType:L1Cache) {
794 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
795 out_msg.addr := address;
796 out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
797 out_msg.Sender := machineID;
798 out_msg.SenderMachine := MachineType:L1Cache;
799 out_msg.Destination.add(in_msg.Requestor);
800 out_msg.DataBlk := tbe.DataBlk;
801 out_msg.Dirty := tbe.Dirty;
802 out_msg.Acks := in_msg.Acks;
803 out_msg.MessageSize := MessageSizeType:ResponseLocal_Data;
807 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
808 out_msg.addr := address;
809 out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
810 out_msg.Sender := machineID;
811 out_msg.SenderMachine := MachineType:L1Cache;
812 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
813 l2_select_low_bit, l2_select_num_bits, intToID(0)));
814 out_msg.DataBlk := tbe.DataBlk;
815 out_msg.Dirty := tbe.Dirty;
816 out_msg.Acks := in_msg.Acks;
817 out_msg.MessageSize := MessageSizeType:Response_Data;
823 // L2 will usually request data for a writeback
// NOTE(review): the if/else selecting between the two Type assignments
// below is missing from this listing (presumably keyed on tbe.Dirty).
824 action(qq_sendWBDataFromTBEToL2, "\q", desc="Send data from TBE to L2") {
825 enqueue(responseNetwork_out, ResponseMsg, request_latency) {
826 assert(is_valid(tbe));
827 out_msg.addr := address;
828 out_msg.Sender := machineID;
829 out_msg.SenderMachine := MachineType:L1Cache;
830 out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
831 l2_select_low_bit, l2_select_num_bits, intToID(0)));
832 out_msg.Dirty := tbe.Dirty;
834 out_msg.Type := CoherenceResponseType:WRITEBACK_DIRTY_DATA;
836 out_msg.Type := CoherenceResponseType:WRITEBACK_CLEAN_DATA;
838 out_msg.DataBlk := tbe.DataBlk;
839 out_msg.MessageSize := MessageSizeType:Writeback_Data;
843 action(s_deallocateTBE, "s", desc="Deallocate TBE") {
844 TBEs.deallocate(address);
// Install arriving data (and its Dirty bit) into the cache entry.
848 action(u_writeDataToCache, "u", desc="Write data to cache") {
849 peek(responseToL1Cache_in, ResponseMsg) {
850 assert(is_valid(cache_entry));
851 cache_entry.DataBlk := in_msg.DataBlk;
852 cache_entry.Dirty := in_msg.Dirty;
854 if (in_msg.Type == CoherenceResponseType:DATA) {
855 //assert(in_msg.Dirty == false);
// Debug variant: assert incoming data matches what we already hold.
860 action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") {
861 peek(responseToL1Cache_in, ResponseMsg) {
862 assert(is_valid(cache_entry));
863 assert(cache_entry.DataBlk == in_msg.DataBlk);
864 cache_entry.DataBlk := in_msg.DataBlk;
865 cache_entry.Dirty := in_msg.Dirty;
// Drop the block from whichever L1 currently holds it.
869 action(kk_deallocateL1CacheBlock, "\k", desc="Deallocate cache block.  Sets the cache to invalid, allowing a replacement in parallel with a fetch.") {
870 if (L1Dcache.isTagPresent(address)) {
871 L1Dcache.deallocate(address);
873 L1Icache.deallocate(address);
// Allocate a D-cache entry unless one is already set.
878 action(ii_allocateL1DCacheBlock, "\i", desc="Set L1 D-cache tag equal to tag of block B.") {
879 if ((is_invalid(cache_entry))) {
880 set_cache_entry(L1Dcache.allocate(address, new Entry));
// Allocate an I-cache entry unless one is already set.
884 action(jj_allocateL1ICacheBlock, "\j", desc="Set L1 I-cache tag equal to tag of block B.") {
885 if ((is_invalid(cache_entry))) {
886 set_cache_entry(L1Icache.allocate(address, new Entry));
// Inform the CPU of an eviction (for LL/SC and O3 squash support).
890 action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
891 if (send_evictions) {
892 DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address);
893 sequencer.evictionCallback(address);
// Demand hit/miss statistics.
897 action(uu_profileInstMiss, "\uim", desc="Profile the demand miss") {
898 ++L1Icache.demand_misses;
901 action(uu_profileInstHit, "\uih", desc="Profile the demand hit") {
902 ++L1Icache.demand_hits;
905 action(uu_profileDataMiss, "\udm", desc="Profile the demand miss") {
906 ++L1Dcache.demand_misses;
909 action(uu_profileDataHit, "\udh", desc="Profile the demand hit") {
910 ++L1Dcache.demand_hits;
// Stall-and-retry helpers: recycle the head message to the tail.
913 action(z_recycleRequestQueue, "z", desc="Send the head of the mandatory queue to the back of the queue.") {
914 requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
917 action(zz_recycleMandatoryQueue, "\z", desc="Send the head of the mandatory queue to the back of the queue.") {
918 mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
921 //*****************************************************
923 //*****************************************************
925 // Transitions for Load/Store/L2_Replacement from transient states
// Busy transient states: stall CPU stores/replacements by recycling.
926 transition({IM, SM, OM, IS, OI, SI, MI, II}, {Store, L1_Replacement}) {
927 zz_recycleMandatoryQueue;
// During the use-timeout window, defer replacements and all forwarded
// requests until the timer fires.
930 transition({M_W, MM_W}, L1_Replacement) {
931 zz_recycleMandatoryQueue;
934 transition({M_W, MM_W}, {Fwd_GETS, Fwd_DMA, Fwd_GETX, Own_GETX, Inv}) {
935 z_recycleRequestQueue;
// Loads/ifetches cannot be serviced in these transient states either.
938 transition({IM, IS, OI, MI, SI, II}, {Load, Ifetch}) {
939 zz_recycleMandatoryQueue;
942 // Transitions from Idle
// I -> IS on demand Load: allocate a D-cache entry (request-issue actions
// are elided from this view).
943 transition(I, Load, IS) {
944 ii_allocateL1DCacheBlock;
// I -> IS on Ifetch: same, but the entry goes in the I-cache.
951 transition(I, Ifetch, IS) {
952 jj_allocateL1ICacheBlock;
// I -> IM on Store: allocate and wait for exclusive data/acks.
959 transition(I, Store, IM) {
960 ii_allocateL1DCacheBlock;
// Replacing an Idle block just frees the cache way; no coherence traffic.
967 transition(I, L1_Replacement) {
968 kk_deallocateL1CacheBlock;
// Loads and Ifetches hit in every state that holds valid data
// (hit/profile actions elided from this view).
976 transition({S, SM, O, OM, MM, MM_W, M, M_W}, Load) {
982 transition({S, SM, O, OM, MM, MM_W, M, M_W}, Ifetch) {
988 // Transitions from Shared
// S -> SM on Store: upgrade to writable; shared copy retained meanwhile.
989 transition(S, Store, SM) {
// S -> SI on replacement: notify the CPU, free the way, then await the
// writeback handshake.
996 transition(S, L1_Replacement, SI) {
999 forward_eviction_to_cpu;
1000 kk_deallocateL1CacheBlock;
// Invalidation drops the shared copy and informs the CPU.
1003 transition(S, Inv, I) {
1005 forward_eviction_to_cpu;
// Reads by others (GETS / DMA) do not disturb the S copy.
1009 transition(S, Fwd_GETS) {
1014 transition(S, Fwd_DMA) {
1016 ub_dmaUnblockL2Cache;
1020 // Transitions from Owned
// O -> OM on Store: must invalidate sharers and collect acks before
// writing (issue actions elided from this view).
1021 transition(O, Store, OM) {
1025 k_popMandatoryQueue;
// O -> OI on replacement: the owner holds the valid data, so it must be
// written back.
1028 transition(O, L1_Replacement, OI) {
1031 forward_eviction_to_cpu;
1032 kk_deallocateL1CacheBlock;
// Another core's GETX takes ownership: send exclusive data, drop to I.
1035 transition(O, Fwd_GETX, I) {
1036 ee_sendDataExclusive;
1037 forward_eviction_to_cpu;
// The owner services read requests without giving up its O copy.
1041 transition(O, Fwd_GETS) {
1046 transition(O, Fwd_DMA) {
1048 ub_dmaUnblockL2Cache;
1052 // Transitions from MM
// Stores hit locally in the exclusive-modified states.
1053 transition({MM, MM_W}, Store) {
1056 k_popMandatoryQueue;
// MM -> MI on replacement: modified data must be written back.
1059 transition(MM, L1_Replacement, MI) {
1062 forward_eviction_to_cpu;
1063 kk_deallocateL1CacheBlock;
// Both GETX and GETS strip the MM copy entirely (data handed over
// exclusively; this cache keeps nothing).
1066 transition(MM, Fwd_GETX, I) {
1067 ee_sendDataExclusive;
1068 forward_eviction_to_cpu;
1072 transition(MM, Fwd_GETS, I) {
1073 ee_sendDataExclusive;
1074 forward_eviction_to_cpu;
// DMA reads are serviced while remaining in MM.
1078 transition(MM, Fwd_DMA, MM) {
1080 ub_dmaUnblockL2Cache;
1084 // Transitions from M
// M -> MM on Store: first write upgrades the exclusive copy to modified
// (M is presumably exclusive-clean here — confirm against the state
// declarations, which are outside this view).
1085 transition(M, Store, MM) {
1088 k_popMandatoryQueue;
// Same upgrade while still inside the use-timeout protection window.
1091 transition(M_W, Store, MM_W) {
1094 k_popMandatoryQueue;
// M -> MI on replacement: write the block back.
1097 transition(M, L1_Replacement, MI) {
1100 forward_eviction_to_cpu;
1101 kk_deallocateL1CacheBlock;
// GETX takes the exclusive copy away entirely.
1104 transition(M, Fwd_GETX, I) {
1106 ee_sendDataExclusive;
1107 forward_eviction_to_cpu;
// GETS demotes M to Owned; this cache continues to supply data.
1111 transition(M, Fwd_GETS, O) {
1116 transition(M, Fwd_DMA) {
1118 ub_dmaUnblockL2Cache;
1122 // Transitions from IM
// An invalidation in IM has no local copy to drop (ack actions elided).
1124 transition(IM, Inv) {
// Count each incoming ack toward completion of the pending request.
1129 transition(IM, Ack) {
1130 m_decrementNumberOfMessages;
1131 o_checkForCompletion;
// Data arrival advances IM to OM while any remaining acks are collected.
1135 transition(IM, {Exclusive_Data, Data}, OM) {
1137 m_decrementNumberOfMessages;
1138 o_checkForCompletion;
1142 // Transitions from SM
// Invalidation during an upgrade demotes SM back to IM: the shared copy is
// lost, the CPU is notified, and the miss completes as a full fetch.
1143 transition(SM, Inv, IM) {
1145 forward_eviction_to_cpu;
// Collect acks for the in-flight upgrade.
1149 transition(SM, Ack) {
1150 m_decrementNumberOfMessages;
1151 o_checkForCompletion;
// Data arrival advances SM to OM, as in the IM case above.
1155 transition(SM, {Data, Exclusive_Data}, OM) {
1156 // v_writeDataToCacheVerify;
1157 m_decrementNumberOfMessages;
1158 o_checkForCompletion;
// Reads by others are serviced while the upgrade is in flight.
1162 transition(SM, Fwd_GETS) {
1167 transition(SM, Fwd_DMA) {
1169 ub_dmaUnblockL2Cache;
1173 // Transitions from OM
// The directory echoed our own GETX back to us; treat it as one of the
// messages we are waiting on.
1174 transition(OM, Own_GETX) {
1175 mm_decrementNumberOfMessages;
1176 o_checkForCompletion;
1181 // transition(OM, Fwd_GETX, OMF) {
// A competing GETX wins the race: hand over exclusive data and fall back
// to IM to refetch.
1182 transition(OM, Fwd_GETX, IM) {
1183 ee_sendDataExclusive;
// Reads are still serviced from the owned copy while acks gather.
1187 transition(OM, Fwd_GETS) {
1192 transition(OM, Fwd_DMA) {
1194 ub_dmaUnblockL2Cache;
1198 //transition({OM, OMF}, Ack) {
1199 transition(OM, Ack) {
1200 m_decrementNumberOfMessages;
1201 o_checkForCompletion;
// All acks in: the store may complete; enter the timeout-protected
// modified state and unblock the L2 with an exclusive-unblock.
1205 transition(OM, All_acks, MM_W) {
1207 gg_sendUnblockExclusive;
1209 o_scheduleUseTimeout;
// Use-timeout expiry ends the protected window: MM_W settles into MM.
1213 transition(MM_W, Use_Timeout, MM) {
1217 // Transitions from IS
// No local copy yet, so an invalidation has no data to drop.
1219 transition(IS, Inv) {
// Plain data completes the read miss in Shared.
1224 transition(IS, Data, S) {
1226 m_decrementNumberOfMessages;
// Exclusive data grants M_W: unblock exclusively and start the use
// timeout that protects the freshly granted copy.
1233 transition(IS, Exclusive_Data, M_W) {
1235 m_decrementNumberOfMessages;
1237 gg_sendUnblockExclusive;
1238 o_scheduleUseTimeout;
// Timeout expiry settles M_W into stable M.
1243 transition(M_W, Use_Timeout, M) {
1247 // Transitions from OI/MI
// A writeback is in flight; the cache entry is gone, so forwarded
// requests are answered from the TBE's copy of the data.
1249 transition(MI, Fwd_GETS, OI) {
1250 q_sendDataFromTBEToCache;
1254 transition(MI, Fwd_DMA) {
1255 q_sendDataFromTBEToCache;
1256 ub_dmaUnblockL2Cache;
// GETX during the writeback: surrender exclusivity from the TBE copy.
1260 transition(MI, Fwd_GETX, II) {
1261 q_sendExclusiveDataFromTBEToCache;
1265 transition({SI, OI}, Fwd_GETS) {
1266 q_sendDataFromTBEToCache;
1270 transition({SI, OI}, Fwd_DMA) {
1271 q_sendDataFromTBEToCache;
1272 ub_dmaUnblockL2Cache;
1276 transition(OI, Fwd_GETX, II) {
1277 q_sendExclusiveDataFromTBEToCache;
// Writeback handshake completes the eviction; the _Data variant means the
// L2 wants the block contents as well.
1281 transition({SI, OI, MI}, Writeback_Ack_Data, I) {
1282 qq_sendWBDataFromTBEToL2; // always send data
1287 transition({SI, OI, MI}, Writeback_Ack, I) {
// A nacked writeback is retried from OI.
1293 transition({MI, OI}, Writeback_Nack, OI) {
1294 // FIXME: This might cause deadlock by re-using the writeback
1295 // channel, we should handle this case differently.
1300 // Transitions from II
// The block was already handed to another requester; these transitions
// just retire the remaining writeback handshake.
1301 transition(II, {Writeback_Ack, Writeback_Ack_Data}, I) {
1307 // transition({II, SI}, Writeback_Nack, I) {
1308 transition(II, Writeback_Nack, I) {
// SI keeps waiting on a nack (actions elided from this view).
1313 transition(SI, Writeback_Nack) {
// Invalidation in II: nothing left locally to invalidate.
1318 transition(II, Inv) {
1323 transition(SI, Inv, II) {