mem-cache: Create an address aware TempCacheBlk
[gem5.git] / src / mem / protocol / GPU_RfO-TCP.sm
1 /*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Lisa Hsu
34 */
35
// GPU L1 data cache (TCP) controller for the Read-for-Ownership (RfO)
// GPU coherence protocol.  Core requests arrive on the mandatoryQueue
// from either a Sequencer or a GPUCoalescer (selected by
// use_seq_not_coal); misses and writebacks go to a TCC directory bank
// selected by address-interleaved bits (TCC_select_*).
machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
 : GPUCoalescer* coalescer;
   Sequencer* sequencer;
   bool use_seq_not_coal;       // true: core callbacks via sequencer, else coalescer
   CacheMemory * L1cache;
   int TCC_select_num_bits;     // number of address bits used to pick a TCC bank
   Cycles issue_latency := 40;  // time to send data down to TCC
   Cycles l2_hit_latency := 18;

   // Outbound virtual networks
   MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request";
   MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response";
   MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";

   // Inbound virtual networks
   MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request";
   MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response";

   // Requests from the core side (sequencer/coalescer)
   MessageBuffer * mandatoryQueue;
{
54 state_declaration(State, desc="TCP Cache States", default="TCP_State_I") {
55 I, AccessPermission:Invalid, desc="Invalid";
56 S, AccessPermission:Read_Only, desc="Shared";
57 E, AccessPermission:Read_Write, desc="Exclusive";
58 O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
59 M, AccessPermission:Read_Write, desc="Modified";
60
61 I_M, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
62 I_ES, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
63 S_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
64 O_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
65
66 ES_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack";
67 MO_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for dirty WB ack";
68
69 MO_PI, AccessPermission:Read_Only, desc="L1 downgrade, waiting for CtoD ack (or ProbeInvalidateData)";
70
71 I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCC for canceled WB";
72 }
73
  enumeration(Event, desc="TCP Events") {
    // Core initiated
    Load,           desc="Load";
    Store,          desc="Store";

    // TCC initiated (responses from the TCC directory)
    TCC_AckS,       desc="TCC Ack to Core Request";   // grant in Shared
    TCC_AckE,       desc="TCC Ack to Core Request";   // grant in Exclusive
    TCC_AckM,       desc="TCC Ack to Core Request";   // grant in Modified
    TCC_AckCtoD,    desc="TCC Ack to Core Request";   // change-to-dirty ack, no data
    TCC_AckWB,      desc="TCC Ack for clean WB";
    TCC_NackWB,     desc="TCC Nack for clean WB";

    // Mem sys initiated
    Repl,           desc="Replacing block from cache";

    // Probe Events
    PrbInvData,     desc="probe, return O or M data";
    PrbInv,         desc="probe, no need for data";
    LocalPrbInv,    desc="local probe, no need for data";
    PrbShrData,     desc="probe downgrade, return O or M data";
  }
96
97 enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
98 DataArrayRead, desc="Read the data array";
99 DataArrayWrite, desc="Write the data array";
100 TagArrayRead, desc="Read the data array";
101 TagArrayWrite, desc="Write the data array";
102 }
103
104
  // Per-block cache entry state.
  structure(Entry, desc="...", interface="AbstractCacheEntry") {
    State CacheState,          desc="cache state";
    bool Dirty,                desc="Is the data dirty (diff than memory)?";
    DataBlock DataBlk,         desc="data for the block";
    bool FromL2, default="false", desc="block just moved from L2";
  }

  // Transaction Buffer Entry: tracks a block with an in-flight transient
  // operation (here, primarily outstanding writebacks).
  structure(TBE, desc="...") {
    State TBEState,            desc="Transient state";
    DataBlock DataBlk,         desc="data for the block, required for concurrent writebacks";
    bool Dirty,                desc="Is the data dirty (different than memory)?";
    int NumPendingMsgs,        desc="Number of acks/data messages that this processor is waiting for";
    bool Shared,               desc="Victim hit by shared probe";
  }

  // Interface to the externally implemented Ruby TBE table.
  structure(TBETable, external="yes") {
    TBE lookup(Addr);
    void allocate(Addr);
    void deallocate(Addr);
    bool isPresent(Addr);
  }
126
  TBETable TBEs, template="<TCP_TBE>", constructor="m_number_of_TBEs";
  // Lowest address bit of the TCC-bank interleaving field (block-offset bits).
  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";

  Tick clockEdge();
  Tick cyclesToTicks(Cycles c);

  // Helpers provided by the generated controller base class.
  void set_cache_entry(AbstractCacheEntry b);
  void unset_cache_entry();
  void set_tbe(TBE b);
  void unset_tbe();
  void wakeUpAllBuffers();
  void wakeUpBuffers(Addr a);
  Cycles curCycle();
140
141 // Internal functions
  // Look up the cache entry for an address; returns an invalid pointer on miss.
  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
    Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
    return cache_entry;
  }

  // Data for the block: prefer the TBE copy (in-flight writeback),
  // otherwise the cache entry's copy.
  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
    TBE tbe := TBEs.lookup(addr);
    if(is_valid(tbe)) {
      return tbe.DataBlk;
    } else {
      return getCacheEntry(addr).DataBlk;
    }
  }

  // Current protocol state: TBE state wins over cache state; I if neither exists.
  State getState(TBE tbe, Entry cache_entry, Addr addr) {
    if(is_valid(tbe)) {
      return tbe.TBEState;
    } else if (is_valid(cache_entry)) {
      return cache_entry.CacheState;
    }
    return State:I;
  }
164
  // Write the new state into whichever of the TBE / cache entry is valid.
  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
    if (is_valid(tbe)) {
      tbe.TBEState := state;
    }

    if (is_valid(cache_entry)) {
      cache_entry.CacheState := state;
    }
  }

  // Access permission derived from the current state (TBE-first);
  // NotPresent when the block is in neither the TBEs nor the cache.
  AccessPermission getAccessPermission(Addr addr) {
    TBE tbe := TBEs.lookup(addr);
    if(is_valid(tbe)) {
      return TCP_State_to_permission(tbe.TBEState);
    }

    Entry cache_entry := getCacheEntry(addr);
    if(is_valid(cache_entry)) {
      return TCP_State_to_permission(cache_entry.CacheState);
    }

    return AccessPermission:NotPresent;
  }

  // A block counts as valid for probe/unblock bookkeeping unless its
  // permission is NotPresent, Invalid, or Busy.
  bool isValid(Addr addr) {
    AccessPermission perm := getAccessPermission(addr);
    if (perm == AccessPermission:NotPresent ||
        perm == AccessPermission:Invalid ||
        perm == AccessPermission:Busy) {
      return false;
    } else {
      return true;
    }
  }

  // Mirror a state change into the entry's access permission.
  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
    if (is_valid(cache_entry)) {
      cache_entry.changePermission(TCP_State_to_permission(state));
    }
  }
205
  // Functional (debug) read: serve from the TBE copy when one exists;
  // otherwise fall through to memory.  NOTE(review): the cache entry's
  // DataBlk is not consulted on this path — confirm that is intended.
  void functionalRead(Addr addr, Packet *pkt) {
    TBE tbe := TBEs.lookup(addr);
    if(is_valid(tbe)) {
      testAndRead(addr, tbe.DataBlk, pkt);
    } else {
      functionalMemoryRead(pkt);
    }
  }

  // Functional write: update the TBE copy if present, then always
  // propagate the write to memory; returns the number of locations written.
  int functionalWrite(Addr addr, Packet *pkt) {
    int num_functional_writes := 0;

    TBE tbe := TBEs.lookup(addr);
    if(is_valid(tbe)) {
      num_functional_writes := num_functional_writes +
            testAndWrite(addr, tbe.DataBlk, pkt);
    }

    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
    return num_functional_writes;
  }
227
  // Map a protocol-level RequestType onto the CacheMemory stat counters.
  void recordRequestType(RequestType request_type, Addr addr) {
    if (request_type == RequestType:DataArrayRead) {
      L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
    } else if (request_type == RequestType:DataArrayWrite) {
      L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
    } else if (request_type == RequestType:TagArrayRead) {
      L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
    } else if (request_type == RequestType:TagArrayWrite) {
      L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
    }
  }

  // Check bank availability for the array a transition wants to touch;
  // reads and writes contend for the same data/tag array resource.
  bool checkResourceAvailable(RequestType request_type, Addr addr) {
    if (request_type == RequestType:DataArrayRead) {
      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
    } else if (request_type == RequestType:DataArrayWrite) {
      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
    } else if (request_type == RequestType:TagArrayRead) {
      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
    } else if (request_type == RequestType:TagArrayWrite) {
      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
    } else {
      error("Invalid RequestType type in checkResourceAvailable");
      return true;
    }
  }
254
  // Classify who supplied a response so callbacks can attribute the hit
  // to the right machine type for stats: self => TCP, another TCP =>
  // L1Cache_wCC (cache-to-cache), TCC => TCC, anything else => TCCdir.
  MachineType getCoherenceType(MachineID myMachID,
                               MachineID senderMachID) {
    if(myMachID == senderMachID) {
      return MachineType:TCP;
    } else if(machineIDToMachineType(senderMachID) == MachineType:TCP) {
      return MachineType:L1Cache_wCC;
    } else if(machineIDToMachineType(senderMachID) == MachineType:TCC) {
      return MachineType:TCC;
    } else {
      return MachineType:TCCdir;
    }
  }
267
  // Out Ports

  out_port(requestNetwork_out, CPURequestMsg, requestFromTCP);
  out_port(responseNetwork_out, ResponseMsg, responseFromTCP);
  out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);

  // In Ports

  // Probes from the TCC directory: map each probe message onto a
  // PrbInvData / LocalPrbInv / PrbInv / PrbShrData event.
  in_port(probeNetwork_in, TDProbeRequestMsg, probeToTCP) {
    if (probeNetwork_in.isReady(clockEdge())) {
      peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") {
        DPRINTF(RubySlicc, "%s\n", in_msg);
        DPRINTF(RubySlicc, "machineID: %s\n", machineID);
        Entry cache_entry := getCacheEntry(in_msg.addr);
        TBE tbe := TBEs.lookup(in_msg.addr);

        if (in_msg.Type == ProbeRequestType:PrbInv) {
          if (in_msg.ReturnData) {
            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
          } else {
            // Local (intra-cluster CtoD) probes are distinguished so
            // ownership can be retained until the transfer completes.
            if(in_msg.localCtoD) {
              trigger(Event:LocalPrbInv, in_msg.addr, cache_entry, tbe);
            } else {
              trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
            }
          }
        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
          assert(in_msg.ReturnData);
          trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
        }
      }
    }
  }
301
  // Responses from the TCC: data grants (TDSysResp) keyed by the granted
  // coherence state, plus writeback acks/nacks.
  in_port(responseToTCP_in, ResponseMsg, responseToTCP) {
    if (responseToTCP_in.isReady(clockEdge())) {
      peek(responseToTCP_in, ResponseMsg, block_on="addr") {

        Entry cache_entry := getCacheEntry(in_msg.addr);
        TBE tbe := TBEs.lookup(in_msg.addr);

        if (in_msg.Type == CoherenceResponseType:TDSysResp) {
          if (in_msg.State == CoherenceState:Modified) {
            // CtoD acks grant M without a data payload.
            if (in_msg.CtoD) {
              trigger(Event:TCC_AckCtoD, in_msg.addr, cache_entry, tbe);
            } else {
              trigger(Event:TCC_AckM, in_msg.addr, cache_entry, tbe);
            }
          } else if (in_msg.State == CoherenceState:Shared) {
            trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe);
          } else if (in_msg.State == CoherenceState:Exclusive) {
            trigger(Event:TCC_AckE, in_msg.addr, cache_entry, tbe);
          }
        } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
          trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
        } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) {
          trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe);
        } else {
          error("Unexpected Response Message to Core");
        }
      }
    }
  }
331
  // Core requests: loads/stores proceed when the line is present or a
  // way is free; otherwise trigger a replacement of the chosen victim.
  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
    if (mandatoryQueue_in.isReady(clockEdge())) {
      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
        Entry cache_entry := getCacheEntry(in_msg.LineAddress);
        TBE tbe := TBEs.lookup(in_msg.LineAddress);
        DPRINTF(RubySlicc, "%s\n", in_msg);
        if (in_msg.Type == RubyRequestType:LD) {
          if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
            trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
          } else {
            Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
            trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
          }
        } else {
          // Any non-load request is handled as a store.
          if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
            trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
          } else {
            Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
            trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
          }
        }
      }
    }
  }
356
357 // Actions
358
  // Deallocate the block from the cache (if present) and drop the handle.
  action(ic_invCache, "ic", desc="invalidate cache") {
    if(is_valid(cache_entry)) {
      L1cache.deallocate(address);
    }
    unset_cache_entry();
  }

  // Issue a read (RdBlk) to the address-interleaved TCCdir bank.
  action(n_issueRdBlk, "n", desc="Issue RdBlk") {
    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceRequestType:RdBlk;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Request_Control;
      out_msg.InitialRequestTime := curCycle();
    }
  }

  // Issue a read-for-ownership (RdBlkM) to the TCCdir bank.
  action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceRequestType:RdBlkM;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Request_Control;
      out_msg.InitialRequestTime := curCycle();
    }
  }
389
  // Victimize an owner-capable (M/O) block: send VicDirty with the data.
  action(vd_victim, "vd", desc="Victimize M/O Data") {
    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Requestor := machineID;
      assert(is_valid(cache_entry));
      out_msg.DataBlk := cache_entry.DataBlk;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Request_Control;
      out_msg.Type := CoherenceRequestType:VicDirty;
      out_msg.InitialRequestTime := curCycle();
      // O implies other sharers may exist; tell the directory.
      if (cache_entry.CacheState == State:O) {
        out_msg.Shared := true;
      } else {
        out_msg.Shared := false;
      }
      out_msg.Dirty := cache_entry.Dirty;
    }
  }

  // Victimize a clean (E/S) block: send VicClean, no data payload.
  action(vc_victim, "vc", desc="Victimize E/S Data") {
    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Request_Control;
      out_msg.Type := CoherenceRequestType:VicClean;
      out_msg.InitialRequestTime := curCycle();
      // S implies other sharers may exist; tell the directory.
      if (cache_entry.CacheState == State:S) {
        out_msg.Shared := true;
      } else {
        out_msg.Shared := false;
      }
    }
  }
426
  // Allocate a cache entry unless we already hold one for this address.
  action(a_allocate, "a", desc="allocate block") {
    if (is_invalid(cache_entry)) {
      set_cache_entry(L1cache.allocate(address, new Entry));
    }
  }

  // Allocate a TBE and snapshot the block's data/dirty bits for the
  // writeback that is about to be issued.
  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
    check_allocate(TBEs);
    assert(is_valid(cache_entry));
    TBEs.allocate(address);
    set_tbe(TBEs.lookup(address));
    tbe.DataBlk := cache_entry.DataBlk; // Data only used for WBs
    tbe.Dirty := cache_entry.Dirty;
    tbe.Shared := false;
  }

  action(d_deallocateTBE, "d", desc="Deallocate TBE") {
    TBEs.deallocate(address);
    unset_tbe();
  }

  action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
    mandatoryQueue_in.dequeue(clockEdge());
  }

  action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
    responseToTCP_in.dequeue(clockEdge());
  }

  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
    probeNetwork_in.dequeue(clockEdge());
  }
459
  // Complete a load that hit locally, via whichever core-side interface
  // is configured (sequencer or coalescer).
  action(l_loadDone, "l", desc="local load done") {
    assert(is_valid(cache_entry));
    if (use_seq_not_coal) {
      sequencer.readCallback(address, cache_entry.DataBlk,
                             false, MachineType:TCP);
    } else {
      coalescer.readCallback(address, MachineType:TCP, cache_entry.DataBlk);
    }
  }

  // Complete a load satisfied remotely; the responder's machine type and
  // the message timestamps are passed through for latency attribution.
  // NOTE(review): recordCPReadCallBack is invoked on the coalescer even
  // on the sequencer path — confirm coalescer is always non-null here.
  action(xl_loadDone, "xl", desc="remote load done") {
    peek(responseToTCP_in, ResponseMsg) {
      assert(is_valid(cache_entry));
      if (use_seq_not_coal) {
        coalescer.recordCPReadCallBack(machineID, in_msg.Sender);
        sequencer.readCallback(address,
                               cache_entry.DataBlk,
                               false,
                               machineIDToMachineType(in_msg.Sender),
                               in_msg.InitialRequestTime,
                               in_msg.ForwardRequestTime,
                               in_msg.ProbeRequestStartTime);
      } else {
        MachineType cc_mach_type := getCoherenceType(machineID,
                                                     in_msg.Sender);
        coalescer.readCallback(address,
                               cc_mach_type,
                               cache_entry.DataBlk,
                               in_msg.InitialRequestTime,
                               in_msg.ForwardRequestTime,
                               in_msg.ProbeRequestStartTime);
      }
    }
  }

  // Complete a store that hit locally and mark the line dirty.
  action(s_storeDone, "s", desc="local store done") {
    assert(is_valid(cache_entry));
    if (use_seq_not_coal) {
      coalescer.recordCPWriteCallBack(machineID, machineID);
      sequencer.writeCallback(address, cache_entry.DataBlk,
                              false, MachineType:TCP);
    } else {
      coalescer.writeCallback(address, MachineType:TCP, cache_entry.DataBlk);
    }
    cache_entry.Dirty := true;
  }

  // Complete a store satisfied remotely and mark the line dirty.
  action(xs_storeDone, "xs", desc="remote store done") {
    peek(responseToTCP_in, ResponseMsg) {
      assert(is_valid(cache_entry));
      if (use_seq_not_coal) {
        coalescer.recordCPWriteCallBack(machineID, in_msg.Sender);
        sequencer.writeCallback(address,
                                cache_entry.DataBlk,
                                false,
                                machineIDToMachineType(in_msg.Sender),
                                in_msg.InitialRequestTime,
                                in_msg.ForwardRequestTime,
                                in_msg.ProbeRequestStartTime);
      } else {
        MachineType cc_mach_type := getCoherenceType(machineID,
                                                     in_msg.Sender);
        coalescer.writeCallback(address,
                                cc_mach_type,
                                cache_entry.DataBlk,
                                in_msg.InitialRequestTime,
                                in_msg.ForwardRequestTime,
                                in_msg.ProbeRequestStartTime);
      }
      cache_entry.Dirty := true;
    }
  }
532
  // Install response data (and its dirty bit) into the cache entry.
  action(w_writeCache, "w", desc="write data to cache") {
    peek(responseToTCP_in, ResponseMsg) {
      assert(is_valid(cache_entry));
      cache_entry.DataBlk := in_msg.DataBlk;
      cache_entry.Dirty := in_msg.Dirty;
    }
  }

  // The canceled writeback's data is stale; tell the TCC so it does not
  // wait for a data message.
  action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
    peek(responseToTCP_in, ResponseMsg) {
      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
        out_msg.addr := address;
        out_msg.Type := CoherenceResponseType:StaleNotif;
        out_msg.Sender := machineID;
        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
                                TCC_select_low_bit, TCC_select_num_bits));
        out_msg.MessageSize := MessageSizeType:Response_Control;
        DPRINTF(RubySlicc, "%s\n", out_msg);
      }
    }
  }

  // Send the victim's data (held in the TBE) down to the TCC bank.
  action(wb_data, "wb", desc="write back data") {
    peek(responseToTCP_in, ResponseMsg) {
      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
        out_msg.addr := address;
        out_msg.Type := CoherenceResponseType:CPUData;
        out_msg.Sender := machineID;
        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
                                TCC_select_low_bit, TCC_select_num_bits));
        out_msg.DataBlk := tbe.DataBlk;
        out_msg.Dirty := tbe.Dirty;
        // Shared was latched if a shared probe hit this victim in flight.
        if (tbe.Shared) {
          out_msg.NbReqShared := true;
        } else {
          out_msg.NbReqShared := false;
        }
        out_msg.State := CoherenceState:Shared; // faux info
        out_msg.MessageSize := MessageSizeType:Writeback_Data;
        DPRINTF(RubySlicc, "%s\n", out_msg);
      }
    }
  }
576
  // Probe ack with no data while retaining ownership (local CtoD race);
  // UntransferredOwner tells the directory this cache still owns the line.
  action(piu_sendProbeResponseInvUntransferredOwnership, "piu", desc="send probe ack inv, no data, retain ownership") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
      out_msg.Sender := machineID;
      // will this always be ok? probably not for multisocket
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false;
      out_msg.Hit := false;
      out_msg.Ntsl := true;
      out_msg.State := CoherenceState:NA;
      out_msg.UntransferredOwner :=true;
      out_msg.MessageSize := MessageSizeType:Response_Control;
    }
  }

  // Probe invalidation ack, no data.
  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false;
      out_msg.Hit := false;
      out_msg.Ntsl := true;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Control;
      out_msg.isValid := isValid(address);
    }
  }

  // Same shape as pi; used from the S_M/O_M upgrade-race transitions.
  action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false;
      out_msg.Ntsl := true;
      out_msg.Hit := false;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Control;
      out_msg.isValid := isValid(address);
    }
  }

  // Probe miss ack: line not held here, no data returned.
  action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false; // only true if sending back data i think
      out_msg.Hit := false;
      out_msg.Ntsl := false;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Control;
      out_msg.isValid := isValid(address);
    }
  }
641
  // Probe ack carrying data; the dirty bit comes from the TBE when a
  // writeback is in flight, otherwise from the cache entry.
  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      assert(is_valid(cache_entry) || is_valid(tbe));
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.DataBlk := getDataBlock(address);
      if (is_valid(tbe)) {
        out_msg.Dirty := tbe.Dirty;
      } else {
        out_msg.Dirty := cache_entry.Dirty;
      }
      out_msg.Hit := true;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Data;
      out_msg.isValid := isValid(address);
      APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
      APPEND_TRANSITION_COMMENT(out_msg.Dirty);
    }
  }
664
665 action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
666 enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
667 assert(is_valid(cache_entry) || is_valid(tbe));
668 assert(is_valid(cache_entry));
669 out_msg.addr := address;
670 out_msg.Type := CoherenceResponseType:CPUPrbResp;
671 out_msg.Sender := machineID;
672 out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
673 TCC_select_low_bit, TCC_select_num_bits));
674 out_msg.DataBlk := getDataBlock(address);
675 if (is_valid(tbe)) {
676 out_msg.Dirty := tbe.Dirty;
677 } else {
678 out_msg.Dirty := cache_entry.Dirty;
679 }
680 out_msg.Hit := true;
681 out_msg.State := CoherenceState:NA;
682 out_msg.MessageSize := MessageSizeType:Response_Data;
683 out_msg.isValid := isValid(address);
684 APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
685 APPEND_TRANSITION_COMMENT(out_msg.Dirty);
686 DPRINTF(RubySlicc, "Data is %s\n", out_msg.DataBlk);
687 }
688 }
689
  // Remember that a shared probe hit this victim; the eventual writeback
  // will then be marked NbReqShared (see wb_data).
  action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
    assert(is_valid(tbe));
    tbe.Shared := true;
  }

  action(mru_updateMRU, "mru", desc="Touch block for replacement policy") {
    L1cache.setMRU(address);
  }

  // Notify the directory that this cache has finished its state change.
  action(uu_sendUnblock, "uu", desc="state changed, unblock") {
    enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Unblock_Control;
      out_msg.wasValid := isValid(address);
      DPRINTF(RubySlicc, "%s\n", out_msg);
    }
  }

  action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
    probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
  }

  action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
    mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
  }
718
719 // Transitions
720
721 // transitions from base
722 transition(I, Load, I_ES) {TagArrayRead} {
723 a_allocate;
724 n_issueRdBlk;
725 p_popMandatoryQueue;
726 }
727
728 transition(I, Store, I_M) {TagArrayRead, TagArrayWrite} {
729 a_allocate;
730 nM_issueRdBlkM;
731 p_popMandatoryQueue;
732 }
733
734 transition(S, Store, S_M) {TagArrayRead} {
735 mru_updateMRU;
736 nM_issueRdBlkM;
737 p_popMandatoryQueue;
738 }
739
740 transition(E, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
741 mru_updateMRU;
742 s_storeDone;
743 p_popMandatoryQueue;
744 }
745
746 transition(O, Store, O_M) {TagArrayRead, DataArrayWrite} {
747 mru_updateMRU;
748 nM_issueRdBlkM;
749 p_popMandatoryQueue;
750 }
751
752 transition(M, Store) {TagArrayRead, DataArrayWrite} {
753 mru_updateMRU;
754 s_storeDone;
755 p_popMandatoryQueue;
756 }
757
758 // simple hit transitions
759 transition({S, E, O, M}, Load) {TagArrayRead, DataArrayRead} {
760 l_loadDone;
761 mru_updateMRU;
762 p_popMandatoryQueue;
763 }
764
765 // recycles from transients
766 transition({I_M, I_ES, ES_I, MO_I, S_M, O_M, MO_PI, I_C}, {Load, Store, Repl}) {} {
767 zz_recycleMandatoryQueue;
768 }
769
770 transition({S, E}, Repl, ES_I) {TagArrayRead} {
771 t_allocateTBE;
772 vc_victim;
773 ic_invCache;
774 }
775
776 transition({O, M}, Repl, MO_I) {TagArrayRead, DataArrayRead} {
777 t_allocateTBE;
778 vd_victim;
779 ic_invCache;
780 }
781
  // TD event transitions

  // Write miss completes: install data, finish the store, unblock the directory.
  transition(I_M, {TCC_AckM, TCC_AckCtoD}, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
    w_writeCache;
    xs_storeDone;
    uu_sendUnblock;
    pr_popResponseQueue;
  }

  // Read miss completes in Shared.
  transition(I_ES, TCC_AckS, S) {TagArrayWrite, DataArrayWrite} {
    w_writeCache;
    xl_loadDone;
    uu_sendUnblock;
    pr_popResponseQueue;
  }

  // Read miss completes in Exclusive.
  transition(I_ES, TCC_AckE, E) {TagArrayWrite, DataArrayWrite} {
    w_writeCache;
    xl_loadDone;
    uu_sendUnblock;
    pr_popResponseQueue;
  }

  // Upgrade completes: no data install needed, line is already present.
  transition({S_M, O_M}, TCC_AckM, M) {TagArrayWrite, DataArrayWrite} {
    xs_storeDone;
    uu_sendUnblock;
    pr_popResponseQueue;
  }

  // Replacement writeback nacked: directory took care of it; just retire the TBE.
  transition({MO_I, ES_I}, TCC_NackWB, I){TagArrayWrite} {
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  // Replacement writeback accepted: send the data, then retire the TBE.
  transition({MO_I, ES_I}, TCC_AckWB, I) {TagArrayWrite, DataArrayRead} {
    wb_data;
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  // Canceled writeback: tell the TCC the data is stale.
  transition(I_C, TCC_AckWB, I) {TagArrayWrite} {
    ss_sendStaleNotification;
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  transition(I_C, TCC_NackWB, I) {TagArrayWrite} {
    d_deallocateTBE;
    pr_popResponseQueue;
  }
831
  // Probe transitions

  // Invalidating probes that want data: owner-capable states supply it.
  transition({M, O}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(I, PrbInvData) {TagArrayRead, TagArrayWrite} {
    prm_sendProbeResponseMiss;
    pp_popProbeQueue;
  }

  transition({E, S}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(I_C, PrbInvData, I_C) {} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  // Needed for TCC-based protocols. Must hold on to ownership till transfer complete
  transition({M, O}, LocalPrbInv, MO_PI){TagArrayRead, TagArrayWrite} {
    piu_sendProbeResponseInvUntransferredOwnership;
    pp_popProbeQueue;
  }

  // If there is a race and we see a probe invalidate, handle normally.
  transition(MO_PI, PrbInvData, I){TagArrayWrite} {
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(MO_PI, PrbInv, I){TagArrayWrite} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  // normal exit when ownership is successfully transferred
  transition(MO_PI, TCC_AckCtoD, I) {TagArrayWrite} {
    ic_invCache;
    pr_popResponseQueue;
  }

  // Invalidating probes with no data wanted.
  transition({M, O, E, S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition({E, S, I}, LocalPrbInv, I){TagArrayRead, TagArrayWrite} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }


  // Downgrade probes: supply data; owner-capable states end up in O.
  transition({M, E, O}, PrbShrData, O) {TagArrayRead, TagArrayWrite, DataArrayRead} {
    pd_sendProbeResponseData;
    pp_popProbeQueue;
  }

  transition(MO_PI, PrbShrData) {DataArrayRead} {
    pd_sendProbeResponseData;
    pp_popProbeQueue;
  }


  transition(S, PrbShrData, S) {TagArrayRead, DataArrayRead} {
    pd_sendProbeResponseData;
    pp_popProbeQueue;
  }

  transition({I, I_C}, PrbShrData) {TagArrayRead} {
    prm_sendProbeResponseMiss;
    pp_popProbeQueue;
  }

  transition(I_C, PrbInv, I_C) {} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  // Probe races with an outstanding fetch: ack the probe and re-allocate
  // the way so the in-flight response still has somewhere to land.
  transition({I_M, I_ES}, {PrbInv, PrbInvData}){TagArrayRead} {
    pi_sendProbeResponseInv;
    ic_invCache;
    a_allocate; // but make sure there is room for incoming data when it arrives
    pp_popProbeQueue;
  }

  transition({I_M, I_ES}, PrbShrData) {} {
    prm_sendProbeResponseMiss;
    pp_popProbeQueue;
  }
932
  // Upgrade-in-flight (S_M/O_M) hit by invalidating probes: drop the
  // line, re-allocate for the pending response, and fall back to I_M.
  transition(S_M, PrbInvData, I_M) {TagArrayRead} {
    pim_sendProbeResponseInvMs;
    ic_invCache;
    a_allocate;
    pp_popProbeQueue;
  }

  // O_M must supply the owned data with its invalidation ack.
  transition(O_M, PrbInvData, I_M) {TagArrayRead,DataArrayRead} {
    pdm_sendProbeResponseDataMs;
    ic_invCache;
    a_allocate;
    pp_popProbeQueue;
  }

  transition({S_M, O_M}, {PrbInv}, I_M) {TagArrayRead} {
    pim_sendProbeResponseInvMs;
    ic_invCache;
    a_allocate;
    pp_popProbeQueue;
  }

  transition(S_M, {LocalPrbInv}, I_M) {TagArrayRead} {
    pim_sendProbeResponseInvMs;
    ic_invCache;
    a_allocate;
    pp_popProbeQueue;
  }

  // Local probe on O_M: keep ownership flagged as untransferred.
  transition(O_M, LocalPrbInv, I_M) {TagArrayRead} {
    piu_sendProbeResponseInvUntransferredOwnership;
    ic_invCache;
    a_allocate;
    pp_popProbeQueue;
  }

  transition({S_M, O_M}, PrbShrData) {DataArrayRead} {
    pd_sendProbeResponseData;
    pp_popProbeQueue;
  }

  // Writeback-in-flight victims hit by probes: respond from the TBE copy
  // and wait in I_C for the WB ack/nack.
  transition(ES_I, PrbInvData, I_C){
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(MO_I, PrbInvData, I_C) {DataArrayRead} {
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(MO_I, PrbInv, I_C) {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(ES_I, PrbInv, I_C) {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  // Shared probe hits a victim: latch Shared so the WB is marked NbReqShared.
  transition(ES_I, PrbShrData, ES_I) {DataArrayRead} {
    pd_sendProbeResponseData;
    sf_setSharedFlip;
    pp_popProbeQueue;
  }

  transition(MO_I, PrbShrData, MO_I) {DataArrayRead} {
    pd_sendProbeResponseData;
    sf_setSharedFlip;
    pp_popProbeQueue;
  }
1008
1009 }