2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
5 * For use for simulation and test purposes only
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
33 * Author: Blake Hechtman
36 machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
37 : VIPERCoalescer* coalescer;
39 bool use_seq_not_coal;
40 CacheMemory * L1cache;
41 bool WB; /*is this cache Writeback?*/
42 bool disableL1; /* bypass L1 cache? */
43 int TCC_select_num_bits;
44 Cycles issue_latency := 40; // time to send data down to TCC
45 Cycles l2_hit_latency := 18;
47 MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request";
48 MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response";
49 MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";
51 MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request";
52 MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response";
53 MessageBuffer * mandatoryQueue;
// Cache-line states for the TCP controller. Each entry pairs a state name
// with the AccessPermission it grants; the default state is I.
56 state_declaration(State, desc="TCP Cache States", default="TCP_State_I") {
57 I, AccessPermission:Invalid, desc="Invalid";
58 V, AccessPermission:Read_Only, desc="Valid";
59 W, AccessPermission:Read_Write, desc="Written";
60 M, AccessPermission:Read_Write, desc="Written and Valid";
61 L, AccessPermission:Read_Write, desc="Local access is modifiable";
62 A, AccessPermission:Invalid, desc="Waiting on Atomic";
65 enumeration(Event, desc="TCP Events") {
68 Store, desc="Store to L1 (L1 is dirty)";
69 StoreThrough, desc="Store directly to L2(L1 is clean)";
70 StoreLocal, desc="Store to L1 but L1 is clean";
71 Atomic, desc="Atomic";
72 Flush, desc="Flush if dirty(wbL1 for Store Release)";
73 Evict, desc="Evict if clean(invL1 for Load Acquire)";
75 Repl, desc="Replacing block from cache";
78 TCC_Ack, desc="TCC Ack to Core Request";
79 TCC_AckWB, desc="TCC Ack for WB";
81 Bypass, desc="Bypass the entire L1 cache";
// Resource/stat categories that transitions attach to themselves; they are
// consumed by recordRequestType and checkResourceAvailable.
84 enumeration(RequestType,
85 desc="To communicate stats from transitions to recordStats") {
86 DataArrayRead, desc="Read the data array";
87 DataArrayWrite, desc="Write the data array";
// The TagArray* descriptions previously said "data array" (copy/paste slip).
88 TagArrayRead, desc="Read the tag array";
89 TagArrayWrite, desc="Write the tag array";
90 TagArrayFlash, desc="Flash clear the tag array";
// Per-line L1 cache entry (implements the AbstractCacheEntry interface).
// getCacheEntry() obtains these by casting the result of L1cache.lookup().
94 structure(Entry, desc="...", interface="AbstractCacheEntry") {
95 State CacheState, desc="cache state";
96 bool Dirty, desc="Is the data dirty (diff than memory)?";
97 DataBlock DataBlk, desc="data for the block";
98 bool FromL2, default="false", desc="block just moved from L2";
// writeMask is OR-ed with each request's mask in dw_dirtyWrite and sent along
// with DataBlk by wt_writeThrough.
99 WriteMask writeMask, desc="written bytes masks";
// Record type held in the TBEs table (instantiated with template <TCP_TBE>).
// When a TBE exists for an address, setState() writes TBEState and
// functionalRead()/functionalWrite() operate on its DataBlk.
102 structure(TBE, desc="...") {
103 State TBEState, desc="Transient state";
104 DataBlock DataBlk, desc="data for the block, required for concurrent writebacks";
105 bool Dirty, desc="Is the data dirty (different than memory)?";
106 int NumPendingMsgs,desc="Number of acks/data messages that this processor is waiting for";
107 bool Shared, desc="Victim hit by shared probe";
110 structure(TBETable, external="yes") {
113 void deallocate(Addr);
114 bool isPresent(Addr);
117 TBETable TBEs, template="<TCP_TBE>", constructor="m_number_of_TBEs";
118 int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";
119 int WTcnt, default="0";
120 int Fcnt, default="0";
121 bool inFlush, default="false";
123 void set_cache_entry(AbstractCacheEntry b);
124 void unset_cache_entry();
127 void wakeUpAllBuffers();
128 void wakeUpBuffers(Addr a);
131 // Internal functions
133 Tick cyclesToTicks(Cycles c);
134 Entry getCacheEntry(Addr address), return_by_pointer="yes" {
135 Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
139 DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
140 TBE tbe := TBEs.lookup(addr);
144 return getCacheEntry(addr).DataBlk;
148 State getState(TBE tbe, Entry cache_entry, Addr addr) {
151 } else if (is_valid(cache_entry)) {
152 return cache_entry.CacheState;
157 void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
159 tbe.TBEState := state;
162 if (is_valid(cache_entry)) {
163 cache_entry.CacheState := state;
167 void functionalRead(Addr addr, Packet *pkt) {
168 TBE tbe := TBEs.lookup(addr);
170 testAndRead(addr, tbe.DataBlk, pkt);
172 functionalMemoryRead(pkt);
176 int functionalWrite(Addr addr, Packet *pkt) {
177 int num_functional_writes := 0;
179 TBE tbe := TBEs.lookup(addr);
181 num_functional_writes := num_functional_writes +
182 testAndWrite(addr, tbe.DataBlk, pkt);
185 num_functional_writes := num_functional_writes +
186 functionalMemoryWrite(pkt);
187 return num_functional_writes;
190 AccessPermission getAccessPermission(Addr addr) {
191 TBE tbe := TBEs.lookup(addr);
193 return TCP_State_to_permission(tbe.TBEState);
196 Entry cache_entry := getCacheEntry(addr);
197 if(is_valid(cache_entry)) {
198 return TCP_State_to_permission(cache_entry.CacheState);
201 return AccessPermission:NotPresent;
204 bool isValid(Addr addr) {
205 AccessPermission perm := getAccessPermission(addr);
206 if (perm == AccessPermission:NotPresent ||
207 perm == AccessPermission:Invalid ||
208 perm == AccessPermission:Busy) {
215 void setAccessPermission(Entry cache_entry, Addr addr, State state) {
216 if (is_valid(cache_entry)) {
217 cache_entry.changePermission(TCP_State_to_permission(state));
// Forwards the RequestType annotated on a transition to
// L1cache.recordRequestType() as the matching CacheRequestType.
//   request_type: category declared on the transition
//   addr:         address passed through to the cache's recorder
221 void recordRequestType(RequestType request_type, Addr addr) {
222 if (request_type == RequestType:DataArrayRead) {
223 L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
224 } else if (request_type == RequestType:DataArrayWrite) {
225 L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
226 } else if (request_type == RequestType:TagArrayRead) {
227 L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
228 } else if (request_type == RequestType:TagArrayFlash) {
// NOTE(review): a flash clear is recorded as a TagArrayRead, not as a
// distinct type — presumably no dedicated CacheRequestType exists; confirm.
229 L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
230 } else if (request_type == RequestType:TagArrayWrite) {
231 L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
// Asks L1cache whether the array needed by a transition is available for
// addr. DataArrayRead/DataArrayWrite query the DataArray resource; all three
// TagArray* request types query the TagArray resource. Any other RequestType
// reaches the error() call at the end of the chain.
235 bool checkResourceAvailable(RequestType request_type, Addr addr) {
236 if (request_type == RequestType:DataArrayRead) {
237 return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
238 } else if (request_type == RequestType:DataArrayWrite) {
239 return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
240 } else if (request_type == RequestType:TagArrayRead) {
241 return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
242 } else if (request_type == RequestType:TagArrayWrite) {
243 return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
244 } else if (request_type == RequestType:TagArrayFlash) {
245 // FIXME should check once per cache, rather than once per cacheline
246 return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
248 error("Invalid RequestType type in checkResourceAvailable");
255 out_port(requestNetwork_out, CPURequestMsg, requestFromTCP);
259 in_port(responseToTCP_in, ResponseMsg, responseToTCP) {
260 if (responseToTCP_in.isReady(clockEdge())) {
261 peek(responseToTCP_in, ResponseMsg, block_on="addr") {
262 Entry cache_entry := getCacheEntry(in_msg.addr);
263 TBE tbe := TBEs.lookup(in_msg.addr);
264 if (in_msg.Type == CoherenceResponseType:TDSysResp) {
267 trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
269 if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) {
270 trigger(Event:TCC_Ack, in_msg.addr, cache_entry, tbe);
272 Addr victim := L1cache.cacheProbe(in_msg.addr);
273 trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
276 } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck ||
277 in_msg.Type == CoherenceResponseType:NBSysWBAck) {
278 trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
280 error("Unexpected Response Message to Core");
// Input port for core requests arriving on mandatoryQueue. Each ready
// RubyRequest is peeked (blocking on its LineAddress), the cache entry and
// TBE for that line are looked up, and the request type selects the event:
//   LD          -> Load
//   ATOMIC      -> Atomic
//   ST          -> StoreThrough / StoreLocal / Store, or Repl on a victim
//                  chosen by L1cache.cacheProbe() when the line is neither
//                  present nor allocatable
//   FLUSH       -> Flush
//   REPLACEMENT -> Evict
286 in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
287 if (mandatoryQueue_in.isReady(clockEdge())) {
288 peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
289 Entry cache_entry := getCacheEntry(in_msg.LineAddress);
290 TBE tbe := TBEs.lookup(in_msg.LineAddress);
291 DPRINTF(RubySlicc, "%s\n", in_msg);
292 if (in_msg.Type == RubyRequestType:LD) {
293 trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
294 } else if (in_msg.Type == RubyRequestType:ATOMIC) {
295 trigger(Event:Atomic, in_msg.LineAddress, cache_entry, tbe);
296 } else if (in_msg.Type == RubyRequestType:ST) {
298 trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe);
300 if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
// Stores to the SPILL segment take the StoreLocal event.
301 if (in_msg.segment == HSASegment:SPILL) {
302 trigger(Event:StoreLocal, in_msg.LineAddress, cache_entry, tbe);
304 trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
306 trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe);
// No room for the line: trigger Repl on the victim before the store can
// proceed.
309 Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
310 trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
312 } // end if (disableL1)
313 } else if (in_msg.Type == RubyRequestType:FLUSH) {
314 trigger(Event:Flush, in_msg.LineAddress, cache_entry, tbe);
315 } else if (in_msg.Type == RubyRequestType:REPLACEMENT){
316 trigger(Event:Evict, in_msg.LineAddress, cache_entry, tbe);
// NOTE(review): the trigger logic after this error() duplicates the ST path
// above; if error() aborts the simulation it is unreachable — confirm and
// consider removing it.
318 error("Unexpected Request Message from VIC");
319 if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
321 trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
323 trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe);
326 Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
327 trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
336 action(ic_invCache, "ic", desc="invalidate cache") {
337 if(is_valid(cache_entry)) {
338 cache_entry.writeMask.clear();
339 L1cache.deallocate(address);
// Sends a RdBlk request for `address` on requestNetwork_out after
// issue_latency. The destination TCC is picked by mapAddressToRange() using
// TCC_select_low_bit / TCC_select_num_bits; the message is control-sized and
// stamped with the current cycle as its InitialRequestTime.
344 action(n_issueRdBlk, "n", desc="Issue RdBlk") {
345 enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
346 out_msg.addr := address;
347 out_msg.Type := CoherenceRequestType:RdBlk;
348 out_msg.Requestor := machineID;
349 out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
350 TCC_select_low_bit, TCC_select_num_bits));
351 out_msg.MessageSize := MessageSizeType:Request_Control;
352 out_msg.InitialRequestTime := curCycle();
356 action(rb_bypassDone, "rb", desc="bypass L1 of read access") {
357 peek(responseToTCP_in, ResponseMsg) {
358 DataBlock tmp:= in_msg.DataBlk;
359 if (use_seq_not_coal) {
360 sequencer.readCallback(address, tmp, false, MachineType:L1Cache);
362 coalescer.readCallback(address, MachineType:L1Cache, tmp);
364 if(is_valid(cache_entry)) {
370 action(wab_bypassDone, "wab", desc="bypass L1 of write access") {
371 peek(responseToTCP_in, ResponseMsg) {
372 DataBlock tmp := in_msg.DataBlk;
373 if (use_seq_not_coal) {
374 sequencer.writeCallback(address, tmp, false, MachineType:L1Cache);
376 coalescer.writeCallback(address, MachineType:L1Cache, tmp);
381 action(norl_issueRdBlkOrloadDone, "norl", desc="local load done") {
382 peek(mandatoryQueue_in, RubyRequest){
383 if (cache_entry.writeMask.cmpMask(in_msg.writeMask)) {
384 if (use_seq_not_coal) {
385 sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
387 coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
390 enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
391 out_msg.addr := address;
392 out_msg.Type := CoherenceRequestType:RdBlk;
393 out_msg.Requestor := machineID;
394 out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
395 TCC_select_low_bit, TCC_select_num_bits));
396 out_msg.MessageSize := MessageSizeType:Request_Control;
397 out_msg.InitialRequestTime := curCycle();
403 action(wt_writeThrough, "wt", desc="Flush dirty data") {
405 APPEND_TRANSITION_COMMENT("write++ = ");
406 APPEND_TRANSITION_COMMENT(WTcnt);
407 enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
408 out_msg.addr := address;
409 out_msg.Requestor := machineID;
410 assert(is_valid(cache_entry));
411 out_msg.DataBlk := cache_entry.DataBlk;
412 out_msg.writeMask.clear();
413 out_msg.writeMask.orMask(cache_entry.writeMask);
414 out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
415 TCC_select_low_bit, TCC_select_num_bits));
416 out_msg.MessageSize := MessageSizeType:Data;
417 out_msg.Type := CoherenceRequestType:WriteThrough;
418 out_msg.InitialRequestTime := curCycle();
419 out_msg.Shared := false;
// Sends an Atomic request for `address` to the TCC on requestNetwork_out
// after issue_latency. The outgoing write mask is cleared and then OR-ed with
// the mask of the request at the head of mandatoryQueue_in. No DataBlk is set
// on the message in this action.
423 action(at_atomicThrough, "at", desc="send Atomic") {
424 peek(mandatoryQueue_in, RubyRequest) {
425 enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
426 out_msg.addr := address;
427 out_msg.Requestor := machineID;
428 out_msg.writeMask.clear();
429 out_msg.writeMask.orMask(in_msg.writeMask);
430 out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
431 TCC_select_low_bit, TCC_select_num_bits));
432 out_msg.MessageSize := MessageSizeType:Data;
433 out_msg.Type := CoherenceRequestType:Atomic;
434 out_msg.InitialRequestTime := curCycle();
435 out_msg.Shared := false;
440 action(a_allocate, "a", desc="allocate block") {
441 if (is_invalid(cache_entry)) {
442 set_cache_entry(L1cache.allocate(address, new Entry));
444 cache_entry.writeMask.clear();
447 action(t_allocateTBE, "t", desc="allocate TBE Entry") {
448 check_allocate(TBEs);
449 TBEs.allocate(address);
450 set_tbe(TBEs.lookup(address));
453 action(d_deallocateTBE, "d", desc="Deallocate TBE") {
454 TBEs.deallocate(address);
458 action(sf_setFlush, "sf", desc="set flush") {
460 APPEND_TRANSITION_COMMENT(" inFlush is true");
463 action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
464 mandatoryQueue_in.dequeue(clockEdge());
467 action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
468 responseToTCP_in.dequeue(clockEdge());
471 action(l_loadDone, "l", desc="local load done") {
472 assert(is_valid(cache_entry));
473 if (use_seq_not_coal) {
474 sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
476 coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
480 action(s_storeDone, "s", desc="local store done") {
481 assert(is_valid(cache_entry));
483 if (use_seq_not_coal) {
484 sequencer.writeCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache);
486 coalescer.writeCallback(address, MachineType:L1Cache, cache_entry.DataBlk);
488 cache_entry.Dirty := true;
491 action(inv_invDone, "inv", desc="local inv done") {
492 if (use_seq_not_coal) {
493 DPRINTF(RubySlicc, "Sequencer does not define invCallback!\n");
496 coalescer.invCallback(address);
500 action(wb_wbDone, "wb", desc="local wb done") {
501 if (inFlush == true) {
504 if (use_seq_not_coal) {
505 DPRINTF(RubySlicc, "Sequencer does not define wbCallback!\n");
508 coalescer.wbCallback(address);
512 if (WTcnt == 0 && Fcnt == 0) {
514 APPEND_TRANSITION_COMMENT(" inFlush is false");
519 action(wd_wtDone, "wd", desc="writethrough done") {
521 if (inFlush == true) {
525 APPEND_TRANSITION_COMMENT("write-- = ");
526 APPEND_TRANSITION_COMMENT(WTcnt);
// Applies the store at the head of mandatoryQueue_in to the cache entry:
// copyPartial() is called with the request's WTData and writeMask, and the
// entry's writeMask is OR-ed with the request's mask.
529 action(dw_dirtyWrite, "dw", desc="update write mask"){
// Match l_loadDone / s_storeDone / wt_writeThrough / w_writeCache: fail with
// an assertion instead of dereferencing an unallocated entry.
assert(is_valid(cache_entry));
530 peek(mandatoryQueue_in, RubyRequest) {
531 cache_entry.DataBlk.copyPartial(in_msg.WTData,in_msg.writeMask);
532 cache_entry.writeMask.orMask(in_msg.writeMask);
// Installs the data from the response at the head of responseToTCP_in into
// the (required-valid) cache entry.
535 action(w_writeCache, "w", desc="write data to cache") {
536 peek(responseToTCP_in, ResponseMsg) {
537 assert(is_valid(cache_entry));
// Start from the incoming block, then overlay the entry's own data under the
// entry's writeMask (presumably so locally written bytes are not overwritten
// by the response — confirm against copyPartial semantics).
538 DataBlock tmp := in_msg.DataBlk;
539 tmp.copyPartial(cache_entry.DataBlk,cache_entry.writeMask);
540 cache_entry.DataBlk := tmp;
544 action(mru_updateMRU, "mru", desc="Touch block for replacement policy") {
545 L1cache.setMRU(address);
548 // action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
549 // mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
552 action(z_stall, "z", desc="stall; built-in") {
557 // ArrayRead/Write assumptions:
558 // All requests read Tag Array
559 // TBE allocation writes the TagArray to I
560 // TBE only checked on misses
561 // Stores will also write dirty bits in the tag
562 // WriteThroughs still need to use cache entry as staging buffer for wavefront
564 // Stalling transitions do NOT check the tag array...and if they do,
565 // they can cause a resource stall deadlock!
567 transition({A}, {Load, Store, Atomic, StoreThrough}) { //TagArrayRead} {
571 transition({M, V, L}, Load) {TagArrayRead, DataArrayRead} {
577 transition(I, Load) {TagArrayRead} {
582 transition({V, I}, Atomic, A) {TagArrayRead, TagArrayWrite} {
589 transition({M, W}, Atomic, A) {TagArrayRead, TagArrayWrite} {
596 transition(W, Load, I) {TagArrayRead, DataArrayRead} {
598 norl_issueRdBlkOrloadDone;
602 transition({I}, StoreLocal, L) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
609 transition({L, V}, StoreLocal, L) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
616 transition(I, Store, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
623 transition(V, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
630 transition({M, W}, Store) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
637 //M,W should not see storeThrough
638 transition(I, StoreThrough) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
647 transition({V,L}, StoreThrough, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
655 transition(I, TCC_Ack, V) {TagArrayRead, TagArrayWrite, DataArrayRead, DataArrayWrite} {
662 transition(I, Bypass, I) {
667 transition(A, Bypass, I){
673 transition(A, TCC_Ack, I) {TagArrayRead, DataArrayRead, DataArrayWrite} {
682 transition(V, TCC_Ack, V) {TagArrayRead, DataArrayRead, DataArrayWrite} {
688 transition({W, M}, TCC_Ack, M) {TagArrayRead, TagArrayWrite, DataArrayRead, DataArrayWrite} {
694 transition({I, V}, Repl, I) {TagArrayRead, TagArrayWrite} {
698 transition({A}, Repl) {TagArrayRead, TagArrayWrite} {
702 transition({W, M}, Repl, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
707 transition(L, Repl, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
712 transition({W, M}, Flush, I) {TagArrayRead, TagArrayWrite, DataArrayRead} {
719 transition({V, I, A, L},Flush) {TagArrayFlash} {
725 transition({I, V}, Evict, I) {TagArrayFlash} {
731 transition({W, M}, Evict, W) {TagArrayFlash} {
736 transition({A, L}, Evict) {TagArrayFlash} {
741 // TCC_AckWB only snoops TBE
742 transition({V, I, A, M, W, L}, TCC_AckWB) {