add ClintBase
[shakti-core.git] / src / core / dcache_asic_generic.bsv
1 /*
2 Copyright (c) 2013, IIT Madras
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
6
7 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9 * Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
13 */
14 package dcache_asic;
15 /*===== Package imports ===== */
16 import BRAMCore::*;
17 import FIFO::*;
18 import FIFOF::*;
19 import SpecialFIFOs::*;
20 import LFSR::*;
21 import ConfigReg::*;
22 import DReg::*;
23 import BUtils::*;
24 import MemoryMap::*;
25 import mem_config1::*;
26 /*===== project imports==== */
27 import defined_types::*;
28 `include "core_parameters.bsv"
29 import QuadMem::*;
30 /*========================= */
31
32
// Data-cache interface: a request/response port towards the core, separate read and write
// channels towards memory and, when MMU is defined, a port that delivers the translated address.
33 interface Ifc_dcache;
34 method Action virtual_address(Bit#(`VADDR) vaddress, Access_type load_store, Bit#(TMul#(`DCACHE_WORD_SIZE,8)) writedata, Bit#(3) transfer_size, Bit#(5) atomic_op, Bool signextend); // new request from the core; mkdcache accepts it only while its state is Idle
35 method Maybe#(Tuple3#(Bit#(`Reg_width), Trap_type,Bit#(`PERFMONITORS))) response_to_core; // (data, trap, perf-counter bits); Valid only in the cycle a response is produced
36 method ActionValue#(To_Memory#(`PADDR)) read_request_to_memory; // pops the next read request destined for memory
37 method ActionValue#(To_Memory_Write) write_request_to_memory; // pops the next write request destined for memory
38 method Action read_response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp); // pushes a read response coming back from memory
39 method Action write_response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp); // pushes a write response coming back from memory
40 method Bool init_complete; // in mkdcache this is True whenever the controller is not in the Fence state
41 `ifdef MMU
42 method Action physical_address(Bit#(`PADDR) paddr, Trap_type exception); // translated address for the current request plus any trap raised by the translation
43 `endif
44 endinterface
45
// Controller states of mkdcache:
//   Idle         - virtual_address may accept a new request
//   KeepPolling  - waiting until the bytes needed by the request are present in the line buffer
//   Stall1       - one-cycle delay, after which the tag/data reads are issued again
//   Initialize   - the tags of every set are being cleared (reset value; also used for a fence with nothing dirty)
//   ReadingCache - tag/data reads were issued; hit or miss is evaluated
//   Fence        - walking all sets and ways, writing back dirty lines and clearing tags
//   IOReadResp   - waiting for the response to a non-cacheable read
//   IOWriteResp  - waiting for the response to a non-cacheable write
46 typedef enum {Idle,KeepPolling,Stall1,Initialize,ReadingCache,Fence,IOReadResp,IOWriteResp} DcacheState deriving (Bits,Eq,FShow);
47
48 (*synthesize*)
49 `ifdef MMU (*conflict_free ="virtual_address, physical_address"*) `endif
50 (*preempts="virtual_address,read_from_lbdata_into_hold_reg"*)
51 (*preempts="read_data_fromcache,read_from_lbdata_into_hold_reg"*)
52 module mkdcache(Ifc_dcache);
53 /* VAddr = [tag_bits|set_bits|word_bits|byte_bits] */
54 let byte_bits=valueOf(TLog#(`DCACHE_WORD_SIZE)); // number of bits to select a byte within a word. = 2 -- NOTE(review): the rules below shift by 64 bits per word, which suggests 8-byte words (3 bits); confirm against core_parameters.bsv
55 let word_bits=valueOf(TLog#(`DCACHE_BLOCK_SIZE)); // number of bits to select a word within a block. = 4 -- NOTE(review): value depends on `DCACHE_BLOCK_SIZE; confirm
56 let set_bits=valueOf(TLog#(`DCACHE_SETS)); // number of bits to select a set from the cache. = log2(`DCACHE_SETS)
// LR reservation: Valid {atomic_op[4], physical address} after an LR, Invalid after a successful SC (see atomic_operation).
57 Reg#(Maybe#(Tuple2#(Bit#(1),Bit#(`PADDR)))) rg_lr_paddress<-mkReg(tagged Invalid);
// atomic_operation: computes the result of an atomic memory operation.
//   loaded_value - value currently stored at the target location
//   rs2          - operand supplied with the request
//   atomic_op    - bit 4 set: operate on bits 31:0 (operands sign-extended, result replicated
//                  into both halves); bits 3:0 select the operation (see the case below)
//   addr         - physical address, recorded by LR and compared by SC
// Returns (sc_done, store_result, atomic_result):
//   sc_done      - Valid only for SC: 0 when the store-conditional succeeds, 1 when it fails
//   store_result - True when the caller has to write atomic_result back
//   atomic_result- value to be written
// Side effect: LR stores {atomic_op[4],addr} in rg_lr_paddress; a successful SC invalidates it.
58 function ActionValue#(Tuple3#(Maybe#(Bit#(1)),Bool, Bit#(TMul#(`DCACHE_WORD_SIZE,8)))) atomic_operation(Bit#(TMul#(`DCACHE_WORD_SIZE,8)) loaded_value, Bit#(TMul#(`DCACHE_WORD_SIZE,8)) rs2, Bit#(5) atomic_op, Bit#(`PADDR) addr);
59 return (
60 actionvalue
61 Bit#(TMul#(`DCACHE_WORD_SIZE,8)) atomic_result=rs2;
62 Bit#(TMul#(`DCACHE_WORD_SIZE,8)) op1;
63 Maybe#(Bit#(1)) sc_done=tagged Invalid;
64 if(atomic_op[4]==1)
65 op1=signExtend(loaded_value[31:0]);
66 else
67 op1=loaded_value;
68 Bit#(TMul#(`DCACHE_WORD_SIZE,8)) op2=(atomic_op[4]==1)?signExtend(rs2[31:0]):rs2;
// signed views of the operands, used by the signed min/max cases
69 Int#(TMul#(`DCACHE_WORD_SIZE,8)) s_op1=unpack(op1);
70 Int#(TMul#(`DCACHE_WORD_SIZE,8)) s_op2 = unpack(op2);
71 Bool store_result = True;
72 `ifdef verbose $display($time,"\tDCACHE: atomic instruction atomic op %b op1: %h op2: %h", atomic_op,op1,op2); `endif
73 case (atomic_op[3:0])
74 'b0011:atomic_result=op2; // swap
75 'b0000: atomic_result= (op1+op2); // add
76 'b0010: atomic_result= (op1^op2); // xor
77 'b0110: atomic_result= (op1&op2); // and
78 'b0100: atomic_result= (op1|op2); // or
79 'b1100: atomic_result= min(op1,op2); // min on Bit operands (unsigned compare)
80 'b1110: atomic_result= max(op1,op2); // max on Bit operands (unsigned compare)
81 'b1000: atomic_result= pack(min(s_op1,s_op2)); // signed min
82 'b1010: atomic_result= pack(max(s_op1,s_op2)); // signed max
// NOTE(review): unusual `action begin ... end endaction` wrapper around the LR case - confirm the
// assignments to atomic_result/store_result inside it are seen by the code after the case.
83 'b0101: action begin
84 rg_lr_paddress <= tagged Valid tuple2(atomic_op[4],addr);
85 atomic_result=loaded_value; // LR
86 store_result = False;
87 end
88 endaction
89 'b0111: begin
90 atomic_result=rs2; // SC
91 sc_done = tagged Valid 1; // assume failure until the reservation check passes
92 store_result = False;
93 `ifdef verbose $display($time,"\tDCACHE: store condition instruction"); `endif
94 if(rg_lr_paddress matches tagged Valid .lr) begin
95 let {x,y} = lr;
96 if(x==atomic_op[4] && addr== y) begin // same width flag and same address as the LR
97 `ifdef verbose $display($time,"\tDCACHE: store condition satisfied"); `endif
98 sc_done = tagged Valid 0;
99 rg_lr_paddress <= tagged Invalid;
100 store_result = True;
101 end
102 end
103 end
104 default: atomic_result= op1;
105 endcase
106 if(atomic_op[4]==1)
107 atomic_result=duplicate(atomic_result[31:0]); // replicate the 32-bit result across the word
108
109 return tuple3(sc_done,store_result,atomic_result);
110 endactionvalue );
111 endfunction
// Storage arrays: one tag memory and one data memory per way. Throughout this module a tag
// entry is treated as {dirty (bit 21), valid (bit 20), tag (bits 19:0)}.
112 // BRAM_DUAL_PORT#(Bit#(TLog#(`DCACHE_SETS)),Bit#(TAdd#(20,2))) tag [`DCACHE_WAYS];
113 // BRAM_DUAL_PORT_BE#(Bit#(TLog#(`DCACHE_SETS)),Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)),64) data [`DCACHE_WAYS];
114 Ifc_dcache_data data [`DCACHE_WAYS];
115 Ifc_dcache_tag tag [`DCACHE_WAYS];
116 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
117 tag[i] <- mkdcache_tag;
118 data[i] <-mkdcache_data;
119 end
120 Ifc_QuadMem lbdata <-mkQuadMem; // line buffer: port B is written by fillcache, port A by stores/atomics that hit the line being filled
121
122 LFSR#(Bit#(2)) random_line<-mkRCounter(3); // for random line replacement
123 /* storage for requests from the cpu */
124 Reg#(Bool) rg_global_dirty[2] <-mkCReg(2,False); // set by any store/atomic write on a hit, cleared when a fence or initialisation completes
125 Reg#(Bit#(`VADDR)) rg_vaddress<-mkReg(0); // virtual address of the request being served
126 Reg#(Bit#(3)) rg_transfer_size<-mkReg(0); // 0: byte, 1: half-word, 2: word, otherwise the full register width
127 Reg#(Bit#(5)) rg_atomic_op<-mkReg(0); // operation selector handed to atomic_operation
128 Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,8))) rg_writedata<-mkReg(0); // store data, already replicated according to the transfer size
129 Reg#(Access_type) rg_load_store<-mkReg(Load); // kind of the request being served
130 Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) rg_writeenable<-mkReg(0); // byte enables of the request within the cache line
131 Reg#(Bool) rg_signextend<-mkReg(False); // sign-extend (True) or zero-extend (False) the loaded value
132 Reg#(Bool) update_data_from_lb[2]<-mkCReg(2,False); // not referenced by any rule or method of this module
133 Reg#(Bool) hold_data_from_lb<-mkDReg(False); // not referenced by any rule or method of this module
134 Reg#(Bit#(1)) lb_dirty <-mkReg(0); // set when a store/atomic writes into the line buffer; becomes the dirty bit when the line is moved into the cache
135 /*=================================== */
136 /* storage for physical translation */
// These registers exist only when MMU is defined. The reset value of rg_trnslte_done used to be
// written as a nested `ifdef MMU ... `else ... `endif; inside this region the `else arm could
// never be selected, so the value is written directly.
137 `ifdef MMU
138 Reg#(Bit#(`PADDR)) rg_paddress<-mkReg(0); // physical address delivered through physical_address
139 Reg#(Bool) rg_trnslte_done[2] <- mkCReg(2, False); // set by physical_address, cleared when the request is answered or a fence/initialisation completes
140 Reg#(Trap_type) rg_tlb_exception<-mkReg(tagged None); // trap delivered together with the translation
141 `endif
142 /*==================================== */
143
144 Reg#(Bit#(`PERFMONITORS)) rg_perf_monitor<-mkReg(0); // perf-counter bits accumulated for the request in flight; cleared when the response is sent
145 Reg#(DcacheState) rg_state[3]<-mkCReg(3,Initialize); // this needs to be a CReg so that request can fire in the same cycle as response
146 Reg#(Bit#(TLog#(`DCACHE_SETS))) set_index <-mkReg(0); // set counter used while initialising or fencing
147 Reg#(Bit#(TLog#(`DCACHE_WAYS))) way_index <-mkReg(0); // way counter used while fencing
148 Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) rg_we<-mkReg(0); // byte enables for the next word of the line fill (rotated by fillcache)
149 Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) line_bytes_written<-mkReg(0); // bytes of the line buffer filled so far; all ones means the line is complete
150
151 Wire#(Maybe#(Tuple2#(Bit#(20),Bit#(TLog#(`DCACHE_SETS))))) wr_write_info<-mkDWire(tagged Invalid); // (tag, set) of a store/atomic write performed in this cycle; read by virtual_address
152 Wire#(Maybe#(Tuple3#(Bit#(`Reg_width), Trap_type, Bit#(`PERFMONITORS)))) wr_response_to_cpu<-mkDWire(tagged Invalid); // response returned by response_to_core; Invalid unless written in this cycle
153 FIFOF#(To_Memory#(`PADDR)) ff_read_request_to_memory <-mkLFIFOF(); // read requests towards memory
154 FIFOF#(To_Memory_Write) ff_write_request_to_memory <-mkLFIFOF(); // write requests towards memory
155 FIFOF#(From_Memory#(`DCACHE_WORD_SIZE)) ff_read_response_from_memory <-mkSizedBypassFIFOF(1); // read responses from memory
156 FIFOF#(From_Memory#(`DCACHE_WORD_SIZE)) ff_write_response_from_memory <-mkSizedBypassFIFOF(1); // write responses from memory
157 FIFOF#(Tuple4#(Bit#(20),Bit#(TLog#(`DCACHE_SETS)),Bit#(TLog#(`DCACHE_WAYS)),Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)))) memoperation <-mkUGSizedFIFOF(2); // outstanding line fills: (tag, set, way to fill, byte enables of the first word). Unguarded: users check notEmpty/notFull themselves
158 Reg#(Bool) increment_counters <-mkReg(True); // cleared by keep_polling_on_stall so the replayed lookup does not count the request twice
159 Reg#(Bool) capture_counters <-mkDReg(False); // one-cycle pulse set by virtual_address when a request is stalled behind a same-set write
160 Reg#(Bool) pending_fence_write_response[2]<-mkCReg(2,False); // a write response is still expected; set for fence write-backs and also for dirty-line evictions
161
// Debug trace: when `verbose is defined, prints the controller state seen on port 0 of
// rg_state in every cycle. Without `verbose the rule body is empty.
162 rule display_state;
`ifdef verbose
163 $display($time,"\tDCACHE: state",fshow(rg_state[0]));
`endif
164 endrule
165
166 /*====== Invalidate all the entries in the cache on startup or during Fence ==== */
// Runs while the controller is in Initialize and no line fill is outstanding: clears the tag
// entry of every way for one set per cycle. After the last set the controller returns to Idle,
// the replacement counter is re-seeded, the global dirty flag is cleared and, when the request
// that started this was a Fence, an empty response is sent to the core.
167 rule fencing_the_cache(rg_state[0]==Initialize && !memoperation.notEmpty);
168 `ifdef verbose $display($time,"\tDCACHE: Initializing index: %d",set_index," ",fshow(rg_load_store)); `endif
169 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
170 tag[i].write_request(truncate(set_index),0); // valid and dirty bits become 0
171 end
172 if(set_index==fromInteger(`DCACHE_SETS-1)) begin
173 rg_state[0]<=Idle;
174 set_index<=0;
175 way_index<=0;
176 random_line.seed('d3);
177 rg_global_dirty[0]<=False;
// rg_trnslte_done is declared only when MMU is defined, so the write has to be guarded as well
178 `ifdef MMU rg_trnslte_done[1]<=False; `endif
179 if(rg_load_store==Fence)
180 wr_response_to_cpu<= tagged Valid (tuple3(0,tagged None,0));
181 end
182 else
183 set_index<=set_index+1;
184 endrule
// Whenever a write response is flagged as outstanding, consume it from the response FIFO and
// clear the flag (the rule waits implicitly until the FIFO holds a response).
185 rule deq_write_response_during_fence(pending_fence_write_response[0]);
186 pending_fence_write_response[0]<=False;
187 ff_write_response_from_memory.deq;
188 endrule
189 /*=============================================================================== */
// Fence walk: visits the ways of every set. The entry read in the previous cycle is written back
// to memory when it is both valid and dirty, its tag is cleared, and the read of the next way
// (or of way 0 of the next set) is issued. While a write response is outstanding the counters
// do not advance. After the last way of the last set the controller returns to Idle and sends
// an empty response to the core.
190 rule handle_fence(rg_state[0]==Fence &&!memoperation.notEmpty);
191 Bit#(20) tag_values=tag[way_index].read_response[20-1:0]; // hold the tag values
192 Bit#(1) dirty_value=tag[way_index].read_response[20+1]; // holds the dirty bit
193 Bit#(1) valid_value=tag[way_index].read_response[20]; // holds the valid bit
194 Bit#(TMul#(8,TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) data_values; // holds the cache lines.
195 Bit#(TAdd#(TLog#(`DCACHE_WORD_SIZE),TLog#(`DCACHE_BLOCK_SIZE))) p_offset =0;
196 data_values=data[way_index].read_response;
197
198 Bit#(`PADDR) write_addr={tag_values,truncate(set_index),p_offset}; // line-aligned address of the entry
199 `ifdef verbose $display($time,"\tDCACHE: Handling Fence.tag %h setindex: %d way_index: %d Dirty: %b Valid: %b",tag_values,set_index,way_index,dirty_value,valid_value); `endif
200 `ifdef verbose $display($time,"\tDCACHE: Fence addr: %h line: %h ",write_addr,data_values); `endif
201 Bit#(TLog#(`DCACHE_SETS)) new_set=set_index;
202 Bit#(TLog#(`DCACHE_SETS)) old_set=set_index;
203 if(!pending_fence_write_response[1])begin
204 if(dirty_value==1 && valid_value==1)begin // valid and dirty
205 ff_write_request_to_memory.enq(To_Memory_Write { // send the request to memory to
206 address:write_addr, data_line:data_values,
207 burst_length:`DCACHE_BLOCK_SIZE, transfer_size:3, ld_st:Store});
208 pending_fence_write_response[1]<=True;
209 end
210 if(way_index==fromInteger(`DCACHE_WAYS-1))begin
211 new_set=set_index+1;
212 if(set_index==fromInteger(`DCACHE_SETS-1))begin
213 rg_state[0]<=Idle;
214 rg_global_dirty[0]<=False;
215 wr_response_to_cpu<= tagged Valid (tuple3(0,tagged None,0));
// rg_trnslte_done is declared only when MMU is defined, so the write has to be guarded as well
216 `ifdef MMU rg_trnslte_done[1]<=False; `endif
217 set_index<=0;
218 end
219 else
220 set_index<=new_set;
221 end
222 way_index<=way_index+1;
223 end
224 tag[way_index+1].read_request(new_set);
225 tag[way_index].write_request(old_set,0);
226 data[way_index+1].read_request(new_set);
227
228 endrule
229
// read_data_fromcache: evaluates the lookup started one cycle earlier.
//  * cacheable address: compares the tag of every way (and of the line currently being filled
//    in the line buffer) with the request tag.
//      - misaligned access            -> trap response, back to Idle
//      - hit in a way or line buffer  -> respond; stores/atomics update the way or the line buffer.
//                                        If the bytes needed are not yet in the line buffer the
//                                        controller polls (KeepPolling).
//      - miss                         -> choose a victim way (invalid way, else a clean way, else
//                                        random), write it back when dirty, request the line from
//                                        memory and record the fill in memoperation.
//  * I/O address: forwards the access to memory as a single beat (IOReadResp / IOWriteResp).
//  * with MMU: nothing is looked up until the translation is done; a translation trap is
//    forwarded to the core.
// Changes with respect to the earlier version of this rule:
//  - the address handed to atomic_operation no longer requires rg_paddress without MMU
//  - every write to rg_trnslte_done is guarded by `ifdef MMU (the register is MMU-only)
//  - the write-back of a dirty victim is tagged ld_st:Store like the other write requests
230 (*conflict_free="virtual_address,read_data_fromcache"*)
231 rule read_data_fromcache(rg_state[0]==ReadingCache && memoperation.notFull);
232 /*========== Check for hit or miss =================== */
233 Bit#(TLog#(`DCACHE_WAYS)) linenum=0;
234 Bit#(`PERFMONITORS) perf_monitor=rg_perf_monitor;
235 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) dataline=0;
236 Bool hit=False;
237 Bool lbhit=False;
238 Bit#(`DCACHE_WAYS) valid_values=0; // valid bit of every way
239 Bit#(`DCACHE_WAYS) dirty_values=0; // dirty bit of every way
240 Bit#(TLog#(`DCACHE_BLOCK_SIZE)) word_offset=rg_vaddress[word_bits+byte_bits-1:byte_bits];
241 Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=rg_vaddress[byte_bits-1:0];
242 Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits];
243 Bit#(TLog#(`DCACHE_WAYS)) replaceblock=0;
244 `ifdef MMU
245 Bit#(20) cpu_tag=rg_paddress[`PADDR-1:`PADDR-20];
Bit#(`PADDR) req_paddr=rg_paddress; // address used for the LR/SC reservation
246 `else
247 Bit#(20) cpu_tag=rg_vaddress[`PADDR-1:`PADDR-20];
Bit#(`PADDR) req_paddr=truncate(rg_vaddress); // no translation: the low bits of the virtual address are the physical address
248 `endif
249 `ifdef MMU
250 if(increment_counters)begin
251 if(rg_load_store==Load)
252 perf_monitor[`TOTAL_LOADS]=1;
253 else if(rg_load_store==Store)
254 perf_monitor[`TOTAL_STORES]=1;
255 else
256 perf_monitor[`TOTAL_ATOMIC]=1;
257 end
258 if(rg_trnslte_done[0] &&& rg_tlb_exception matches tagged None) begin
259 if(!is_IO_Addr(rg_paddress))begin
260 `else
261 if(!is_IO_Addr(truncate(rg_vaddress)))begin
262 `endif
263 if(increment_counters)begin
264 if(rg_load_store==Load)
265 perf_monitor[`DCACHE_CACHEABLE_LOAD]=1;
266 else if(rg_load_store==Store)
267 perf_monitor[`DCACHE_CACHEABLE_STORE]=1;
268 else if(rg_load_store==Atomic)
269 perf_monitor[`DCACHE_CACHEABLE_ATOMIC]=1;
270 end
271 else
272 increment_counters<=True;
273
274 valid_values={tag[3].read_response[20],tag[2].read_response[20],tag[1].read_response[20],tag[0].read_response[20]};
275 dirty_values={tag[3].read_response[21],tag[2].read_response[21],tag[1].read_response[21],tag[0].read_response[21]};
276
277 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
278 let stored_tag=tag[i].read_response[19:0];
279 let stored_valid=tag[i].read_response[20];
280 if(valid_values[i]==0)
281 replaceblock=fromInteger(i); // remember an invalid way as the preferred place for a fill
282 if(stored_valid==1 && stored_tag==cpu_tag)begin // if a tag matches capture the tag and data
283 hit=True;
284 linenum=fromInteger(i);
285 dataline=data[i].read_response;
286 end
287 end
288 let linebuffer=lbdata.response_portA;
289 let {lbtag,lbset,lbreplaceblock,lbwriteenable}=memoperation.first;
290 if(memoperation.notEmpty && lbset==setindex && lbtag==cpu_tag)begin // the requested line is the one being filled
291 dataline=linebuffer;
292 lbhit=True;
293 end
294 `ifdef verbose $display($time,"DCACHE: DATALINE: %h",dataline); `endif
295 Bit#(`Reg_width) data_value=(dataline>>{6'd0,word_offset}*64)[`Reg_width-1:0];
296 data_value=data_value>>({4'b0,byte_offset}*8);
297 if(!rg_signextend)
298 data_value=rg_transfer_size==0?zeroExtend(data_value[7:0]):rg_transfer_size==1?zeroExtend(data_value[15:0]):rg_transfer_size==2?zeroExtend(data_value[31:0]):data_value;
299 else
300 data_value=rg_transfer_size==0?signExtend(data_value[7:0]):rg_transfer_size==1?signExtend(data_value[15:0]):rg_transfer_size==2?signExtend(data_value[31:0]):data_value;
301
302 /*====================================================== */
303 /*=========== Respond to Core ============================ */
304 if((rg_transfer_size=='b01 && rg_vaddress[0]!='b0) || (rg_transfer_size=='b10 && rg_vaddress[1:0]!=0) || (rg_transfer_size=='b11 && rg_vaddress[2:0]!=0))begin // miss-aligned error.
305 perf_monitor[`DCACHE_MISALIGNED]=1; // cache mis-aligned error.
306 if(rg_load_store==Load)
307 wr_response_to_cpu<= tagged Valid (tuple3(0,tagged Exception Load_addr_misaligned,perf_monitor));
308 else
309 wr_response_to_cpu<=tagged Valid (tuple3(0,tagged Exception Store_addr_misaligned,perf_monitor));
310 rg_state[0]<=Idle;
311 rg_perf_monitor<=0;
312 `ifdef MMU rg_trnslte_done[0] <= False; `endif
313 end
314 else if(hit||lbhit)begin // if there has been a hit.
315 let {success,storeResult,newdata} <- atomic_operation(data_value,rg_writedata,rg_atomic_op,req_paddr);
316 if(rg_load_store==Load)
317 storeResult=False;
318 if(success matches tagged Valid .sc)
319 data_value = zeroExtend(sc); // an SC returns its status code instead of data
320 if(lbhit && (line_bytes_written & rg_writeenable) != rg_writeenable)begin // needed bytes have not arrived yet
321 rg_state[0]<=KeepPolling;
322 rg_perf_monitor<=perf_monitor;
323 `ifdef verbose $display($time,"\tDCACHE: Going to poll LB: %h we: %h",line_bytes_written,rg_writeenable); `endif
324 end
325 else begin
326 if(rg_load_store==Store)
327 data_value=0;
328 `ifdef verbose $display($time,"\tDCACHE: Hit for ",fshow(rg_load_store)," address : %h data: %h line: %d rg_writedata: %h rg_writeenable: %h lbhit: %b atomic_data %h storeResult %b",rg_vaddress,data_value,linenum,rg_writedata,rg_writeenable, lbhit, newdata, storeResult); `endif
329 wr_response_to_cpu<=tagged Valid (tuple3(data_value,tagged None,perf_monitor));
// guarded like the misaligned path above: rg_trnslte_done is declared only when MMU is defined
330 `ifdef MMU rg_trnslte_done[0] <= False; `endif
331 rg_perf_monitor<=0;
332 rg_state[0]<=Idle;
333 if(rg_load_store==Store || storeResult)begin //Atomic but not LR
334 `ifdef verbose $display("Store or atomic kuch toh ho raha hai"); `endif
335 wr_write_info<=tagged Valid tuple2(cpu_tag,setindex); // lets virtual_address stall a same-set request arriving in this cycle
336 if(lbhit)begin
337 if(rg_load_store==Store)
338 lbdata.write_portA(rg_writeenable,duplicate(rg_writedata));
339 else
340 lbdata.write_portA(rg_writeenable,duplicate(newdata));
341 `ifdef verbose if(line_bytes_written!='1)
342 $display("WRITING ON BOTH PORTS OF LB"); `endif
343 lb_dirty<=1;
344 end
345 else begin
346 tag[linenum].write_request(rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits],{2'b11,tag[linenum].read_response[19:0]}); // mark the way dirty and valid
347 if(rg_load_store==Store)
348 data[linenum].write_request(rg_writeenable,rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits],duplicate(rg_writedata));
349 else
350 data[linenum].write_request(rg_writeenable,rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits],duplicate(newdata));
351 end
352 rg_global_dirty[0]<=True;
353 end
354 end
355 end
356 /*====================================================== */
357 /*==== Request to memory =============================== */
358 else begin // miss
359 rg_state[0]<=KeepPolling;
360 if(rg_load_store==Load)
361 perf_monitor[`DCACHE_LOAD_MISS]=1;
362 else if(rg_load_store==Store)
363 perf_monitor[`DCACHE_STORE_MISS]=1;
364 else if(rg_load_store==Atomic)
365 perf_monitor[`DCACHE_ATOMIC_MISS]=1;
366
367 if(valid_values=='1)begin // if all the lines are valid and no match then replace line
368 perf_monitor[`DCACHE_LINEREPLACE]=1; // cache line replacement increment.
369 if(dirty_values[0]==0)
370 replaceblock=0;
371 else if(dirty_values[1]==0)
372 replaceblock=1;
373 else if(dirty_values[2]==0)
374 replaceblock=2;
375 else if(dirty_values[3]==0)
376 replaceblock=3;
377 else begin
378 replaceblock=truncate(random_line.value);
379 random_line.next;
380 end
381 `ifdef verbose $display($time,"\tDCACHE: Miss of ",fshow(rg_load_store)," address: %h Replacing line: %d valid: %b dirty_values: %b",rg_vaddress,replaceblock,valid_values,dirty_values); `endif
382 end
383 else begin
384 `ifdef verbose $display($time,"\tDCACHE: Miss of ",fshow(rg_load_store)," address: %h Filling line: %d",rg_vaddress,replaceblock); `endif
385 end
386 if(memoperation.notEmpty && lbset==setindex && replaceblock==lbreplaceblock)begin // do not pick the way an outstanding fill of the same set will occupy
387 replaceblock=replaceblock+1;
388 end
389
390 `ifdef MMU
391 ff_read_request_to_memory.enq(To_Memory {address:rg_paddress&'hfffffff8,burst_length:fromInteger(`DCACHE_BLOCK_SIZE),ld_st:Load, transfer_size:3});
392 `else
393 ff_read_request_to_memory.enq(To_Memory {address:truncate(rg_vaddress&'hfffffff8),burst_length:fromInteger(`DCACHE_BLOCK_SIZE),ld_st:Load, transfer_size:3});
394 `endif
395 Bit#(TLog#(`DCACHE_BLOCK_SIZE)) val1=(rg_vaddress&'hfffffff8)[word_bits+byte_bits-1:byte_bits];
396 Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)) writeenable='hFF;
397 writeenable=writeenable<<{3'b0,val1}*8; // byte enables of the word that the fill delivers first
398 if(dirty_values[replaceblock]==1)begin // if the replacing is dirty
399 perf_monitor[`DCACHE_WRITEBACKS]=1;
400 Bit#(TAdd#(TLog#(`DCACHE_WORD_SIZE),TLog#(`DCACHE_BLOCK_SIZE))) offset_zeros='d0;
401 Bit#(`PADDR) write_address={tag[replaceblock].read_response[20-1:0],setindex[6:0],offset_zeros};
402 `ifdef verbose $display($time,"\tDCACHE: Line being replaced is dirty. Addr: %h Data: %h",write_address,data[replaceblock].read_response); `endif
// NOTE(review): tagged as Store to match the fence write-back and the I/O write; confirm that
// nothing downstream relied on the previous ld_st:Load value of this write request.
403 ff_write_request_to_memory.enq(To_Memory_Write {address:write_address,burst_length:fromInteger(`DCACHE_BLOCK_SIZE),ld_st:Store, transfer_size:3,
404 data_line:data[replaceblock].read_response });
405 pending_fence_write_response[0]<=True;
406 end
407 memoperation.enq(tuple4(cpu_tag,rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits],replaceblock,writeenable));
408 `ifdef verbose $display($time,"\tDCACHE: mask: %h byteoffset: %h",writeenable,val1); `endif
409 rg_perf_monitor<=perf_monitor;
410 end
411 end
412 else begin
413 if(rg_load_store==Load || rg_load_store==Atomic)begin
414 `ifdef MMU
415 ff_read_request_to_memory.enq(To_Memory {address:rg_paddress,burst_length:1,ld_st:Load,transfer_size:rg_transfer_size});
416 `else
417 ff_read_request_to_memory.enq(To_Memory {address:truncate(rg_vaddress),burst_length:1,ld_st:Load,transfer_size:rg_transfer_size});
418 `endif
419 rg_state[0]<=IOReadResp;
420 end
421 else if(rg_load_store==Store)begin
422 `ifdef verbose $display($time,"\tDCACHE: Sending IO Write REQUEST"); `endif
423 `ifdef MMU
424 ff_write_request_to_memory.enq(To_Memory_Write{address:rg_paddress,data_line:zeroExtend(rg_writedata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store});
425 `else
426 ff_write_request_to_memory.enq(To_Memory_Write{address:truncate(rg_vaddress),data_line:zeroExtend(rg_writedata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store});
427 `endif
428 rg_state[0]<=IOWriteResp;
429 end
430 end
431 `ifdef MMU
432 end
433 else if(rg_trnslte_done[0])begin
434 rg_state[0]<=Idle;
435 wr_response_to_cpu<= tagged Valid (tuple3(0,rg_tlb_exception,perf_monitor));
436 rg_tlb_exception<=tagged None;
437 rg_perf_monitor<=0;
438 rg_trnslte_done[0]<=False;
439 $display($time,"\tDCACHE: Exception from TLB taken");
440 end
441 else begin
442 $display($time,"\tDCACHE: Translation not done");
443 rg_state[0] <= Idle;
444 end
445 `endif
446 endrule
// Completes a non-cacheable read: takes the memory response, aligns and extends the value the
// same way as a cache hit does, and answers the core (with Load_access_fault on a bus error).
// For an atomic request the value computed by atomic_operation is then written back to the same
// address and the controller waits for the write response; otherwise it returns to Idle.
// Fixes: the address is selected once per configuration (rg_paddress exists only with MMU) and
// the write data in the non-MMU case now uses newdata (it referred to an undefined name).
447 rule wait_for_ioread_response(rg_state[0]==IOReadResp && memoperation.notFull);
448 `ifdef verbose $display($time,"\tDCACHE: Received IO Read Response"); `endif
449 Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=rg_vaddress[byte_bits-1:0];
450 Bit#(`Reg_width) data_value=ff_read_response_from_memory.first.data_line;
451 ff_read_response_from_memory.deq;
452 data_value=data_value>>({4'b0,byte_offset}*8);
453 if(!rg_signextend)
454 data_value=rg_transfer_size==0?zeroExtend(data_value[7:0]):rg_transfer_size==1?zeroExtend(data_value[15:0]):rg_transfer_size==2?zeroExtend(data_value[31:0]):data_value;
455 else
456 data_value=rg_transfer_size==0?signExtend(data_value[7:0]):rg_transfer_size==1?signExtend(data_value[15:0]):rg_transfer_size==2?signExtend(data_value[31:0]):data_value;
457 wr_response_to_cpu<=tagged Valid (tuple3(data_value,ff_read_response_from_memory.first.bus_error==1?tagged Exception Load_access_fault:tagged None,rg_perf_monitor));
458 if(rg_load_store==Atomic)begin
459 `ifdef MMU
460 Bit#(`PADDR) io_paddr=rg_paddress;
461 `else
462 Bit#(`PADDR) io_paddr=truncate(rg_vaddress); // no translation: the low bits of the virtual address are the physical address
463 `endif
464 let {success,storeResult,newdata} <- atomic_operation(data_value,rg_writedata,rg_atomic_op,io_paddr);
ff_write_request_to_memory.enq(To_Memory_Write{address:io_paddr,data_line:zeroExtend(newdata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store});
465 rg_state[0]<=IOWriteResp;
466 end
467 else begin
468 rg_state[0]<=Idle;
469 end
470 rg_perf_monitor<=0;
471 endrule
// Completes a non-cacheable write once no line fill and no other write response is outstanding:
// consumes the write response and returns to Idle. Plain stores are answered here (with
// Store_access_fault on a bus error); atomics were already answered when their read completed.
472 rule wait_for_iowrite_response(rg_state[0]==IOWriteResp && !memoperation.notEmpty && !pending_fence_write_response[1]);
473 `ifdef verbose $display($time,"\tDCACHE: Received IO Write Response"); `endif
let io_write_resp=ff_write_response_from_memory.first;
474 rg_state[0]<=Idle;
475 rg_perf_monitor<=0;
476 if(rg_load_store!=Atomic)
477 wr_response_to_cpu<=tagged Valid (tuple3(0,io_write_resp.bus_error==1?tagged Exception Store_access_fault:tagged None,rg_perf_monitor));
478 ff_write_response_from_memory.deq;
479 endrule
480 /*============== One cycle delay to ensure the write is reflected in the BRAM ========= */
// Stall1 lasts one cycle. On leaving it, the tag and data reads of every way are issued again
// for the set of the stored request and the controller moves on to ReadingCache.
481 rule stall_the_next_request_by_one_cycle(rg_state[0]==Stall1);
482 Bit#(TLog#(`DCACHE_SETS)) replay_set=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits];
483 rg_state[0]<=ReadingCache;
484 for(Integer way=0;way<`DCACHE_WAYS;way=way+1)begin // send address to the Block_rams
485 data[way].read_request(replay_set);
486 tag[way].read_request(replay_set);
487 end
488 endrule
489 /*===================================================================================== */
490 /*======= filling up the cache from the data received from the external memory ======= */
// fillcache: runs while a line fill is outstanding and the line buffer is not yet complete.
// Each memory response is written into the line buffer (port B) under the current byte-enable
// mask, the mask is added to line_bytes_written, and the mask for the following response is
// obtained by rotating the current one left by 8 positions (wrapping at the end of the line).
// The first response of a fill uses the mask stored with the fill in memoperation.
491 (*conflict_free="virtual_address,fillcache"*)
492 rule fillcache(memoperation.notEmpty && line_bytes_written!='1);
493 let memresp=ff_read_response_from_memory.first;
494 ff_read_response_from_memory.deq;
495 let {cpu_tag,setindex,replaceblock,writeenable}=memoperation.first;
496 `ifdef verbose $display($time,"\tDCACHE: Response from Memory: %h setindex: %d cpu_tag: %h replaceblock: %d",memresp.data_line,setindex,cpu_tag,replaceblock); `endif
497 let we=writeenable;
498 if(|line_bytes_written!=0)begin // some bytes were already written: continue with the rotated mask
499 we=rg_we;
500 end
501 Bit#(TMul#(2,TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) extended_mask=zeroExtend(we)<<8; // double-width copy so the bits shifted out can be wrapped around below
502 lbdata.write_portB(we,duplicate(memresp.data_line)); // data replicated across the line; the mask selects where it lands
503 `ifdef verbose $display($time,"\tDCACHE: linebytes: %h currently writing into: %h",line_bytes_written,we); `endif
504 if(memresp.last_word)begin // if all the data words have been fetched exit
505 `ifdef verbose $display($time,"\tDCACHE: Received Last response from Memory set: %d ",setindex); `endif
506 end
507 line_bytes_written<=line_bytes_written|we;
508 rg_we<=extended_mask[2*`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE-1:`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE]|extended_mask[`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE-1:0]; // upper half OR lower half = mask rotated left by 8
509 endrule
// Runs when every byte of the line buffer has been written: copies the complete line into the
// data memory of the chosen way, writes its tag as {lb_dirty, valid=1, tag}, clears the fill
// bookkeeping and retires the fill from memoperation. A lookup that is in ReadingCache at that
// moment (as seen on port 1 of rg_state) is sent to Stall1 so that it is repeated.
510 rule read_from_lbdata_into_hold_reg(line_bytes_written=='1);
511 let lb_hold_reg=lbdata.response_portB;
512 let {cputag,setindex,replaceblock,writeenable}=memoperation.first;
513 data[replaceblock].write_request('1,setindex,lb_hold_reg); // all byte enables set: the whole line is written
514 tag[replaceblock].write_request(setindex,{lb_dirty,1'b1,cputag});
515 line_bytes_written<=0;
516 lb_dirty<=0;
517 memoperation.deq;
518 `ifdef verbose $display($time,"\tDCACHE: capturing lbdata cpu_tag: %h setindex: %d addr: %h linenum: %d data: %h",cputag, setindex,{cputag,setindex,6'd0}, replaceblock,lb_hold_reg); `endif
519 if(rg_state[1]==ReadingCache)
520 rg_state[1]<=Stall1;
521 endrule
522 /*===================================================================================== */
523 /*===================================================================================== */
// KeepPolling: waits until the line buffer holds the bytes the stored request needs and belongs
// to the same set and tag. Then the tag/data reads are issued again and the controller returns
// to ReadingCache with increment_counters cleared, so the repeated lookup does not count the
// request a second time. While capture_counters is pulsed, the miss and cacheable-access
// counter bits for the request are recorded.
// NOTE(review): memoperation is an unguarded FIFO and .first is read here without a notEmpty check.
524 rule keep_polling_on_stall(rg_state[0]==KeepPolling);
525 Bit#(`PERFMONITORS) perf_monitor=rg_perf_monitor;
526 if(capture_counters)begin
527 $display($time,"\tDCACHE: Miss during polling for ",fshow(rg_load_store)); // printed even without `verbose
528 if(rg_load_store==Load)begin
529 perf_monitor[`DCACHE_LOAD_MISS]=1;
530 perf_monitor[`DCACHE_CACHEABLE_LOAD]=1;
531 end
532 else if(rg_load_store==Store)begin
533 perf_monitor[`DCACHE_STORE_MISS]=1;
534 perf_monitor[`DCACHE_CACHEABLE_STORE]=1;
535 end
536 else if(rg_load_store==Atomic) begin
537 perf_monitor[`DCACHE_ATOMIC_MISS]=1;
538 perf_monitor[`DCACHE_CACHEABLE_ATOMIC]=1;
539 end
540 rg_perf_monitor<=perf_monitor;
541 end
542
543 Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits];
544 `ifdef MMU
545 Bit#(20) cpu_tag=rg_paddress[`PADDR-1:`PADDR-20];
546 `else
547 Bit#(20) cpu_tag=rg_vaddress[`PADDR-1:`PADDR-20];
548 `endif
549 let {lbtag,lbset,lbreplaceblock,lbwriteenable}=memoperation.first;
550 if((line_bytes_written & rg_writeenable) == rg_writeenable && (lbset==setindex && lbtag==cpu_tag))begin // every byte the request touches is already in the line buffer
551 `ifdef verbose $display($time,"\tDCACHE: Accessing LB"); `endif
552 rg_state[0]<=ReadingCache;
553 increment_counters<=False;
554 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin // send address to the Block_rams
555 tag[i].read_request(setindex);
556 data[i].read_request(setindex);
557 end
558 end
559 `ifdef verbose $display($time,"\tDCACHE: Polling on LB. cpu_tag: %h lbtag: %h required: %h bytes in Buffer: %h",cpu_tag,lbtag,rg_writeenable,line_bytes_written); `endif
560 endrule
561
562 /*============= Prediction in burst mode ================================ */
// virtual_address: accepts a new request from the core; enabled only while the controller is
// Idle (as seen on port 1 of rg_state).
//   vaddress      - virtual address of the access
//   load_store    - kind of access (Load, Store, Atomic or Fence are used in this module)
//   writedata     - store data; replicated according to transfer_size before it is stored
//   transfer_size - 0: byte, 1: half-word, 2: word, otherwise the full data width
//   atomic_op     - operation selector handed to atomic_operation
//   signextend    - sign-extend the loaded value when True
// Fence: goes to Initialize when nothing in the cache is dirty, otherwise starts the Fence walk
// by reading way 0 of set 0. Any other request is stored and the tag/data reads of its set are
// issued, unless a store/atomic wrote to the same set in this very cycle and the new request is
// not a Store; then the lookup is delayed through Stall1.
563 method Action virtual_address(Bit#(`VADDR) vaddress, Access_type load_store, Bit#(TMul#(`DCACHE_WORD_SIZE,8)) writedata, Bit#(3) transfer_size, Bit#(5) atomic_op, Bool signextend)if(rg_state[1]==Idle);
564 Bit#(`PERFMONITORS) perf_monitor=0;
565 Bit#(TLog#(`DCACHE_SETS)) setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits];
566 `ifdef verbose $display($time,"\tDCACHE: ",fshow(load_store)," Request of VAddr: %h transfersize: %d signextend: %b setindex: %d",vaddress,transfer_size, signextend,setindex); `endif
567 Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)) we=transfer_size==0?'b1:transfer_size==1?'b11:transfer_size==2?'hF:'hFF; // one enable bit per byte accessed
568 Bit#(TLog#(`DCACHE_BLOCK_SIZE)) word_offset= vaddress[word_bits+byte_bits-1:byte_bits];
569 Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=vaddress[byte_bits-1:0];
570 we=we<<{4'b0,word_offset}*8; // move the enables to the addressed word of the line
571 we=we<<byte_offset; // ... and to the addressed byte within that word
572 rg_load_store<=load_store;
573 Bool proceed=True;
574 if(wr_write_info matches tagged Valid .x)begin // a store/atomic wrote into the cache or line buffer in this cycle
575 let {newtag,newindex}=x;
576 if(newindex==setindex && load_store!=Store)
577 proceed=False;
578 end
579 if(load_store==Fence)begin
580 if(!rg_global_dirty[1])begin
581 rg_state[1]<=Initialize;
582 end
583 else begin
584 tag[0].read_request(0);
585 data[0].read_request(0);
586 rg_state[1]<=Fence;
587 end
588 end
589 else begin
590 rg_vaddress<=vaddress;
591 rg_transfer_size<=transfer_size;
592 rg_atomic_op<=atomic_op;
593 rg_writedata<=transfer_size==0?duplicate(writedata[7:0]):transfer_size==1?duplicate(writedata[15:0]):transfer_size==2?duplicate(writedata[31:0]):writedata;
594 rg_writeenable<=we;
595 rg_signextend<=signextend;
596 if(proceed)begin
597 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin // send address to the Block_rams
598 tag[i].read_request(setindex);
599 data[i].read_request(setindex);
600 end
601 rg_state[1]<=ReadingCache;
602 end
603 else begin
604 capture_counters<=True; // pulse for the counter-capture branch of keep_polling_on_stall
605 rg_state[1]<=Stall1;
606 end
607 end
608 endmethod
// Returns the response wire as is: Valid (data, trap, perf-counter bits) in a cycle in which a
// rule produced a response, Invalid otherwise.
609 method Maybe#(Tuple3#(Bit#(`Reg_width), Trap_type,Bit#(`PERFMONITORS))) response_to_core;
610 return wr_response_to_cpu;
611 endmethod
612 `ifdef MMU
// Receives the result of the address translation: stores the physical address and the trap
// reported with it (None when there is none) and marks the translation as done.
// The debug message now names the dcache; it used to say "icache".
613 method Action physical_address(Bit#(`PADDR) paddr, Trap_type exception);
614 `ifdef verbose $display($time,"\tDCACHE: Sending physical address %h to dcache ",paddr); `endif
615 rg_paddress<=paddr;
616 rg_trnslte_done[1] <= True;
617 rg_tlb_exception<=exception;
618 endmethod
619 `endif
// Hands the oldest queued read request to the memory side and removes it from the queue.
620 method ActionValue#(To_Memory#(`PADDR)) read_request_to_memory;
621 let oldest_read=ff_read_request_to_memory.first;
622 ff_read_request_to_memory.deq;
return oldest_read;
623 endmethod
// Hands the oldest queued write request to the memory side and removes it from the queue.
624 method ActionValue#(To_Memory_Write) write_request_to_memory;
625 let oldest_write=ff_write_request_to_memory.first;
626 ff_write_request_to_memory.deq;
return oldest_write;
627 endmethod
// True whenever the controller (port 1 of rg_state) is in any state other than Fence.
// NOTE(review): the reset state Initialize also reports True here - confirm this is what callers expect.
628 method Bool init_complete;
629 return !(rg_state[1]==Fence);
630 endmethod
// Queues a read response from memory; it is consumed by fillcache or wait_for_ioread_response.
631 method Action read_response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp);
632 `ifdef verbose $display($time,"\tDCACHE: Memory has responded"); `endif
633 ff_read_response_from_memory.enq(resp);
634 endmethod
// Queues a write response from memory; it is consumed by deq_write_response_during_fence or
// wait_for_iowrite_response.
635 method Action write_response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp);
636 ff_write_response_from_memory.enq(resp);
637 endmethod
638
639 endmodule
640
// Minimal testbench: offers the same word-sized, zero-extended load of address 4 to the cache
// whenever the cache accepts a request, and ends the simulation once $stime exceeds 10.
641 module mkTb(Empty);
642 Ifc_dcache dcache<-mkdcache;
643 rule send_request;
644 dcache.virtual_address('d4,Load,'h01234567ABCDEF89,'d2,'d0,False);
645 endrule
646 rule terminate;
647 let sim_time<-$stime;
648 if(!(sim_time<=10))
649 $finish(0);
650 endrule
651 endmodule
652 endpackage