2 Copyright (c) 2013, IIT Madras
5 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9 * Neither the name of IIT Madras nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
15 /*===== Package imports ===== */
19 import SpecialFIFOs::*;
25 import mem_config1::*;
27 /*===== project imports==== */
28 import defined_types::*;
29 `include "core_parameters.bsv"
32 /*========================= */
// Method declarations of the data-cache interface (the enclosing interface header is not visible in this excerpt).
// Core-side request: virtual address, access type, store data, transfer size, optional atomic opcode
// (only when the atomic macro is defined), sign-extension flag and the epoch bit of the instruction.
34 method Action virtual_address(Bit#(`VADDR) vaddress, Access_type load_store, Bit#(TMul#(`DCACHE_WORD_SIZE,8)) writedata, Bit#(3) transfer_size, `ifdef atomic Bit#(5) atomic_op, `endif Bool signextend, Bit#(1) insnepoch);
// Response towards the core: (data word, trap, performance-monitor bits, epoch bit), wrapped in a Maybe.
35 method Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type,Bit#(`PERFMONITORS),Bit#(1))) response_to_core;
// Outgoing read and write requests towards memory.
36 method ActionValue#(To_Memory#(`PADDR)) read_request_to_memory;
37 method ActionValue#(To_Memory_Write) write_request_to_memory;
// Incoming read and write responses from memory.
38 method Action read_response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp);
39 method Action write_response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp);
// NOTE(review): presumably reports that the start-up invalidation has finished - the method body is not visible here.
40 method Bool init_complete;
// NOTE(review): presumably toggles the write-back epoch on a pipeline flush - the method body is not visible here.
41 method Action flush_from_wb;
// Physical address and any translation trap for the request previously presented on virtual_address.
43 method Action physical_address(Bit#(`PADDR) paddr, Trap_type exception);
// States of the cache controller; the current state is held in rg_state inside mkdcache.
// The Bits encoding follows the order of the members, so the order must not be changed.
47 typedef enum {Idle,Dummy,KeepPolling,Stall1,ReadingCache,Initialize,Fence,FenceStart,IOReadResp,IOWriteResp} DcacheState deriving (Bits,Eq,FShow);
// Scheduling attributes for mkdcache.
// conflict_free: the listed rule/method pairs are asserted never to touch the same state in the same cycle.
50 (*conflict_free="virtual_address,pre_fence_updating"*)
51 (*conflict_free="virtual_address,handle_fence"*)
52 (*conflict_free="keep_polling_on_stall,handle_fence"*)
53 (*conflict_free="keep_polling_on_stall,pre_fence_updating"*)
54 (*conflict_free="keep_polling_on_stall,wait_for_ioread_response"*)
55 (*conflict_free="keep_polling_on_stall,wait_for_iowrite_response"*)
56 (*conflict_free="dummy_cycle,read_from_lbdata_into_hold_reg"*)
// preempts: when the first rule/method fires, the second is blocked in that cycle.
57 (*preempts="virtual_address,read_from_lbdata_into_hold_reg"*)
58 // (*preempts="keep_polling_on_stall,read_from_lbdata_into_hold_reg"*)
59 (*preempts="stall_the_next_request_by_one_cycle,read_from_lbdata_into_hold_reg"*)
60 (*preempts="read_from_lbdata_into_hold_reg,keep_polling_on_stall"*)
// Data cache module implementing Ifc_dcache.
61 module mkdcache(Ifc_dcache);
62 /* VAddr = [tag_bits|set_bits|word_bits|byte_bits] */
// NOTE(review): the "= 2" / "= 4" values in the comments below depend on the macro settings in
// core_parameters.bsv and may be stale - confirm against the configured word and block sizes.
63 let byte_bits=valueOf(TLog#(`DCACHE_WORD_SIZE)); // number of bits to select a byte within a word. = 2
64 let word_bits=valueOf(TLog#(`DCACHE_BLOCK_SIZE)); // number of bits to select a word within a block. = 4
65 let set_bits=valueOf(TLog#(`DCACHE_SETS)); // number of bits to select a set from the cache. =
// Reservation recorded by atomic_operation: Valid (atomic_op[4], address) after an LR, Invalid otherwise.
66 Reg#(Maybe#(Tuple2#(Bit#(1),Bit#(`PADDR)))) rg_lr_paddress<-mkReg(tagged Invalid);
// Computes the result of an atomic instruction.
//   loaded_value : value read from the cache/memory location
//   rs2          : operand supplied by the core
//   atomic_op    : 5-bit atomic opcode; bit 4 selects 32-bit sign-extended operands for op2
//   addr         : physical address of the access, compared against the LR reservation
// Returns tuple3(sc_done, store_result, atomic_result):
//   sc_done      : Valid only for store-conditional (set to 0 when the reservation matches, 1 otherwise)
//   store_result : whether the result is to be written back
//   atomic_result: value produced by the operation
// Side effect: updates rg_lr_paddress (set on LR, cleared on SC and in the default arm).
// NOTE(review): the case heads, conditionals and begin/end lines are not visible in this excerpt.
68 function ActionValue#(Tuple3#(Maybe#(Bit#(1)),Bool, Bit#(TMul#(`DCACHE_WORD_SIZE,8)))) atomic_operation(Bit#(TMul#(`DCACHE_WORD_SIZE,8)) loaded_value, Bit#(TMul#(`DCACHE_WORD_SIZE,8)) rs2, Bit#(5) atomic_op, Bit#(`PADDR) addr);
71 Bit#(TMul#(`DCACHE_WORD_SIZE,8)) atomic_result=rs2;
72 Bit#(TMul#(`DCACHE_WORD_SIZE,8)) op1;
73 Maybe#(Bit#(1)) sc_done=tagged Invalid;
// 32-bit sign-extended view of the loaded value (the guarding condition is not visible here).
75 op1=signExtend(loaded_value[31:0]);
78 Bit#(TMul#(`DCACHE_WORD_SIZE,8)) op2=(atomic_op[4]==1)?signExtend(rs2[31:0]):rs2;
// Signed views of both operands, used by the signed min/max arms.
79 Int#(TMul#(`DCACHE_WORD_SIZE,8)) s_op1=unpack(op1);
80 Int#(TMul#(`DCACHE_WORD_SIZE,8)) s_op2 = unpack(op2);
81 Bool store_result = True;
82 `ifdef verbose $display($time,"\tDCACHE: atomic instruction atomic op %b op1: %h op2: %h", atomic_op,op1,op2); `endif
// Arms of the operation selector (the case expression itself is not visible here).
84 'b0011:atomic_result=op2;
85 'b0000:atomic_result= (op1+op2);
86 'b0010:atomic_result= (op1^op2);
87 'b0110:atomic_result= (op1&op2);
88 'b0100:atomic_result= (op1|op2);
// min/max on the raw Bit values, then on the signed Int views.
89 'b1100:atomic_result= min(op1,op2);
90 'b1110:atomic_result= max(op1,op2);
91 'b1000:atomic_result= pack(min(s_op1,s_op2));
92 'b1010:atomic_result= pack(max(s_op1,s_op2));
93 default: begin atomic_result= op1; end
// Load-reserved: remember the width bit and the address, return the loaded value unchanged.
97 rg_lr_paddress <= tagged Valid tuple2(atomic_op[4],addr);
98 atomic_result=loaded_value; // LR
// Store-conditional: the reservation is always cleared; start from "failed, do not store".
102 rg_lr_paddress <= tagged Invalid;
103 atomic_result=rs2; // SC
104 sc_done = tagged Valid 1;
105 store_result = False;
106 `ifdef verbose $display($time,"\tDCACHE: store condition instruction"); `endif
107 if(rg_lr_paddress matches tagged Valid .lr) begin
// x and y are presumably the two fields of the reservation tuple lr (their binding is not visible here).
109 if(x==atomic_op[4] && addr== y) begin
110 `ifdef verbose $display($time,"\tDCACHE: store condition satisfied"); `endif
111 sc_done = tagged Valid 0;
116 default: begin rg_lr_paddress<=tagged Invalid ;end
// Replicate the low 32 bits across the whole word (the guarding condition is not visible here).
119 atomic_result=duplicate(atomic_result[31:0]);
121 return tuple3(sc_done,store_result,atomic_result);
// Byte-wise merge helper for a full cache line.
//   we       : one write-enable bit per byte of the line
//   data     : new data, taken where the corresponding we bit is 1
//   data_reg : existing line, kept where the corresponding we bit is 0
// Each we bit is expanded to 8 mask bits; x holds the selected new bytes, y the preserved old bytes.
// NOTE(review): the return statement is not visible in this excerpt - presumably x|y.
// NOTE(review): the loop bound 32 is hard-coded; it covers the whole line only when
// DCACHE_BLOCK_SIZE*DCACHE_WORD_SIZE equals 32 - confirm against core_parameters.bsv.
125 function Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) update_line (Bit#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE)) we, Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) data, Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) data_reg);
126 Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) mask=0;
127 for(Integer i=0;i<32;i=i+1)begin
128 Bit#(8) ex_we=duplicate(we[i]);
129 mask[(i*8)+7:i*8]=ex_we;
131 Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) x = mask& data;
132 Bit#(TMul#(TMul#(8,`DCACHE_WORD_SIZE),`DCACHE_BLOCK_SIZE)) y = ~mask& data_reg;
// One data array and one tag array per way.
137 Ifc_dcache_data data [`DCACHE_WAYS];
138 Ifc_dcache_tag tag [`DCACHE_WAYS];
139 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
140 tag[i] <- mkdcache_tag;
141 data[i] <-mkdcache_data;
144 /*====== Hit buffer data structure ======*/
// Holds one line that was hit in the SRAMs and written; it is written back to the SRAMs later
// (see the uses of hb_* in read_from_memory_structures and pre_fence_updating).
145 Reg#(Bool) hb_valid <-mkReg(False);
// One bit per way, used as a per-way write enable when the hit buffer is written back.
146 Reg#(Bit#(`DCACHE_WAYS)) hb_way <-mkReg(0);
147 Reg#(Bit#(`DCACHE_TAG_BITS)) hb_tag <-mkReg(0);
148 Reg#(Bit#(TLog#(`DCACHE_SETS))) hb_setindex <- mkReg(0);
149 Ifc_QuadMem hb_data <-mkQuadMem;
150 /*=====================================*/
152 /*-====== Line buffer data structure ====*/
// Collects the line being fetched from memory before it is written into the SRAMs.
153 Ifc_QuadMem lb_data <-mkQuadMem;
// Outstanding line fills: (tag, set index, one-hot replacement way, byte enables of the first word).
// The FIFO is unguarded, so users must check notEmpty/notFull themselves.
// NOTE(review): the tag width is the literal 20 here while hb_tag uses DCACHE_TAG_BITS - confirm they agree.
154 FIFOF#(Tuple4#(Bit#(20),Bit#(TLog#(`DCACHE_SETS)),Bit#(`DCACHE_WAYS),Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)))) memoperation <-mkUGSizedFIFOF(2);
155 Reg#(Bit#(1)) lb_dirty <-mkReg(0);
// One bit per byte of the line; a set bit means that byte has arrived in the line buffer.
156 Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) line_bytes_written<-mkReg(0);
157 /*=====================================*/
159 /*======= Request Capture =========*/
160 Reg#(Bit#(`VADDR)) rg_vaddress <-mkReg(0);
161 Reg#(Bit#(`PADDR)) rg_paddress <-mkReg(0);
// Physical address of the request being polled for while in KeepPolling.
162 Reg#(Bit#(`PADDR)) rg_poll_address <-mkReg(0);
163 Reg#(Bit#(3)) rg_transfer_size <-mkReg(0);
164 `ifdef atomic Reg#(Bit#(5)) rg_atomic_op <-mkReg(0); `endif
165 Reg#(Access_type) rg_access_type <-mkReg(Load);
// Byte enables of the current request within the line.
166 Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) rg_writeenable<-mkReg(0);
167 Reg#(Bool) rg_signextend<-mkReg(False);
168 Reg#(Bool) misaligned_addr <-mkReg(False);
169 Reg#(Bit#(1)) rg_insn_epoch <-mkReg(0);
170 Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,8))) rg_writedata<-mkReg(0);
171 /*=================================*/
172 /* storage for physical translation */
// Resets to False when MMU is defined and to True otherwise.
173 Reg#(Bool) rg_trnslte_done[2] <- mkCReg(2,`ifdef MMU False `else True `endif );
174 Reg#(Trap_type) rg_tlb_exception[2]<-mkCReg(2,tagged None);
175 /*==================================== */
177 /*===== registers for fencing/initializing ====*/
// Set and way currently being walked by fencing_the_cache / handle_fence.
178 Reg#(Bit#(TLog#(`DCACHE_SETS))) fence_set <-mkReg(0);
179 Reg#(Bit#(TLog#(`DCACHE_WAYS))) fence_way <-mkReg(0);
180 /*==============================================*/
182 /*========= FIFO for interfaces ================*/
183 FIFOF#(To_Memory#(`PADDR)) ff_read_request_to_memory <-mkLFIFOF();
184 FIFOF#(To_Memory_Write) ff_write_request_to_memory <-mkLFIFOF();
185 FIFOF#(From_Memory#(`DCACHE_WORD_SIZE)) ff_read_response_from_memory <-mkSizedBypassFIFOF(1);
186 FIFOF#(From_Memory#(`DCACHE_WORD_SIZE)) ff_write_response_from_memory <-mkSizedBypassFIFOF(1);
187 /*===============================================*/
189 /*===== State Registers========*/
// Epoch bit compared against rg_insn_epoch; it is toggled when a response carries an exception.
190 Reg#(Bit#(1)) wbEpoch [3] <-mkCReg(3,0);
// Controller state; the cache starts in Initialize.
191 Reg#(DcacheState) rg_state[3] <-mkCReg(3,Initialize);
192 /*============================*/
194 /*============ globals =========*/
195 Reg#(Bool) rg_global_dirty <-mkReg(False);
196 Wire#(Maybe#(Tuple2#(Bit#(20),Bit#(TLog#(`DCACHE_SETS))))) wr_write_info<-mkDWire(tagged Invalid);
// Response for the core; Invalid in any cycle in which no rule writes it.
197 Wire#(Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type, Bit#(`PERFMONITORS),Bit#(1)))) wr_response_to_cpu<-mkDWire(tagged Invalid);
198 Reg#(Bit#(`PERFMONITORS)) rg_perf_monitor<-mkReg(0);
199 LFSR#(Bit#(2)) random_line<-mkRCounter(3); // for random line replacement
// Set while a write request to memory is outstanding and its response has not been consumed.
200 Reg#(Bool) pending_write_response[3]<-mkCReg(3,False);
201 Reg#(Bool) capture_counters <-mkDReg(False);
202 Reg#(Bool) rg_initialize <-mkReg(True);
// Byte enables for the next word expected from memory during a line fill (updated in fillcache).
203 Reg#(Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) rg_we<-mkReg(0);
// Sticky bus-error flag accumulated over the responses of a line fill.
204 Reg#(Bool) rg_bus_error<-mkReg(False);
205 /*==============================*/
// NOTE(review): the rule that encloses this debug print is not visible in this excerpt.
207 `ifdef verbose $display($time,"\tDCACHE: state ",fshow(rg_state[0])," wbEpoch: %b",wbEpoch[0]); `endif
// Fires while the controller is in the Dummy state (read through port 1 of rg_state).
// NOTE(review): the rule body is not visible in this excerpt.
210 rule dummy_cycle(rg_state[1]==Dummy);
// Consumes the memory response of an outstanding write request and clears the pending flag.
// The response contents (including any bus error) are not inspected here.
214 rule deq_write_response_during_fence(pending_write_response[2]);
215 ff_write_response_from_memory.deq;
216 pending_write_response[2]<=False;
// First step of a fence: runs once the line-fill FIFO is empty and no write response is pending.
// NOTE(review): the else branch and the state update are not visible in this excerpt.
218 rule pre_fence_updating(rg_state[0]==FenceStart && !memoperation.notEmpty && !pending_write_response[2]);
// Only act when the request belongs to the current epoch.
219 if(wbEpoch[0]==rg_insn_epoch)begin
// Write the hit buffer back into the way selected by hb_way; the two leading tag bits are both set
// (bit 21 is read as dirty and bit 20 as valid in handle_fence).
221 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
222 tag[i].write_request(unpack(hb_way[i]),hb_setindex,{2'b11,hb_tag});
223 data[i].write_request(duplicate(hb_way[i]),hb_setindex,hb_data.response_portA);
// Start the read of set 0 in way 0.
229 tag[0].read_request(0);
230 data[0].read_request(0);
// NOTE(review): presumably the epoch-mismatch path - respond to the core with zero data and no trap.
236 wr_response_to_cpu<=tagged Valid (tuple4(0,tagged None, 0, rg_insn_epoch));
239 /*====== Invalidate all the entries in the cache on startup or during Fence ==== */
// Runs in the Initialize state once the line-fill FIFO is empty and no write response is pending.
// NOTE(review): the begin/end/else lines and the state update are not visible in this excerpt.
240 rule fencing_the_cache(rg_state[0]==Initialize && !memoperation.notEmpty && !pending_write_response[2]);
241 `ifdef verbose $display($time,"\tDCACHE: Initializing index: %d",fence_set," ",fshow(rg_access_type)); `endif
// Write an all-zero tag entry (valid and dirty bits cleared) at fence_set in every way.
242 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
243 tag[i].write_request(True,truncate(fence_set),0);
// Last set reached: reseed the replacement counter and clear the global flags.
245 if(fence_set==fromInteger(`DCACHE_SETS-1)) begin
249 random_line.seed('d3);
250 rg_global_dirty<=False;
251 rg_trnslte_done[0]<=False;
// Only a fence request (not the start-up initialisation) gets a response to the core.
252 if(rg_access_type==Fence)
253 wr_response_to_cpu<= tagged Valid (tuple4(0,tagged None,0,rg_insn_epoch));
256 fence_set<=fence_set+1;
258 /*=============================================================================== */
// Walks the cache during a fence: inspects the line at (fence_set, fence_way), writes it to memory
// when it is both valid and dirty, invalidates its tag and requests the next line.
// NOTE(review): several begin/end/else lines and the updates of fence_set, fence_way, new_set and
// rg_state are not visible in this excerpt.
259 rule handle_fence(rg_state[0]==Fence &&!memoperation.notEmpty);
260 Bit#(20) tag_values=tag[fence_way].read_response[20-1:0]; // hold the tag values
261 Bit#(1) dirty_value=tag[fence_way].read_response[20+1]; // holds the dirty bit
262 Bit#(1) valid_value=tag[fence_way].read_response[20]; // holds the valid bit
263 Bit#(TMul#(8,TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) data_values; // holds the cache lines.
264 Bit#(TAdd#(TLog#(`DCACHE_WORD_SIZE),TLog#(`DCACHE_BLOCK_SIZE))) p_offset =0;
265 data_values=data[fence_way].read_response;
// Line-aligned write-back address: {tag, set index, zero offset}.
267 Bit#(`PADDR) write_addr={tag_values,truncate(fence_set),p_offset};
268 `ifdef verbose $display($time,"\tDCACHE: Handling Fence.tag %h setindex: %d fence_way: %d Dirty: %b Valid: %b",tag_values,fence_set,fence_way,dirty_value,valid_value); `endif
269 `ifdef verbose $display($time,"\tDCACHE: Fence addr: %h line: %h ",write_addr,data_values); `endif
270 Bit#(TLog#(`DCACHE_SETS)) new_set=fence_set;
271 Bit#(TLog#(`DCACHE_SETS)) old_set=fence_set;
272 Bit#(TLog#(`DCACHE_WAYS)) next_way=fence_way;
// Make progress only when the previous write-back has been acknowledged.
273 if(!pending_write_response[1])begin
274 if(dirty_value==1 && valid_value==1)begin // valid and dirty
275 ff_write_request_to_memory.enq(To_Memory_Write { // send the request to memory to
276 address:write_addr, data_line:data_values,
277 burst_length:`DCACHE_BLOCK_SIZE, transfer_size:3, ld_st:Store});
278 pending_write_response[1]<=True;
// Last way of the current set.
280 if(fence_way==fromInteger(`DCACHE_WAYS-1))begin
// Last set as well: the fence is complete, so clear the flags and respond to the core.
282 if(fence_set==fromInteger(`DCACHE_SETS-1))begin
284 rg_global_dirty<=False;
285 wr_response_to_cpu<= tagged Valid (tuple4(0,tagged None,0,rg_insn_epoch));
286 rg_trnslte_done[0]<=False;
292 next_way=fence_way+1;
// Invalidate the line that has just been processed.
293 tag[fence_way].write_request(True,old_set,0);
295 `ifdef verbose $display($time,"\tDCACHE: FENCE: sending request to setindex: %d way: %d",new_set,next_way); `endif
// Request the next line to be inspected.
296 tag[next_way].read_request(new_set);
297 data[next_way].read_request(new_set);
// Retires a completed line fill: once every byte of the line has arrived (line_bytes_written is all ones)
// the line buffer contents are written into the SRAM way selected by replaceblock.
// NOTE(review): the dequeue of memoperation and the action taken in the KeepPolling case are not
// visible in this excerpt.
301 rule read_from_lbdata_into_hold_reg(line_bytes_written=='1 && memoperation.notEmpty);
302 let lb_hold_reg=lb_data.response_portB;
303 let {cputag,setindex,replaceblock,writeenable}=memoperation.first;
304 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
// Tag entry is {dirty, valid=1, tag}; the trailing "&&True" has no effect on the write enable.
305 tag[i].write_request((unpack(replaceblock[i])&&True),setindex,{lb_dirty,1'b1,cputag});
306 data[i].write_request(duplicate(replaceblock[i]),setindex,lb_hold_reg);
// The line buffer is empty again.
308 line_bytes_written<=0;
312 `ifdef verbose $display($time,"\tDCACHE: capturing lbdata cpu_tag: %h setindex: %d addr: %h linenum: %b data: %h",cputag, setindex,{cputag,setindex,6'd0}, replaceblock,lb_hold_reg); `endif
313 if(rg_state[1]==KeepPolling)
// Accepts one word of a line fill from memory and stores it in the line buffer.
// NOTE(review): the declaration of we and the body of the first if are not visible in this excerpt.
317 rule fillcache(memoperation.notEmpty && line_bytes_written!='1); // need to check line_bytes_written to ensure the same response is being served.
318 let memresp=ff_read_response_from_memory.first;
319 ff_read_response_from_memory.deq;
// Bus errors are sticky across the words of the burst.
320 rg_bus_error<=unpack(memresp.bus_error)||rg_bus_error;
321 let {cpu_tag,setindex,replaceblock,writeenable}=memoperation.first;
322 `ifdef verbose $display($time,"\tDCACHE: Response from Memory: %h setindex: %d cpu_tag: %h replaceblock: %b",memresp.data_line,setindex,cpu_tag,replaceblock); `endif
// True once at least one word of this line has already been received.
324 if(|line_bytes_written!=0)begin
// Rotate the byte enables left by 8 bits: shift inside a double-width vector, then fold the
// upper half back onto the lower half when rg_we is assigned.
327 Bit#(TMul#(2,TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE))) extended_mask=zeroExtend(we)<<8;
// The response word is replicated across the line; only the bytes enabled by we are written.
328 lb_data.write_portB(we,duplicate(memresp.data_line));
329 `ifdef verbose $display($time,"\tDCACHE: linebytes: %h currently writing into: %h",line_bytes_written,we); `endif
330 if(memresp.last_word)begin // if all the data words have been fetched exit
331 `ifdef verbose $display($time,"\tDCACHE: Received Last response from Memory set: %d ",setindex); `endif
// Record the newly arrived bytes and the enables for the next word.
333 line_bytes_written<=line_bytes_written|we;
334 rg_we<=extended_mask[2*`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE-1:`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE]|extended_mask[`DCACHE_BLOCK_SIZE*`DCACHE_WORD_SIZE-1:0];
// Discards a captured request whose epoch no longer matches wbEpoch.
// NOTE(review): the state update is not visible in this excerpt.
337 rule drop_incoming_request(rg_state[0]==ReadingCache && memoperation.notFull && wbEpoch[0]!=rg_insn_epoch);
// Respond (zero data, no trap) only once the address translation has been delivered.
338 if(rg_trnslte_done[0])
339 wr_response_to_cpu<=tagged Valid (tuple4(0,tagged None, 0, rg_insn_epoch));
340 `ifdef verbose $display($time,"\tDCACHE: Dropping incoming request wbEpoch: %b rg_insn_epoch: %b",wbEpoch[0],rg_insn_epoch); `endif
341 rg_trnslte_done[0]<=False;
344 /*============== One cycle delay to ensure the write is reflected in the BRAM ========= */
// In Stall1 the read of the set addressed by rg_vaddress is issued to every way,
// and the controller then moves on to ReadingCache.
345 rule stall_the_next_request_by_one_cycle(rg_state[1]==Stall1);
346 Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits];
347 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin // send address to the Block_rams
348 tag[i].read_request(setindex);
349 data[i].read_request(setindex);
351 rg_state[1]<=ReadingCache;
353 /*===================================================================================== */
// Waits in KeepPolling until the line buffer holds the bytes needed by the stalled request,
// then re-issues the SRAM reads and returns to ReadingCache.
// NOTE(review): memoperation is an unguarded FIFO and first is read here without a notEmpty check -
// confirm that KeepPolling implies a non-empty FIFO.
355 rule keep_polling_on_stall(rg_state[1]==KeepPolling);
356 Bit#(`PERFMONITORS) perf_monitor=rg_perf_monitor;
// capture_counters is a one-cycle pulse register (mkDReg), so the miss is counted once per request.
357 if(capture_counters)begin
358 `ifdef verbose $display($time,"\tDCACHE: Miss during polling for ",fshow(rg_access_type)); `endif
359 if(rg_access_type==Load)begin
360 perf_monitor[`DCACHE_LOAD_MISS]=1;
361 perf_monitor[`DCACHE_CACHEABLE_LOAD]=1;
363 else if(rg_access_type==Store)begin
364 perf_monitor[`DCACHE_STORE_MISS]=1;
365 perf_monitor[`DCACHE_CACHEABLE_STORE]=1;
367 else if(rg_access_type==Atomic) begin
368 perf_monitor[`DCACHE_ATOMIC_MISS]=1;
369 perf_monitor[`DCACHE_CACHEABLE_ATOMIC]=1;
371 rg_perf_monitor<=perf_monitor;
373 Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits];
// The tag is taken from the physical address saved when polling started.
374 Bit#(20) cpu_tag=rg_poll_address[`PADDR-1:`PADDR-20];
375 let {lbtag,lbset,lbreplaceblock,lbwriteenable}=memoperation.first;
// Proceed once every byte the request needs is in the line buffer and the buffered line is the requested one.
376 if((line_bytes_written & rg_writeenable) == rg_writeenable && (lbset==setindex && lbtag==cpu_tag))begin
377 `ifdef verbose $display($time,"\tDCACHE: Accessing LB"); `endif
378 rg_state[1]<=ReadingCache;
379 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin // send address to the Block_rams
380 tag[i].read_request(setindex);
381 data[i].read_request(setindex);
384 `ifdef verbose $display($time,"\tDCACHE: Polling on LB. cpu_tag: %h lbtag: %h required: %h bytes in Buffer: %h",cpu_tag,lbtag,rg_writeenable,line_bytes_written); `endif
// Main lookup rule. For a captured request of the current epoch it checks the hit buffer, the line
// buffer and the SRAM ways, chooses a replacement way, forms the response for the core, performs
// store/atomic updates and, on a complete miss, issues the line fill (and a write-back when needed).
// Addresses for which is_IO_Addr holds bypass the cache and are sent straight to memory.
// NOTE(review): many begin/end/else lines and the declarations of hit, hb_hit and lb_hit are not
// visible in this excerpt; the comments below describe only the visible statements.
388 rule read_from_memory_structures(rg_state[0]==ReadingCache && memoperation.notFull && wbEpoch[0]==rg_insn_epoch);
389 Bool cache_enabled = !is_IO_Addr(rg_paddress);
// Trap priority: misaligned address first, then bus error, then the trap reported with the translation.
390 Trap_type exception = misaligned_addr?((rg_access_type==Load)?
391 tagged Exception Load_addr_misaligned:tagged Exception Store_addr_misaligned): rg_bus_error?
392 (rg_access_type==Load?tagged Exception Load_access_fault:tagged Exception Store_access_fault):rg_tlb_exception[0];
393 /*====== Get the states of the request ======*/
// Tag comes from the physical address; offsets and set index come from the virtual address.
394 Bit#(20) cpu_tag=rg_paddress[`PADDR-1:`PADDR-20];
395 Bit#(TLog#(`DCACHE_BLOCK_SIZE)) word_offset=rg_vaddress[word_bits+byte_bits-1:byte_bits];
396 Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=rg_vaddress[byte_bits-1:0];
397 Bit#(TLog#(`DCACHE_SETS)) setindex=rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits];
399 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) hbdataline=0;
400 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) lbdataline=0;
402 /*========== Check hit on Hit buffer =======*/
404 if(hb_valid && (hb_setindex==setindex) && (hb_tag==cpu_tag) && !misaligned_addr)begin
406 hbdataline=hb_data.response_portA;
408 /*==========================================*/
410 /*========= Check Line buffer ==============*/
// Stall while a fill is outstanding and the bytes needed by this request have not all arrived.
411 Bool stall_on_lb=((line_bytes_written & rg_writeenable) != rg_writeenable) && memoperation.notEmpty;
412 Bool lb_valid=memoperation.notEmpty;
// memoperation is unguarded; the values read here are qualified by lb_valid in the visible uses below.
413 let {lb_tag,lb_setindex,lb_way,lb_we}=memoperation.first;
415 if(lb_valid && (lb_setindex==setindex) && (lb_tag==cpu_tag) && !misaligned_addr)begin
417 lbdataline=lb_data.response_portA;
419 /*===========================================*/
421 /*======= Check SRAMS ==============*/
// NOTE(review): this section indexes ways 0..3 explicitly, so it assumes DCACHE_WAYS is 4.
422 Bit#(`DCACHE_WAYS) tag_hit=0;
424 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) dataline0=data[0].read_response;
425 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) dataline1=data[1].read_response;
426 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) dataline2=data[2].read_response;
427 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) dataline3=data[3].read_response;
// Tag entry layout: bit 21 = dirty, bit 20 = valid, bits 19:0 = tag.
429 Bit#(`DCACHE_WAYS) valid_values={tag[3].read_response[20],tag[2].read_response[20],tag[1].read_response[20],tag[0].read_response[20]};
430 Bit#(`DCACHE_WAYS) dirty_values={tag[3].read_response[21],tag[2].read_response[21],tag[1].read_response[21],tag[0].read_response[21]};
432 if(cpu_tag==tag[0].read_response[19:0] && valid_values[0]==1) tag_hit[0]=1;
433 if(cpu_tag==tag[1].read_response[19:0] && valid_values[1]==1) tag_hit[1]=1;
434 if(cpu_tag==tag[2].read_response[19:0] && valid_values[2]==1) tag_hit[2]=1;
435 if(cpu_tag==tag[3].read_response[19:0] && valid_values[3]==1) tag_hit[3]=1;
// An SRAM hit counts only when neither buffer already holds the line.
438 hit=unpack(|(tag_hit)) && (!hb_hit) && (!lb_hit) && !misaligned_addr;
439 // We are not invalidating a replaced line when enquing into the linebuffer.
440 // So it is possible that the next request finds this to be a hit and proceeds to change the SRAM.
441 // While the linebuffer, having received all the bytes from the memory will simply
442 // go ahead and replace the dirty line without eviction. The following condition ensures
443 // that for the same index if the SRAM hit is to the same line as the LB treat is as a miss.
444 if(hit && tag_hit==lb_way && lb_valid && lb_setindex==setindex)
446 dynamicAssert(!(lb_hit&&hb_hit),"ASSERT: lb_hit and hb_hit are both 1");
// AND-OR multiplexer: each way's line is masked by its hit bit and the results are ORed together.
448 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) temp0=duplicate(tag_hit[0]&pack(hit));
449 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) temp1=duplicate(tag_hit[1]&pack(hit));
450 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) temp2=duplicate(tag_hit[2]&pack(hit));
451 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) temp3=duplicate(tag_hit[3]&pack(hit));
452 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) hitline0=temp0&dataline0;
453 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) hitline1=temp1&dataline1;
454 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) hitline2=temp2&dataline2;
455 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) hitline3=temp3&dataline3;
456 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) sram_dataline=hitline0|hitline1|hitline2|hitline3;
457 `ifdef verbose $display($time,"\tDCACHE: valid_values: %b dirty_values: %b stall_on_lb: %b we: %h Access_type: ",valid_values,dirty_values,stall_on_lb,rg_writeenable,fshow(rg_access_type)); `endif
458 /*================================================*/
459 /*===== replacement line selection ==============*/
// Prefer an invalid way; when all ways are valid prefer a clean (non-dirty) way.
// The case arms pick the lowest-numbered way whose bit is 0 and produce a one-hot vector.
460 Bit#(`DCACHE_WAYS) replace_vec=valid_values;
461 if(&(valid_values)==1)
462 replace_vec=dirty_values;
463 case (replace_vec) matches
464 'b???0:replace_vec='b0001;
465 'b??01:replace_vec='b0010;
466 'b?011:replace_vec='b0100;
467 'b0111:replace_vec='b1000;
// NOTE(review): presumably the default arm - choose the way indicated by the random counter.
470 replace_vec[random_line.value]=1;
// Never pick the way currently reserved by the line buffer or held in the hit buffer for this set.
474 if(replace_vec==lb_way && lb_setindex==setindex && lb_valid)
475 replace_vec=rotateBitsBy(lb_way,1);
476 if(replace_vec==hb_way && hb_valid && hb_setindex==setindex)
477 replace_vec=rotateBitsBy(replace_vec,1);
478 `ifdef verbose $display($time,"\tDCACHE: replacevec: %b hb_way: %b lb_way: %b",replace_vec,hb_way,lb_way); `endif
479 `ifdef verbose $display($time,"\tDCACHE: CPUTAG: %h lb_tag: %h hb_tag :%h",cpu_tag,lb_tag,hb_tag); `endif
480 `ifdef verbose $display($time,"\tDCACHE: CPUIndex: %d lb_index: %d hb_inex :%d",setindex,lb_setindex,hb_setindex); `endif
// Write-back address of the victim line, selected with the same AND-OR scheme as above.
// NOTE(review): setindex[6:0] hard-codes a 7-bit set index - confirm that TLog of DCACHE_SETS is 7.
482 Bit#(TAdd#(TLog#(`DCACHE_WORD_SIZE),TLog#(`DCACHE_BLOCK_SIZE))) offset_zeros='d0;
483 Bit#(`PADDR) r0=duplicate(replace_vec[0]);
484 Bit#(`PADDR) r1=duplicate(replace_vec[1]);
485 Bit#(`PADDR) r2=duplicate(replace_vec[2]);
486 Bit#(`PADDR) r3=duplicate(replace_vec[3]);
487 Bit#(`PADDR) write_address0=r0&{tag[0].read_response[20-1:0],setindex[6:0],offset_zeros};
488 Bit#(`PADDR) write_address1=r1&{tag[1].read_response[20-1:0],setindex[6:0],offset_zeros};
489 Bit#(`PADDR) write_address2=r2&{tag[2].read_response[20-1:0],setindex[6:0],offset_zeros};
490 Bit#(`PADDR) write_address3=r3&{tag[3].read_response[20-1:0],setindex[6:0],offset_zeros};
491 Bit#(`PADDR) write_address = write_address0 | write_address1 | write_address2 | write_address3;
// NOTE(review): the right-hand side of this assignment is not visible in this excerpt.
492 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) replace_dataline =
500 `ifdef verbose $display($time,"\tDCACHE: Replace vec: %h line: %h address :%h",replace_vec,replace_dataline, write_address); `endif
501 /*==============================================*/
503 /*==== capture the word to be operated on and perform the atomic operation as well on it=======*/
// At most one of the three sources is non-zero, so ORing them selects the line that hit.
504 Bit#(TMul#(TMul#(`DCACHE_BLOCK_SIZE,`DCACHE_WORD_SIZE),8)) dataline=hbdataline|lbdataline|sram_dataline;
// Select the word (64 bits per word offset), then shift the addressed byte down to bit 0.
505 Bit#(`Reg_width) data_word=(dataline>>{6'd0,word_offset}*64)[`Reg_width-1:0];
506 data_word=data_word>>({4'b0,byte_offset}*8);
// Zero-extended and sign-extended variants by transfer size (0: byte, 1: half word, 2: word, otherwise full width).
// NOTE(review): the condition choosing between the two is not visible in this excerpt.
509 data_word=rg_transfer_size==0?zeroExtend(data_word[7:0]):rg_transfer_size==1?zeroExtend(data_word[15:0]):rg_transfer_size==2?zeroExtend(data_word[31:0]):data_word;
511 data_word=rg_transfer_size==0?signExtend(data_word[7:0]):rg_transfer_size==1?signExtend(data_word[15:0]):rg_transfer_size==2?signExtend(data_word[31:0]):data_word;
513 let {success,storeResult,atomicdata} <- atomic_operation(data_word,rg_writedata,rg_atomic_op,rg_paddress);
514 if(rg_access_type==Load)
// For a store-conditional the value returned to the core is the status bit produced by atomic_operation.
516 if(success matches tagged Valid .sc)
517 data_word = zeroExtend(sc);
// Value to be written into the cache: atomic result, store data, or the unchanged word.
519 Bit#(`Reg_width) final_word = `ifdef atomic (rg_access_type==Atomic)?atomicdata: `endif (rg_access_type==Store)?rg_writedata:data_word;
520 `ifdef verbose $display($time,"\tDCACHE: hbhit: %b hbdataline: %h",hb_hit,hbdataline); `endif
521 `ifdef verbose $display($time,"\tDCACHE: lb_hit: %b lbdataline: %h",lb_hit,lbdataline); `endif
522 `ifdef verbose $display($time,"\tDCACHE: tag_hit: %b hit : %b srdataline: %h",tag_hit,hit , sram_dataline); `endif
523 `ifdef verbose $display($time,"\tDCACHE: Sending to Core: %h Final line: %h",data_word,dataline); `endif
524 `ifdef verbose $display($time,"\tDCACHE: translation done: %b tlb_exception: ",rg_trnslte_done[0],fshow(rg_tlb_exception[0])); `endif
525 /*=============================================================================================*/
526 /*============ perform Store/Atomic operations =============*/
// Proceed only when the translation has arrived without a trap.
527 if(rg_trnslte_done[0] &&& rg_tlb_exception[0] matches tagged None)begin
528 if(cache_enabled)begin
529 /*======= Calculate the next state ===*/
530 DcacheState nextstate=Idle;
531 if(misaligned_addr || hit || hb_hit || (lb_hit && !stall_on_lb))
// A miss, or a line-buffer hit whose bytes have not arrived yet, waits in KeepPolling.
533 else if(lb_hit && stall_on_lb || (!hit && !lb_hit &&!hb_hit))begin
534 nextstate=KeepPolling;
535 rg_poll_address<=rg_paddress;
538 rg_trnslte_done[0]<=False;
539 rg_state[0]<=nextstate;
540 `ifdef verbose $display($time,"\tDCACHE: NextState: ",fshow(nextstate)); `endif
541 /*=====================================*/
542 /*========= response to CPU =============*/
// A response is produced only when the request completes in this cycle; stores return zero data.
543 if(rg_access_type!=Store && nextstate==Idle)
544 wr_response_to_cpu<= tagged Valid (tuple4(data_word,exception,0,rg_insn_epoch));// TODO perf
545 else if(nextstate==Idle)
546 wr_response_to_cpu<= tagged Valid (tuple4(0,exception,0,rg_insn_epoch)); // TODO perf
// The epoch toggles when a trap is reported; the assignment in the None case leaves it unchanged.
548 if(exception matches tagged None)
549 wbEpoch[0]<=wbEpoch[0];
551 wbEpoch[0]<=~wbEpoch[0];
552 /*=======================================*/
554 if(rg_access_type==Store `ifdef atomic || storeResult `endif )
555 rg_global_dirty<=True;
556 /*=============== updated hit buffer on a write =========*/
557 if(hb_hit && (rg_access_type==Store `ifdef atomic || storeResult `endif ))begin
558 `ifdef verbose $display($time,"\tDCACHE: HB Hit. Writing Tag: %h Data: %h Way: %h",hb_tag,final_word,hb_way); `endif
559 hb_data.write_portA(rg_writeenable,duplicate(final_word));
561 /*============================================*/
562 /*=============== updated line buffer on a write =========*/
563 if(lb_hit && !stall_on_lb && (rg_access_type==Store `ifdef atomic || storeResult `endif ))begin
564 `ifdef verbose $display($time,"\tDCACHE: LB Hit. Writing Tag: %h Data: %h Way: %h setindex: %d",lb_tag,final_word,lb_way,lb_setindex); `endif
565 lb_data.write_portA(rg_writeenable,duplicate(final_word));
// A write that hits in the SRAMs loads the whole merged line into the hit buffer.
568 if(hit && (rg_access_type==Store `ifdef atomic || storeResult `endif ) && !hb_hit)begin
569 `ifdef verbose $display($time,"\tDCACHE: Hit in SRAMS and writing new value :%h to HB",update_line(rg_writeenable,duplicate(final_word),dataline)); `endif
571 hb_setindex<=setindex;
572 hb_data.write_portA('1,update_line(rg_writeenable,duplicate(final_word),dataline));
// NOTE(review): the statements controlled by the next two conditions are not visible in this excerpt.
575 if(hit && (rg_access_type==Store `ifdef atomic || storeResult `endif ) && !hb_hit)
577 else if(hb_valid && !hb_hit)
579 /*=============== updated SRAM entries with Hit buffer when possible =========*/
// The current request does not use the hit buffer, so its contents can be written back to the SRAMs.
580 if(hb_valid &&!hb_hit)begin
581 `ifdef verbose $display($time,"\tDCACHE: HB updating SRAM Tag: %h Data: %h Way: %h setindex: %d",hb_tag,hb_data.response_portA,hb_way,hb_setindex); `endif
582 wr_write_info<=tagged Valid tuple2(hb_tag,hb_setindex);
583 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin
584 tag[i].write_request(unpack(hb_way[i]),hb_setindex,{2'b11,hb_tag});
585 data[i].write_request(duplicate(hb_way[i]),hb_setindex,hb_data.response_portA);
588 /*============================================================================*/
589 if(!hit && !lb_hit && !hb_hit && !misaligned_addr)begin// a complete miss
590 `ifdef verbose $display($time,"\tDCACHE: A complete miss in Data Cache. Enquing into the memoperation FIFO"); `endif
// Byte enables of the first word returned by memory: 8 bytes at the requested word offset.
591 Bit#(TLog#(`DCACHE_BLOCK_SIZE)) val1=(rg_vaddress&'hfffffff8)[word_bits+byte_bits-1:byte_bits];
592 Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)) writeenable='hFF;
593 writeenable=writeenable<<{3'b0,val1}*8;
594 memoperation.enq(tuple4(cpu_tag,rg_vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits],replace_vec,writeenable));
// The fill request starts at the requested word, with the address aligned down to 8 bytes.
595 ff_read_request_to_memory.enq(To_Memory {address:rg_paddress&'hfffffff8,burst_length:fromInteger(`DCACHE_BLOCK_SIZE),ld_st:Load, transfer_size:3});
596 if((valid_values&dirty_values&replace_vec)==replace_vec)begin // if the replacing is dirty
597 `ifdef verbose $display($time,"\tDCACHE: Line being replaced is dirty. Addr: %h Data: %h",write_address,replace_vec); `endif
// NOTE(review): this write-back request is tagged ld_st:Load while every other write request in this
// file uses ld_st:Store - confirm whether the field is ignored on the write channel or this is a bug.
598 ff_write_request_to_memory.enq(To_Memory_Write {address:write_address,burst_length:fromInteger(`DCACHE_BLOCK_SIZE),ld_st:Load, transfer_size:3,
599 data_line:replace_dataline });
600 pending_write_response[0]<=True;
// IO accesses: a single-beat request sent straight to memory.
// NOTE(review): presumably the else branch of cache_enabled - the opening lines are not visible here.
604 else if(rg_access_type==Load || rg_access_type==Atomic)begin
605 ff_read_request_to_memory.enq(To_Memory {address:rg_paddress,burst_length:1,ld_st:Load,transfer_size:rg_transfer_size});
606 rg_state[0]<=IOReadResp;
608 else if(rg_access_type==Store)begin
609 ff_write_request_to_memory.enq(To_Memory_Write{address:rg_paddress,data_line:zeroExtend(rg_writedata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store});
610 rg_state[0]<=IOWriteResp;
// Translation finished with a trap: report it to the core, toggle the epoch and clear the trap.
613 else if(rg_trnslte_done[0])begin
615 wr_response_to_cpu<= tagged Valid (tuple4(0,rg_tlb_exception[0],0,rg_insn_epoch));//TODO perf
616 wbEpoch[0]<=~wbEpoch[0];
617 rg_tlb_exception[0]<=tagged None;
619 rg_trnslte_done[0]<=False;
620 `ifdef verbose $display($time,"\tDCACHE: Exception from TLB taken"); `endif
623 `ifdef verbose $display($time,"\tDCACHE: Translation not done"); `endif
626 /*==========================================================*/
// Completes an uncached (IO) read: aligns and extends the returned data, responds to the core and,
// for atomics, issues the follow-up write.
// NOTE(review): several conditionals, the definition of new_data and the non-atomic state update are
// not visible in this excerpt.
628 rule wait_for_ioread_response(rg_state[0]==IOReadResp && !memoperation.notEmpty);
629 `ifdef verbose $display($time,"\tDCACHE: Received IO Read Response"); `endif
630 Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=rg_vaddress[byte_bits-1:0];
631 Bit#(`Reg_width) data_value=ff_read_response_from_memory.first.data_line;
632 ff_read_response_from_memory.deq;
// Shift the addressed byte down to bit 0.
633 data_value=data_value>>({4'b0,byte_offset}*8);
// Zero-extended and sign-extended variants by transfer size; the selecting condition is not visible here.
635 data_value=rg_transfer_size==0?zeroExtend(data_value[7:0]):rg_transfer_size==1?zeroExtend(data_value[15:0]):rg_transfer_size==2?zeroExtend(data_value[31:0]):data_value;
637 data_value=rg_transfer_size==0?signExtend(data_value[7:0]):rg_transfer_size==1?signExtend(data_value[15:0]):rg_transfer_size==2?signExtend(data_value[31:0]):data_value;
// A bus error becomes a load access fault and toggles the epoch.
638 wr_response_to_cpu<=tagged Valid (tuple4(data_value,ff_read_response_from_memory.first.bus_error==1?tagged Exception Load_access_fault:tagged None,rg_perf_monitor,rg_insn_epoch));
639 wbEpoch[0]<=ff_read_response_from_memory.first.bus_error==1?~wbEpoch[0]:wbEpoch[0];
// Atomic on IO space: compute the result and write it back, then wait for the write response.
641 if(rg_access_type==Atomic)begin
642 let {success,storeResult,atomicdata} <- atomic_operation(data_value,rg_writedata,rg_atomic_op,rg_paddress);
644 ff_write_request_to_memory.enq(To_Memory_Write{address:rg_paddress,data_line:zeroExtend(atomicdata),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store});
// NOTE(review): this request uses the truncated virtual address while the one above uses rg_paddress;
// the surrounding lines are not visible, so it may be an alternative or disabled path - confirm.
646 ff_write_request_to_memory.enq(To_Memory_Write{address:truncate(rg_vaddress),data_line:zeroExtend(new_data),burst_length:1,transfer_size:rg_transfer_size,ld_st:Store});
648 rg_state[0]<=IOWriteResp;
// Rule wait_for_iowrite_response
// Fires while rg_state[0] is IOWriteResp, memoperation reports that it is not
// "notEmpty", and pending_write_response[2] is False. It dequeues the head of
// ff_write_response_from_memory and, for non-Atomic accesses, reports the
// outcome to the CPU.
// NOTE(review): the end of this rule (including any state update) is not
// visible in this excerpt.
657 rule wait_for_iowrite_response(rg_state[0]==IOWriteResp && !memoperation.notEmpty && !pending_write_response[2]);
658 `ifdef verbose $display($time,"\tDCACHE: Received IO Write Response"); `endif
659 ff_write_response_from_memory.deq;
// Non-Atomic accesses get a response here with data 0; the trap is
// Store_access_fault when the write response carries bus_error==1, else None.
660 if(rg_access_type!=Atomic) begin
661 wr_response_to_cpu<=tagged Valid (tuple4(0,ff_write_response_from_memory.first.bus_error==1?tagged Exception Store_access_fault:tagged None,rg_perf_monitor,rg_insn_epoch));
// wbEpoch[0] is inverted only when the response reports a bus error.
662 wbEpoch[0]<=ff_write_response_from_memory.first.bus_error==1?~wbEpoch[0]:wbEpoch[0];
// Method virtual_address
// Accepts a new request from the core; only enabled while rg_state[1] is Idle.
//   vaddress      - virtual address of the access
//   load_store    - access type (compared against Fence below; Store and
//                   Atomic are tested elsewhere via rg_access_type)
//   writedata     - data supplied with the request
//   transfer_size - size code; the slices below treat 0 as 8 bits, 1 as
//                   16 bits, 2 as 32 bits and any other value as full width
//   atomic_op     - atomic operation code (only when `atomic is defined)
//   signextend    - latched into rg_signextend
//   insnepoch     - latched into rg_insn_epoch
// NOTE(review): several else/end lines and at least one branch body of this
// method are not visible in this excerpt; comments describe shown lines only.
667 method Action virtual_address(Bit#(`VADDR) vaddress, Access_type load_store, Bit#(TMul#(`DCACHE_WORD_SIZE,8)) writedata, Bit#(3) transfer_size, `ifdef atomic Bit#(5) atomic_op, `endif Bool signextend, Bit#(1) insnepoch) if(rg_state[1]==Idle);
// Misalignment check: size 1 needs vaddress[0]==0, size 2 needs the low two
// bits clear, size 3 needs the low three bits clear.
668 if((transfer_size=='b01 && vaddress[0]!='b0) || (transfer_size=='b10 && vaddress[1:0]!=0) || (transfer_size=='b11 && vaddress[2:0]!=0))
669 misaligned_addr<=True;
671 misaligned_addr<=False;
672 Bit#(`PERFMONITORS) perf_monitor=0;
// Set index: the set_bits bits of vaddress directly above the word and byte
// offset fields.
673 Bit#(TLog#(`DCACHE_SETS)) setindex=vaddress[set_bits+word_bits+byte_bits-1:word_bits+byte_bits];
674 `ifdef verbose $display($time,"\tDCACHE: ",fshow(load_store)," Request of VAddr: %h transfersize: %d signextend: %b setindex: %d data:%h",vaddress,transfer_size, signextend,setindex,writedata); `endif
// Mask with one bit per byte of the access: 1, 2, 4 or 8 low bits set
// depending on transfer_size.
675 Bit#(TMul#(`DCACHE_WORD_SIZE,`DCACHE_BLOCK_SIZE)) we=transfer_size==0?'b1:transfer_size==1?'b11:transfer_size==2?'hF:'hFF;
676 Bit#(TLog#(`DCACHE_BLOCK_SIZE)) word_offset= vaddress[word_bits+byte_bits-1:byte_bits];
677 Bit#(TLog#(`DCACHE_WORD_SIZE)) byte_offset=vaddress[byte_bits-1:0];
// Shift the mask left by word_offset*8 positions.
// NOTE(review): byte_offset is not applied to the mask on any visible line,
// and the consumer of `we` is not visible in this excerpt - confirm.
678 we=we<<{4'b0,word_offset}*8;
// Latch the request so later rules can use it.
680 rg_access_type<=load_store;
682 rg_vaddress<=vaddress;
683 rg_transfer_size<=transfer_size;
684 `ifdef atomic rg_atomic_op<=atomic_op; `endif
// Sub-word write data is replicated across the register with duplicate();
// sizes other than 0/1/2 store writedata unchanged.
685 rg_writedata<=transfer_size==0?duplicate(writedata[7:0]):transfer_size==1?duplicate(writedata[15:0]):transfer_size==2?duplicate(writedata[31:0]):writedata;
687 rg_signextend<=signextend;
688 rg_insn_epoch<=insnepoch;
// Compare the index carried by a Valid wr_write_info against this request's
// set index.
// NOTE(review): the action taken when the indices match is not visible here.
689 if(wr_write_info matches tagged Valid .x)begin
690 let {newtag,newindex}=x;
691 if(newindex==setindex)
// A Fence request moves the state machine to FenceStart.
694 if(load_store==Fence)begin
695 rg_state[1]<=FenceStart;
// Issue read requests for the selected set to the tag and data memories of
// every way, then move to ReadingCache.
// NOTE(review): presumably this is the non-Fence path; the separating else is
// not visible in this excerpt.
699 for(Integer i=0;i<`DCACHE_WAYS;i=i+1)begin // send address to the Block_rams
700 tag[i].read_request(setindex);
701 data[i].read_request(setindex);
703 rg_state[1]<=ReadingCache;
706 capture_counters<=True;
// Method response_to_core
// Returns the current value of wr_response_to_cpu. When Valid, the tuple is
// written elsewhere in this module as (data, trap, perf monitor bits,
// instruction epoch).
711 method Maybe#(Tuple4#(Bit#(`Reg_width), Trap_type,Bit#(`PERFMONITORS),Bit#(1))) response_to_core;
712 return wr_response_to_cpu;
// Method physical_address
// Delivers the result of address translation: marks translation as done via
// rg_trnslte_done[1] and records the accompanying trap in rg_tlb_exception[1].
//   paddr     - translated physical address
//   exception - trap reported along with the translation
// NOTE(review): no visible line stores paddr; the assignment is presumably on
// a line missing from this excerpt - confirm against the full file.
715 method Action physical_address(Bit#(`PADDR) paddr, Trap_type exception);
716 `ifdef verbose $display($time,"\tDCACHE: Sending physical address %h to dcache ",paddr); `endif
718 rg_trnslte_done[1] <= True;
719 rg_tlb_exception[1]<=exception;
// Method read_request_to_memory
// Dequeues ff_read_request_to_memory and returns the entry that was at its
// head.
722 method ActionValue#(To_Memory#(`PADDR)) read_request_to_memory;
723 ff_read_request_to_memory.deq;
724 return ff_read_request_to_memory.first;
// Method write_request_to_memory
// Dequeues ff_write_request_to_memory and returns the entry that was at its
// head.
726 method ActionValue#(To_Memory_Write) write_request_to_memory;
727 ff_write_request_to_memory.deq;
728 return ff_write_request_to_memory.first;
// Method init_complete
// Returns True whenever rg_state[0] is anything other than Fence.
// NOTE(review): the request path in this module enters FenceStart, not Fence;
// confirm which state(s) should hold this signal low.
730 method Bool init_complete;
731 return (rg_state[0]!=Fence);
// Method read_response_from_memory
// Enqueues a read response into ff_read_response_from_memory.
//   resp - response to enqueue; its data_line and bus_error fields are read
//          when the entry is consumed
733 method Action read_response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp);
734 `ifdef verbose $display($time,"\tDCACHE: Memory has responded"); `endif
735 ff_read_response_from_memory.enq(resp);
// Method write_response_from_memory
// Enqueues a write response into ff_write_response_from_memory.
//   resp - response to enqueue; its bus_error field is read when the entry is
//          consumed
737 method Action write_response_from_memory(From_Memory#(`DCACHE_WORD_SIZE) resp);
738 ff_write_response_from_memory.enq(resp);
// Method flush_from_wb
// Inverts wbEpoch through index 1 (the rules above update it through
// index 0).
// NOTE(review): the end of this method lies beyond the visible excerpt.
740 method Action flush_from_wb;
741 `ifdef verbose $display($time,"\tDCACHE: Inverting the wbEPOCH due to WB stage flush"); `endif
742 wbEpoch[1]<=~wbEpoch[1];