Merge pull request #191 from ozbenh/litedram
[microwatt.git] / loadstore1.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.decode_types.all;
7 use work.common.all;
8
9 -- 2 cycle LSU
10 -- We calculate the address in the first cycle
11
-- Entity loadstore1: port interface of the load/store unit.
-- Record port types come from work.common / work.decode_types (not visible here).
--   clk, rst   : clock and synchronous reset (reset is sampled on rising_edge(clk)).
--   l_in       : incoming operation; architecture behave reads fields such as
--                valid, op, addr1, addr2, data, length, insn and nia.
--   e_out      : exception / fault flags reported back (exception, instr_fault,
--                invalid, badtree, perm_error, rc_error, segment_fault).
--   l_out      : result towards writeback (valid, write_enable, write_reg,
--                write_data, xerc, rc, store_done).
--   d_out/d_in : request to and response from the dcache.
--   m_out/m_in : request to and response from the MMU.
--   dc_stall   : NOTE(review): not referenced anywhere in architecture behave.
--   stall_out  : driven from the internal stall variable of process loadstore1_1.
12 entity loadstore1 is
13 port (
14 clk : in std_ulogic;
15 rst : in std_ulogic;
16
17 l_in : in Execute1ToLoadstore1Type;
18 e_out : out Loadstore1ToExecute1Type;
19 l_out : out Loadstore1ToWritebackType;
20
21 d_out : out Loadstore1ToDcacheType;
22 d_in : in DcacheToLoadstore1Type;
23
24 m_out : out Loadstore1ToMmuType;
25 m_in : in MmuToLoadstore1Type;
26
27 dc_stall : in std_ulogic;
28 stall_out : out std_ulogic
29 );
30 end loadstore1;
31
32 -- Note, we don't currently use the stall output from the dcache because
33 -- we know it can take two requests without stalling when idle, we are
34 -- its only user, and we know it never stalls when idle.
35
36 architecture behave of loadstore1 is
37
38 -- State machine for the load/store unit
-- Besides sequencing the one or two dcache requests of a (possibly unaligned)
-- access, the states also cover waiting on the MMU.
-- Note: TLBIE_WAIT is entered for OP_TLBIE and also for an OP_MTSPR that
-- targets an SPR held in the MMU; both leave it when m_in.done = '1'.
39 type state_t is (IDLE, -- ready for instruction
40 SECOND_REQ, -- send 2nd request of unaligned xfer
41 ACK_WAIT, -- waiting for ack from dcache
42 LD_UPDATE, -- writing rA with computed addr on load
43 MMU_LOOKUP, -- waiting for MMU to look up translation
44 TLBIE_WAIT -- waiting for MMU to finish doing a tlbie
45 );
46
-- reg_stage_t: all state registered by the load/store unit (signals r / rin).
-- How process loadstore1_1 uses the fields:
--   load, dcbz, tlbie : cleared on every accepted op, then set for OP_LOAD,
--                       OP_DCBZ and OP_TLBIE respectively.
--   addr              : lsu_sum for the accepted op (l_in.nia for OP_FETCH_FAILED).
--   store_data        : l_in.data after byte rotation / optional byte reversal.
--   load_data         : data_permuted captured when the first of two dwords is acked.
--   write_reg .. priv_mode : copied from the same-named l_in fields
--                       (nc is taken from l_in.ci and may be forced to '1').
--   state             : current state_t value; the only field written on reset.
--   dwords_done       : set once the first dword of a two-dword access is acked.
--   first_bytes / second_bytes : low / high 8 bits of xfer_data_sel's result,
--                       used as byte selects for the first / second request.
--   dar, dsisr        : written by OP_MTSPR or when a data-side exception is
--                       raised; read back by OP_MFSPR.
--   instr_fault       : set for OP_FETCH_FAILED.
47 type reg_stage_t is record
48 -- latch most of the input request
49 load : std_ulogic;
50 tlbie : std_ulogic;
51 dcbz : std_ulogic;
52 addr : std_ulogic_vector(63 downto 0);
53 store_data : std_ulogic_vector(63 downto 0);
54 load_data : std_ulogic_vector(63 downto 0);
55 write_reg : gpr_index_t;
56 length : std_ulogic_vector(3 downto 0);
57 byte_reverse : std_ulogic;
58 sign_extend : std_ulogic;
59 update : std_ulogic;
60 update_reg : gpr_index_t;
61 xerc : xer_common_t;
62 reserve : std_ulogic;
63 rc : std_ulogic;
64 nc : std_ulogic; -- non-cacheable access
65 virt_mode : std_ulogic;
66 priv_mode : std_ulogic;
67 state : state_t;
68 dwords_done : std_ulogic;
69 first_bytes : std_ulogic_vector(7 downto 0);
70 second_bytes : std_ulogic_vector(7 downto 0);
71 dar : std_ulogic_vector(63 downto 0);
72 dsisr : std_ulogic_vector(31 downto 0);
73 instr_fault : std_ulogic;
74 end record;
75
-- Per-byte helper types for load data formatting (one element per byte lane 0..7):
--   byte_sel_t  : one flag per byte (used for use_second in loadstore1_1).
--   byte_trim_t : 2-bit code choosing the source of a result byte.
--   trim_ctl_t  : one byte_trim_t code per byte.
76 type byte_sel_t is array(0 to 7) of std_ulogic;
77 subtype byte_trim_t is std_ulogic_vector(1 downto 0);
78 type trim_ctl_t is array(0 to 7) of byte_trim_t;
79
-- r is the registered state, rin its next value computed by loadstore1_1.
-- lsu_sum is the effective address produced by the concurrent adder below.
80 signal r, rin : reg_stage_t;
81 signal lsu_sum : std_ulogic_vector(63 downto 0);
82
83 -- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
    -- Translate a one-hot access size (1, 2, 4 or 8 bytes) into an 8-bit
    -- byte-enable mask with that many low-order bits set.
    -- Any other encoding (including metavalues) gives an all-zero mask.
begin
    if length = "0001" then
        return "00000001";      -- 1 byte
    elsif length = "0010" then
        return "00000011";      -- 2 bytes
    elsif length = "0100" then
        return "00001111";      -- 4 bytes
    elsif length = "1000" then
        return "11111111";      -- 8 bytes
    end if;
    -- not a recognised size: no bytes enabled
    return "00000000";
end function length_to_sel;
99
100 -- Calculate byte enables
101 -- This returns 16 bits, giving the select signals for two transfers,
102 -- to account for unaligned loads or stores
function xfer_data_sel(size : in std_logic_vector(3 downto 0);
                       address : in std_logic_vector(2 downto 0))
    return std_ulogic_vector is
    -- Build the 16-bit byte-select for an access of the given size starting at
    -- byte offset "address" within a doubleword.  Bits 7..0 select bytes of the
    -- first transfer, bits 15..8 bytes of the second transfer (non-zero only
    -- when the access runs past the end of the first doubleword).
    variable wide_mask : unsigned(15 downto 0);
    variable offset    : natural;
begin
    -- zero-extend the 8-bit size mask to 16 bits
    wide_mask := resize(unsigned(length_to_sel(size)), 16);
    -- move it up by the byte offset of the access
    offset := to_integer(unsigned(address));
    return std_ulogic_vector(shift_left(wide_mask, offset));
end function xfer_data_sel;
112
113 begin
114 -- Calculate the address in the first cycle
-- lsu_sum = l_in.addr1 + l_in.addr2 (64-bit unsigned add) while l_in.valid = '1';
-- it is forced to all zeros whenever l_in.valid is not '1'.
115 lsu_sum <= std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)) when l_in.valid = '1' else (others => '0');
116
loadstore1_0: process(clk)
begin
    -- State register: on each rising clock edge take the next state computed
    -- by loadstore1_1.  Reset is synchronous and only forces the state machine
    -- back to IDLE; every other field of r keeps its value while rst = '1'.
    if rising_edge(clk) then
        if rst /= '1' then
            r <= rin;
        else
            r.state <= IDLE;
        end if;
    end if;
end process;
127
-- loadstore1_1: combinational next-state and output logic (sensitive to all inputs).
-- The process works on v, a variable copy of the registered state r.  At the end
-- v is assigned to rin, which the clocked process loadstore1_0 copies into r.
-- From the current state and inputs it drives the dcache request (d_out), the MMU
-- request (m_out), the writeback result (l_out), the exception report (e_out)
-- and stall_out.
128 loadstore1_1: process(all)
-- NOTE(review): wdata is declared but never referenced in this process, and
-- itlb_fault is only ever assigned '0'; both look like leftovers.
129 variable v : reg_stage_t;
130 variable brev_lenm1 : unsigned(2 downto 0);
131 variable byte_offset : unsigned(2 downto 0);
132 variable j : integer;
133 variable k : unsigned(2 downto 0);
134 variable kk : unsigned(3 downto 0);
135 variable long_sel : std_ulogic_vector(15 downto 0);
136 variable byte_sel : std_ulogic_vector(7 downto 0);
137 variable req : std_ulogic;
138 variable stall : std_ulogic;
139 variable addr : std_ulogic_vector(63 downto 0);
140 variable wdata : std_ulogic_vector(63 downto 0);
141 variable write_enable : std_ulogic;
142 variable do_update : std_ulogic;
143 variable two_dwords : std_ulogic;
144 variable done : std_ulogic;
145 variable data_permuted : std_ulogic_vector(63 downto 0);
146 variable data_trimmed : std_ulogic_vector(63 downto 0);
147 variable use_second : byte_sel_t;
148 variable trim_ctl : trim_ctl_t;
149 variable negative : std_ulogic;
150 variable mfspr : std_ulogic;
151 variable sprn : std_ulogic_vector(9 downto 0);
152 variable sprval : std_ulogic_vector(63 downto 0);
153 variable exception : std_ulogic;
154 variable next_addr : std_ulogic_vector(63 downto 0);
155 variable mmureq : std_ulogic;
156 variable dsisr : std_ulogic_vector(31 downto 0);
157 variable mmu_mtspr : std_ulogic;
158 variable itlb_fault : std_ulogic;
159 begin
-- Defaults for this evaluation; the state machine below overrides them as needed.
-- addr defaults to the freshly computed lsu_sum.
160 v := r;
161 req := '0';
162 stall := '0';
163 done := '0';
164 byte_sel := (others => '0');
165 addr := lsu_sum;
166 mfspr := '0';
167 mmu_mtspr := '0';
168 itlb_fault := '0';
-- SPR number of the current instruction as a 10-bit vector; decode_spr_num is
-- defined outside this file.
169 sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
170 sprval := (others => '0'); -- avoid inferred latches
171 exception := '0';
172 dsisr := (others => '0');
173 mmureq := '0';
174
175 write_enable := '0';
176 do_update := '0';
-- two_dwords is '1' when any bit of the registered second-transfer byte select
-- is set, i.e. the registered access needs a second dcache request.
177 two_dwords := or (r.second_bytes);
178
179 -- load data formatting
-- Uses the registered request (r.addr, r.length, r.byte_reverse).  For a
-- byte-reversed access brev_lenm1 holds the low 3 bits of (length - 1); it is
-- XORed into the byte index below, otherwise it is zero.
180 byte_offset := unsigned(r.addr(2 downto 0));
181 brev_lenm1 := "000";
182 if r.byte_reverse = '1' then
183 brev_lenm1 := unsigned(r.length(2 downto 0)) - 1;
184 end if;
185
186 -- shift and byte-reverse data bytes
-- For result byte i: kk = (i xor brev_lenm1) + byte_offset as a 4-bit sum.
-- kk(3), the carry out of the 3-bit lane index, is recorded in use_second(i);
-- kk(2 downto 0) picks the source byte lane of d_in.data.
187 for i in 0 to 7 loop
188 kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
189 use_second(i) := kk(3);
190 j := to_integer(kk(2 downto 0)) * 8;
191 data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
192 end loop;
193
194 -- Work out the sign bit for sign extension.
195 -- Assumes we are not doing both sign extension and byte reversal,
196 -- in that for unaligned loads crossing two dwords we end up
197 -- using a bit from the second dword, whereas for a byte-reversed
198 -- (i.e. big-endian) load the sign bit would be in the first dword.
199 negative := (r.length(3) and data_permuted(63)) or
200 (r.length(2) and data_permuted(31)) or
201 (r.length(1) and data_permuted(15)) or
202 (r.length(0) and data_permuted(7));
203
204 -- trim and sign-extend
-- trim_ctl(i) selects the source of result byte i (see the case below):
--   "11" -> byte from r.load_data (data saved from the first dword)
--   "10" -> byte from data_permuted (data currently returned by the dcache)
--   "01" -> 0xFF, others -> 0x00
-- Bytes with index >= r.length are fill bytes: 0xFF only when both negative
-- and r.sign_extend are '1', otherwise 0x00.
-- Bytes inside the access: for a two-dword access, lanes without use_second
-- come from the saved first dword and lanes with it from the current data;
-- for a single-dword access, lanes with use_second set are zeroed.
205 for i in 0 to 7 loop
206 if i < to_integer(unsigned(r.length)) then
207 if two_dwords = '1' then
208 trim_ctl(i) := '1' & not use_second(i);
209 else
210 trim_ctl(i) := not use_second(i) & '0';
211 end if;
212 else
213 trim_ctl(i) := '0' & (negative and r.sign_extend);
214 end if;
215 case trim_ctl(i) is
216 when "11" =>
217 data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
218 when "10" =>
219 data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
220 when "01" =>
221 data_trimmed(i * 8 + 7 downto i * 8) := x"FF";
222 when others =>
223 data_trimmed(i * 8 + 7 downto i * 8) := x"00";
224 end case;
225 end loop;
226
227 -- compute (addr + 8) & ~7 for the second doubleword when unaligned
228 next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
229
-- Main state machine, switched on the registered state r.state.
230 case r.state is
231 when IDLE =>
-- A new operation is accepted only when l_in.valid = '1'.  Per-op flags are
-- cleared first and then set according to l_in.op.
232 if l_in.valid = '1' then
233 v.addr := lsu_sum;
234 v.load := '0';
235 v.dcbz := '0';
236 v.tlbie := '0';
237 v.instr_fault := '0';
238 v.dwords_done := '0';
239 case l_in.op is
-- Store, load and dcbz raise req, so a dcache request is driven in this same
-- cycle (d_out.valid <= req further down).
240 when OP_STORE =>
241 req := '1';
242 when OP_LOAD =>
243 req := '1';
244 v.load := '1';
245 when OP_DCBZ =>
246 req := '1';
247 v.dcbz := '1';
-- tlbie is handed to the MMU; wait in TLBIE_WAIT for m_in.done.
248 when OP_TLBIE =>
249 mmureq := '1';
250 stall := '1';
251 v.tlbie := '1';
252 v.state := TLBIE_WAIT;
-- mfspr completes in this cycle: done is raised and sprval is returned through
-- l_out below.  The sprn(0) test selects r.dsisr ('0') or r.dar ('1').
253 when OP_MFSPR =>
254 done := '1';
255 mfspr := '1';
256 -- partial decode on SPR number should be adequate given
257 -- the restricted set that get sent down this path
258 if sprn(9) = '0' and sprn(5) = '0' then
259 if sprn(0) = '0' then
260 sprval := x"00000000" & r.dsisr;
261 else
262 sprval := r.dar;
263 end if;
264 else
265 -- reading one of the SPRs in the MMU
266 sprval := m_in.sprval;
267 end if;
-- mtspr to the local dsisr/dar completes immediately; writes to MMU-held SPRs
-- are forwarded (mmu_mtspr) and wait in TLBIE_WAIT for m_in.done.
268 when OP_MTSPR =>
269 if sprn(9) = '0' and sprn(5) = '0' then
270 if sprn(0) = '0' then
271 v.dsisr := l_in.data(31 downto 0);
272 else
273 v.dar := l_in.data;
274 end if;
275 done := '1';
276 else
277 -- writing one of the SPRs in the MMU
278 mmu_mtspr := '1';
279 stall := '1';
280 v.state := TLBIE_WAIT;
281 end if;
-- Instruction-side fault: the address sent to the MMU is l_in.nia instead of
-- lsu_sum.
282 when OP_FETCH_FAILED =>
283 -- send it to the MMU to do the radix walk
284 addr := l_in.nia;
285 v.addr := l_in.nia;
286 v.instr_fault := '1';
287 mmureq := '1';
288 stall := '1';
289 v.state := MMU_LOOKUP;
290 when others =>
291 assert false report "unknown op sent to loadstore1";
292 end case;
293
-- Latch the request attributes from l_in; this is done for every accepted op.
294 v.write_reg := l_in.write_reg;
295 v.length := l_in.length;
296 v.byte_reverse := l_in.byte_reverse;
297 v.sign_extend := l_in.sign_extend;
298 v.update := l_in.update;
299 v.update_reg := l_in.update_reg;
300 v.xerc := l_in.xerc;
301 v.reserve := l_in.reserve;
302 v.rc := l_in.rc;
303 v.nc := l_in.ci;
304 v.virt_mode := l_in.virt_mode;
305 v.priv_mode := l_in.priv_mode;
306
307 -- XXX Temporary hack. Mark the op as non-cachable if the address
308 -- is the form 0xc------- for a real-mode access.
309 --
310 -- This will have to be replaced by a combination of implementing the
311 -- proper HV CI load/store instructions and having an MMU to get the I
312 -- bit otherwise.
313 if lsu_sum(31 downto 28) = "1100" and l_in.virt_mode = '0' then
314 v.nc := '1';
315 end if;
316
317 -- Do length_to_sel and work out if we are doing 2 dwords
-- long_sel(7 downto 0) is the byte select of the first request (also driven
-- on d_out.byte_sel this cycle); long_sel(15 downto 8) is kept for a second one.
318 long_sel := xfer_data_sel(l_in.length, v.addr(2 downto 0));
319 byte_sel := long_sel(7 downto 0);
320 v.first_bytes := byte_sel;
321 v.second_bytes := long_sel(15 downto 8);
322
323 -- Do byte reversing and rotating for stores in the first cycle
-- Here the unregistered request (lsu_sum, l_in.*) is used.  k is a 3-bit sum,
-- so the destination lane wraps around within the 8-byte doubleword.
324 byte_offset := unsigned(lsu_sum(2 downto 0));
325 brev_lenm1 := "000";
326 if l_in.byte_reverse = '1' then
327 brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
328 end if;
329 for i in 0 to 7 loop
330 k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
331 j := to_integer(k) * 8;
332 v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
333 end loop;
334
-- A dcache request was issued: stall, then either wait for its ack or go and
-- send the second request when the access spills into the next doubleword.
335 if req = '1' then
336 stall := '1';
337 if long_sel(15 downto 8) = "00000000" then
338 v.state := ACK_WAIT;
339 else
340 v.state := SECOND_REQ;
341 end if;
342 end if;
343 end if;
344
-- Issue the request for the second doubleword (next_addr, r.second_bytes).
345 when SECOND_REQ =>
346 addr := next_addr;
347 byte_sel := r.second_bytes;
348 req := '1';
349 stall := '1';
350 v.state := ACK_WAIT;
351
-- Wait for the dcache to answer (d_in.valid), handling errors and two-dword
-- sequencing.
352 when ACK_WAIT =>
353 stall := '1';
354 if d_in.valid = '1' then
355 if d_in.error = '1' then
356 -- dcache will discard the second request if it
357 -- gets an error on the 1st of two requests
-- addr is set to the address of the failing dword; it is driven on m_out.addr
-- and, on an exception, stored in v.dar at the end of the process.
358 if r.dwords_done = '1' then
359 addr := next_addr;
360 else
361 addr := r.addr;
362 end if;
363 if d_in.cache_paradox = '1' then
364 -- signal an interrupt straight away
-- DSISR bit positions are written as 63 - n; presumably n is the Power ISA
-- (big-endian) bit number -- confirm against the ISA.
365 exception := '1';
366 dsisr(63 - 38) := not r.load;
367 -- XXX there is no architected bit for this
368 dsisr(63 - 35) := d_in.cache_paradox;
369 v.state := IDLE;
370 else
371 -- Look up the translation for TLB miss
372 -- and also for permission error and RC error
373 -- in case the PTE has been updated.
374 mmureq := '1';
375 v.state := MMU_LOOKUP;
376 end if;
377 else
-- Successful ack.  If this was the first of two dwords, note it (and keep the
-- permuted load data for merging) and stay in ACK_WAIT for the second ack.
378 if two_dwords = '1' and r.dwords_done = '0' then
379 v.dwords_done := '1';
380 if r.load = '1' then
381 v.load_data := data_permuted;
382 end if;
383 else
-- Final ack: loads write data_trimmed to the destination register this cycle.
384 write_enable := r.load;
385 if r.load = '1' and r.update = '1' then
386 -- loads with rA update need an extra cycle
387 v.state := LD_UPDATE;
388 else
389 -- stores write back rA update in this cycle
390 do_update := r.update;
391 stall := '0';
392 done := '1';
393 v.state := IDLE;
394 end if;
395 end if;
396 end if;
397 end if;
398
-- Wait for the MMU (m_in.done).  addr / byte_sel are set up for the dword
-- that has to be retried.
399 when MMU_LOOKUP =>
400 stall := '1';
401 if r.dwords_done = '1' then
402 addr := next_addr;
403 byte_sel := r.second_bytes;
404 else
405 addr := r.addr;
406 byte_sel := r.first_bytes;
407 end if;
408 if m_in.done = '1' then
409 if m_in.invalid = '0' and m_in.perm_error = '0' and m_in.rc_error = '0' and
410 m_in.badtree = '0' and m_in.segerr = '0' then
411 if r.instr_fault = '0' then
412 -- retry the request now that the MMU has installed a TLB entry
413 req := '1';
414 if two_dwords = '1' and r.dwords_done = '0' then
415 v.state := SECOND_REQ;
416 else
417 v.state := ACK_WAIT;
418 end if;
419 else
420 -- nothing to do, the icache retries automatically
421 stall := '0';
422 done := '1';
423 v.state := IDLE;
424 end if;
425 else
-- The MMU reported a fault: raise an exception and build the DSISR image from
-- the MMU status flags.
426 exception := '1';
427 dsisr(63 - 33) := m_in.invalid;
428 dsisr(63 - 36) := m_in.perm_error;
429 dsisr(63 - 38) := not r.load;
430 dsisr(63 - 44) := m_in.badtree;
431 dsisr(63 - 45) := m_in.rc_error;
432 v.state := IDLE;
433 end if;
434 end if;
435
-- Entered for tlbie and for mtspr to MMU-held SPRs; released by m_in.done.
436 when TLBIE_WAIT =>
437 stall := '1';
438 if m_in.done = '1' then
439 -- tlbie is finished
440 stall := '0';
441 done := '1';
442 v.state := IDLE;
443 end if;
444
-- Extra cycle for loads with update: write r.addr to r.update_reg (through the
-- do_update branch of the writeback mux below) and finish the instruction.
445 when LD_UPDATE =>
446 do_update := '1';
447 v.state := IDLE;
448 done := '1';
449
450 end case;
451
452 -- Update outputs to dcache
-- Request attributes are taken from v rather than r, so values latched in IDLE
-- during this evaluation are already visible on the request issued this cycle.
453 d_out.valid <= req;
454 d_out.load <= v.load;
455 d_out.dcbz <= v.dcbz;
456 d_out.nc <= v.nc;
457 d_out.reserve <= v.reserve;
458 d_out.addr <= addr;
459 d_out.data <= v.store_data;
460 d_out.byte_sel <= byte_sel;
461 d_out.virt_mode <= v.virt_mode;
462 d_out.priv_mode <= v.priv_mode;
463
464 -- Update outputs to MMU
-- Note that load and priv come from the registered r, while iside and tlbie
-- come from v.
465 m_out.valid <= mmureq;
466 m_out.iside <= v.instr_fault;
467 m_out.load <= r.load;
468 m_out.priv <= r.priv_mode;
469 m_out.tlbie <= v.tlbie;
470 m_out.mtspr <= mmu_mtspr;
471 m_out.sprn <= sprn;
472 m_out.addr <= addr;
473 m_out.slbia <= l_in.insn(7);
474 m_out.rs <= l_in.data;
475
476 -- Update outputs to writeback
477 -- Multiplex either cache data to the destination GPR or
478 -- the address for the rA update.
-- Priority: mfspr result first, then the rA update (r.addr), then load data.
-- mfspr uses l_in.write_reg because it completes in the cycle it is accepted.
479 l_out.valid <= done;
480 if mfspr = '1' then
481 l_out.write_enable <= '1';
482 l_out.write_reg <= l_in.write_reg;
483 l_out.write_data <= sprval;
484 elsif do_update = '1' then
485 l_out.write_enable <= '1';
486 l_out.write_reg <= r.update_reg;
487 l_out.write_data <= r.addr;
488 else
489 l_out.write_enable <= write_enable;
490 l_out.write_reg <= r.write_reg;
491 l_out.write_data <= data_trimmed;
492 end if;
493 l_out.xerc <= r.xerc;
494 l_out.rc <= r.rc and done;
495 l_out.store_done <= d_in.store_done;
496
497 -- update exception info back to execute1
498 e_out.exception <= exception;
499 e_out.instr_fault <= r.instr_fault;
500 e_out.invalid <= m_in.invalid;
501 e_out.badtree <= m_in.badtree;
502 e_out.perm_error <= m_in.perm_error;
503 e_out.rc_error <= m_in.rc_error;
504 e_out.segment_fault <= m_in.segerr;
-- For a data-side exception (not an instruction fault) record the faulting
-- address in v.dar and, unless the MMU flagged segerr, the DSISR image in
-- v.dsisr.
505 if exception = '1' and r.instr_fault = '0' then
506 v.dar := addr;
507 if m_in.segerr = '0' then
508 v.dsisr := dsisr;
509 end if;
510 end if;
511
512 stall_out <= stall;
513
514 -- Update registers
515 rin <= v;
516
517 end process;
518
519 end;