multiply: Move selection of result bits into execute1
[microwatt.git] / loadstore1.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.decode_types.all;
7 use work.common.all;
8
9 -- 2 cycle LSU
10 -- We calculate the address in the first cycle
11
-- Port interface of the load/store unit.
--
-- All interface records are declared outside this file (work.common is
-- imported above); only the way this unit uses them is described here.
entity loadstore1 is
    port (
        clk : in std_ulogic;
        rst : in std_ulogic;            -- sampled on the rising edge of clk

        -- Incoming operation, exception report and result write-back
        l_in  : in  Execute1ToLoadstore1Type;
        e_out : out Loadstore1ToExecute1Type;
        l_out : out Loadstore1ToWritebackType;

        -- Request to / response from the data cache
        d_out : out Loadstore1ToDcacheType;
        d_in  : in  DcacheToLoadstore1Type;

        -- Request to / response from the MMU
        m_out : out Loadstore1ToMmuType;
        m_in  : in  MmuToLoadstore1Type;

        -- dc_stall is part of the interface but architecture "behave"
        -- never reads it; stall_out is this unit's own stall request.
        dc_stall  : in  std_ulogic;
        stall_out : out std_ulogic;

        -- Registered debug/trace bits
        log_out : out std_ulogic_vector(9 downto 0)
        );
end entity loadstore1;
33
34 -- Note, we don't currently use the stall output from the dcache because
35 -- we know it can take two requests without stalling when idle, we are
36 -- its only user, and we know it never stalls when idle.
37
38 architecture behave of loadstore1 is
39
-- States of the load/store sequencer.
-- The declaration order is significant: state_t'pos(r.state) is what
-- gets written into the low three bits of the log output.
type state_t is (
    IDLE,         -- ready for instruction
    SECOND_REQ,   -- send 2nd request of unaligned xfer
    ACK_WAIT,     -- waiting for ack from dcache
    LD_UPDATE,    -- writing rA with computed addr on load
    MMU_LOOKUP,   -- waiting for MMU to look up translation
    TLBIE_WAIT    -- waiting for MMU to finish doing a tlbie
    );
48
-- Everything the unit remembers from one clock to the next: a copy of
-- the request being worked on, sequencing state, and the DAR/DSISR SPRs.
type reg_stage_t is record
    -- what kind of operation is in flight
    load         : std_ulogic;                      -- set for OP_LOAD
    tlbie        : std_ulogic;                      -- set for OP_TLBIE
    dcbz         : std_ulogic;                      -- set for OP_DCBZ
    -- effective address (or NIA for a failed instruction fetch)
    addr         : std_ulogic_vector(63 downto 0);
    -- store data, already byte-reversed/rotated into memory byte lanes
    store_data   : std_ulogic_vector(63 downto 0);
    -- permuted data from the first doubleword of a two-doubleword load
    load_data    : std_ulogic_vector(63 downto 0);
    -- latched copies of request attributes from l_in
    write_reg    : gpr_index_t;
    length       : std_ulogic_vector(3 downto 0);
    byte_reverse : std_ulogic;
    sign_extend  : std_ulogic;
    update       : std_ulogic;
    update_reg   : gpr_index_t;
    xerc         : xer_common_t;
    reserve      : std_ulogic;
    rc           : std_ulogic;
    nc           : std_ulogic;                      -- non-cacheable access
    virt_mode    : std_ulogic;
    priv_mode    : std_ulogic;
    -- sequencing
    state        : state_t;
    dwords_done  : std_ulogic;                      -- first of two dword transfers acked
    first_bytes  : std_ulogic_vector(7 downto 0);   -- byte enables, first doubleword
    second_bytes : std_ulogic_vector(7 downto 0);   -- byte enables, second doubleword
    -- fault registers: written on a data-side exception or by OP_MTSPR,
    -- read back by OP_MFSPR
    dar          : std_ulogic_vector(63 downto 0);
    dsisr        : std_ulogic_vector(31 downto 0);
    -- set while handling OP_FETCH_FAILED (instruction-side MMU lookup)
    instr_fault  : std_ulogic;
end record reg_stage_t;
77
-- Per-byte helper types used while formatting load data
type byte_sel_t is array(0 to 7) of std_ulogic;         -- one flag per result byte
subtype byte_trim_t is std_ulogic_vector(1 downto 0);   -- 2-bit source selector
type trim_ctl_t is array(0 to 7) of byte_trim_t;        -- selector for each result byte

-- r is the registered state, rin the value it takes at the next clock edge
signal r       : reg_stage_t;
signal rin     : reg_stage_t;

-- effective address computed combinationally from the incoming request
signal lsu_sum : std_ulogic_vector(63 downto 0);

-- registered copy of the trace bits driven onto log_out
signal log_data : std_ulogic_vector(9 downto 0);
86
-- Translate a one-hot access size (1, 2, 4 or 8 bytes) into a
-- right-justified 8-bit byte-enable mask.  Any other encoding of
-- "length" yields an all-zero mask.
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin
    case length is
        when x"1" =>            -- byte
            return x"01";
        when x"2" =>            -- halfword
            return x"03";
        when x"4" =>            -- word
            return x"0F";
        when x"8" =>            -- doubleword
            return x"FF";
        when others =>
            return x"00";
    end case;
end function length_to_sel;
103
-- Build the byte enables for an access of "size" bytes starting at byte
-- offset "address" within a doubleword.
-- The result is 16 bits wide: bits 7..0 select bytes of the first
-- doubleword and bits 15..8 select bytes of the following doubleword,
-- which is non-zero only when the access is unaligned enough to spill over.
function xfer_data_sel(size : in std_logic_vector(3 downto 0);
                       address : in std_logic_vector(2 downto 0))
    return std_ulogic_vector is
    variable padded : std_ulogic_vector(15 downto 0);
    variable shamt  : natural range 0 to 7;
begin
    shamt  := to_integer(unsigned(address));
    -- right-justified mask in the low byte, room to shift in the high byte
    padded := x"00" & length_to_sel(size);
    return std_ulogic_vector(shift_left(unsigned(padded), shamt));
end function xfer_data_sel;
116
117 begin
-- Effective address = addr1 + addr2, formed combinationally in the cycle
-- the request arrives; forced to zero whenever no valid request is present.
lsu_sum <= (others => '0') when l_in.valid /= '1' else
           std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));
120
-- State register.  On each rising clock edge r takes the value computed
-- by the combinational process; while rst is asserted only r.state is
-- forced to IDLE and every other field keeps its current value.
loadstore1_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            r <= rin;
        else
            r.state <= IDLE;
        end if;
    end if;
end process loadstore1_0;
131
-- Combinational next-state and output logic of the load/store unit.
--
-- Starting from the registered state r, this process:
--   * formats load data returned by the dcache (byte rotation, optional
--     byte reversal, trimming to the access length, sign extension),
--   * runs the sequencer (see state_t) that accepts a new operation in
--     IDLE, issues one or two dcache requests, waits for the ack, and
--     falls back to the MMU on a dcache error,
--   * drives the dcache, MMU, writeback and execute1 outputs,
--   * produces rin, the value r takes at the next clock edge.
--
-- Change relative to the previous revision: m_out.load and m_out.priv are
-- now taken from v rather than r.  OP_FETCH_FAILED and OP_TLBIE raise
-- m_out.valid in the very cycle the operation arrives, when r still holds
-- the attributes of the previous instruction; v already holds the new ones
-- (this is why m_out.iside and m_out.tlbie were using v).  In every other
-- state v and r agree on these fields, so data-side requests are unchanged.
-- The never-read local itlb_fault has also been dropped.
loadstore1_1: process(all)
    variable v : reg_stage_t;
    variable brev_lenm1 : unsigned(2 downto 0);
    variable byte_offset : unsigned(2 downto 0);
    variable j : integer;
    variable k : unsigned(2 downto 0);
    variable kk : unsigned(3 downto 0);
    variable long_sel : std_ulogic_vector(15 downto 0);
    variable byte_sel : std_ulogic_vector(7 downto 0);
    variable req : std_ulogic;
    variable stall : std_ulogic;
    variable addr : std_ulogic_vector(63 downto 0);
    variable wdata : std_ulogic_vector(63 downto 0);
    variable write_enable : std_ulogic;
    variable do_update : std_ulogic;
    variable two_dwords : std_ulogic;
    variable done : std_ulogic;
    variable data_permuted : std_ulogic_vector(63 downto 0);
    variable data_trimmed : std_ulogic_vector(63 downto 0);
    variable use_second : byte_sel_t;
    variable trim_ctl : trim_ctl_t;
    variable negative : std_ulogic;
    variable mfspr : std_ulogic;
    variable sprn : std_ulogic_vector(9 downto 0);
    variable sprval : std_ulogic_vector(63 downto 0);
    variable exception : std_ulogic;
    variable next_addr : std_ulogic_vector(63 downto 0);
    variable mmureq : std_ulogic;
    variable dsisr : std_ulogic_vector(31 downto 0);
    variable mmu_mtspr : std_ulogic;
begin
    -- Defaults: keep the registered state and drive every strobe inactive
    v := r;
    req := '0';
    stall := '0';
    done := '0';
    byte_sel := (others => '0');
    addr := lsu_sum;
    mfspr := '0';
    mmu_mtspr := '0';
    sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
    sprval := (others => '0');  -- avoid inferred latches
    exception := '0';
    dsisr := (others => '0');
    mmureq := '0';

    write_enable := '0';
    do_update := '0';
    -- the access spills into a second doubleword if any second-dword
    -- byte enable is set
    two_dwords := or (r.second_bytes);

    -- load data formatting
    byte_offset := unsigned(r.addr(2 downto 0));
    brev_lenm1 := "000";
    if r.byte_reverse = '1' then
        brev_lenm1 := unsigned(r.length(2 downto 0)) - 1;
    end if;

    -- shift and byte-reverse data bytes
    -- For result byte i, kk(2 downto 0) is the dcache byte lane to take it
    -- from and kk(3) says that lane belongs to the second doubleword.
    for i in 0 to 7 loop
        kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
        use_second(i) := kk(3);
        j := to_integer(kk(2 downto 0)) * 8;
        data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
    end loop;

    -- Work out the sign bit for sign extension.
    -- Assumes we are not doing both sign extension and byte reversal,
    -- in that for unaligned loads crossing two dwords we end up
    -- using a bit from the second dword, whereas for a byte-reversed
    -- (i.e. big-endian) load the sign bit would be in the first dword.
    negative := (r.length(3) and data_permuted(63)) or
                (r.length(2) and data_permuted(31)) or
                (r.length(1) and data_permuted(15)) or
                (r.length(0) and data_permuted(7));

    -- trim and sign-extend
    -- trim_ctl(i) selects the source of result byte i:
    --   "11" byte saved from the first doubleword (r.load_data)
    --   "10" byte from the data currently returned by the dcache
    --   "01" 0xFF (sign fill)
    --   "00" 0x00
    for i in 0 to 7 loop
        if i < to_integer(unsigned(r.length)) then
            if two_dwords = '1' then
                trim_ctl(i) := '1' & not use_second(i);
            else
                trim_ctl(i) := not use_second(i) & '0';
            end if;
        else
            trim_ctl(i) := '0' & (negative and r.sign_extend);
        end if;
        case trim_ctl(i) is
            when "11" =>
                data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
            when "10" =>
                data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
            when "01" =>
                data_trimmed(i * 8 + 7 downto i * 8) := x"FF";
            when others =>
                data_trimmed(i * 8 + 7 downto i * 8) := x"00";
        end case;
    end loop;

    -- compute (addr + 8) & ~7 for the second doubleword when unaligned
    next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";

    case r.state is
        when IDLE =>
            if l_in.valid = '1' then
                v.addr := lsu_sum;
                v.load := '0';
                v.dcbz := '0';
                v.tlbie := '0';
                v.instr_fault := '0';
                v.dwords_done := '0';
                case l_in.op is
                    when OP_STORE =>
                        req := '1';
                    when OP_LOAD =>
                        req := '1';
                        v.load := '1';
                    when OP_DCBZ =>
                        req := '1';
                        v.dcbz := '1';
                    when OP_TLBIE =>
                        mmureq := '1';
                        stall := '1';
                        v.tlbie := '1';
                        v.state := TLBIE_WAIT;
                    when OP_MFSPR =>
                        done := '1';
                        mfspr := '1';
                        -- partial decode on SPR number should be adequate given
                        -- the restricted set that get sent down this path
                        if sprn(9) = '0' and sprn(5) = '0' then
                            if sprn(0) = '0' then
                                sprval := x"00000000" & r.dsisr;
                            else
                                sprval := r.dar;
                            end if;
                        else
                            -- reading one of the SPRs in the MMU
                            sprval := m_in.sprval;
                        end if;
                    when OP_MTSPR =>
                        if sprn(9) = '0' and sprn(5) = '0' then
                            if sprn(0) = '0' then
                                v.dsisr := l_in.data(31 downto 0);
                            else
                                v.dar := l_in.data;
                            end if;
                            done := '1';
                        else
                            -- writing one of the SPRs in the MMU
                            mmu_mtspr := '1';
                            stall := '1';
                            v.state := TLBIE_WAIT;
                        end if;
                    when OP_FETCH_FAILED =>
                        -- send it to the MMU to do the radix walk
                        addr := l_in.nia;
                        v.addr := l_in.nia;
                        v.instr_fault := '1';
                        mmureq := '1';
                        stall := '1';
                        v.state := MMU_LOOKUP;
                    when others =>
                        assert false report "unknown op sent to loadstore1";
                end case;

                -- latch the request attributes for the following cycles
                v.write_reg := l_in.write_reg;
                v.length := l_in.length;
                v.byte_reverse := l_in.byte_reverse;
                v.sign_extend := l_in.sign_extend;
                v.update := l_in.update;
                v.update_reg := l_in.update_reg;
                v.xerc := l_in.xerc;
                v.reserve := l_in.reserve;
                v.rc := l_in.rc;
                v.nc := l_in.ci;
                v.virt_mode := l_in.virt_mode;
                v.priv_mode := l_in.priv_mode;

                -- XXX Temporary hack. Mark the op as non-cachable if the address
                -- is the form 0xc------- for a real-mode access.
                --
                -- This will have to be replaced by a combination of implementing the
                -- proper HV CI load/store instructions and having an MMU to get the I
                -- bit otherwise.
                if lsu_sum(31 downto 28) = "1100" and l_in.virt_mode = '0' then
                    v.nc := '1';
                end if;

                -- Do length_to_sel and work out if we are doing 2 dwords
                long_sel := xfer_data_sel(l_in.length, v.addr(2 downto 0));
                byte_sel := long_sel(7 downto 0);
                v.first_bytes := byte_sel;
                v.second_bytes := long_sel(15 downto 8);

                -- Do byte reversing and rotating for stores in the first cycle
                byte_offset := unsigned(lsu_sum(2 downto 0));
                brev_lenm1 := "000";
                if l_in.byte_reverse = '1' then
                    brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
                end if;
                for i in 0 to 7 loop
                    -- k wraps modulo 8, so bytes that belong to the second
                    -- doubleword land in its low byte lanes
                    k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
                    j := to_integer(k) * 8;
                    v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
                end loop;

                if req = '1' then
                    stall := '1';
                    if long_sel(15 downto 8) = "00000000" then
                        v.state := ACK_WAIT;
                    else
                        v.state := SECOND_REQ;
                    end if;
                end if;
            end if;

        when SECOND_REQ =>
            -- issue the request for the second doubleword
            addr := next_addr;
            byte_sel := r.second_bytes;
            req := '1';
            stall := '1';
            v.state := ACK_WAIT;

        when ACK_WAIT =>
            stall := '1';
            if d_in.valid = '1' then
                if d_in.error = '1' then
                    -- dcache will discard the second request if it
                    -- gets an error on the 1st of two requests
                    if r.dwords_done = '1' then
                        addr := next_addr;
                    else
                        addr := r.addr;
                    end if;
                    if d_in.cache_paradox = '1' then
                        -- signal an interrupt straight away
                        exception := '1';
                        dsisr(63 - 38) := not r.load;
                        -- XXX there is no architected bit for this
                        dsisr(63 - 35) := d_in.cache_paradox;
                        v.state := IDLE;
                    else
                        -- Look up the translation for TLB miss
                        -- and also for permission error and RC error
                        -- in case the PTE has been updated.
                        mmureq := '1';
                        v.state := MMU_LOOKUP;
                    end if;
                else
                    if two_dwords = '1' and r.dwords_done = '0' then
                        -- first half of a two-doubleword access: remember
                        -- its data and keep waiting for the second ack
                        v.dwords_done := '1';
                        if r.load = '1' then
                            v.load_data := data_permuted;
                        end if;
                    else
                        write_enable := r.load;
                        if r.load = '1' and r.update = '1' then
                            -- loads with rA update need an extra cycle
                            v.state := LD_UPDATE;
                        else
                            -- stores write back rA update in this cycle
                            do_update := r.update;
                            stall := '0';
                            done := '1';
                            v.state := IDLE;
                        end if;
                    end if;
                end if;
            end if;

        when MMU_LOOKUP =>
            stall := '1';
            -- re-present the address/byte enables of the doubleword that failed
            if r.dwords_done = '1' then
                addr := next_addr;
                byte_sel := r.second_bytes;
            else
                addr := r.addr;
                byte_sel := r.first_bytes;
            end if;
            if m_in.done = '1' then
                if m_in.invalid = '0' and m_in.perm_error = '0' and m_in.rc_error = '0' and
                    m_in.badtree = '0' and m_in.segerr = '0' then
                    if r.instr_fault = '0' then
                        -- retry the request now that the MMU has installed a TLB entry
                        req := '1';
                        if two_dwords = '1' and r.dwords_done = '0' then
                            v.state := SECOND_REQ;
                        else
                            v.state := ACK_WAIT;
                        end if;
                    else
                        -- nothing to do, the icache retries automatically
                        stall := '0';
                        done := '1';
                        v.state := IDLE;
                    end if;
                else
                    exception := '1';
                    dsisr(63 - 33) := m_in.invalid;
                    dsisr(63 - 36) := m_in.perm_error;
                    dsisr(63 - 38) := not r.load;
                    dsisr(63 - 44) := m_in.badtree;
                    dsisr(63 - 45) := m_in.rc_error;
                    v.state := IDLE;
                end if;
            end if;

        when TLBIE_WAIT =>
            -- also used while waiting for an MMU SPR write to complete
            stall := '1';
            if m_in.done = '1' then
                -- tlbie is finished
                stall := '0';
                done := '1';
                v.state := IDLE;
            end if;

        when LD_UPDATE =>
            do_update := '1';
            v.state := IDLE;
            done := '1';

    end case;

    -- Update outputs to dcache
    d_out.valid <= req;
    d_out.load <= v.load;
    d_out.dcbz <= v.dcbz;
    d_out.nc <= v.nc;
    d_out.reserve <= v.reserve;
    d_out.addr <= addr;
    d_out.data <= v.store_data;
    d_out.byte_sel <= byte_sel;
    d_out.virt_mode <= v.virt_mode;
    d_out.priv_mode <= v.priv_mode;

    -- Update outputs to MMU
    -- All request attributes come from v so that a request raised in the
    -- cycle an operation arrives (OP_FETCH_FAILED, OP_TLBIE) describes
    -- that operation rather than the previous one.
    -- NOTE(review): the MMU is not visible here; confirm it samples
    -- load/priv in the same cycle as valid.
    m_out.valid <= mmureq;
    m_out.iside <= v.instr_fault;
    m_out.load <= v.load;
    m_out.priv <= v.priv_mode;
    m_out.tlbie <= v.tlbie;
    m_out.mtspr <= mmu_mtspr;
    m_out.sprn <= sprn;
    m_out.addr <= addr;
    m_out.slbia <= l_in.insn(7);
    m_out.rs <= l_in.data;

    -- Update outputs to writeback
    -- Multiplex either cache data to the destination GPR or
    -- the address for the rA update.
    l_out.valid <= done;
    if mfspr = '1' then
        l_out.write_enable <= '1';
        l_out.write_reg <= l_in.write_reg;
        l_out.write_data <= sprval;
    elsif do_update = '1' then
        l_out.write_enable <= '1';
        l_out.write_reg <= r.update_reg;
        l_out.write_data <= r.addr;
    else
        l_out.write_enable <= write_enable;
        l_out.write_reg <= r.write_reg;
        l_out.write_data <= data_trimmed;
    end if;
    l_out.xerc <= r.xerc;
    l_out.rc <= r.rc and done;
    l_out.store_done <= d_in.store_done;

    -- update exception info back to execute1
    e_out.exception <= exception;
    e_out.instr_fault <= r.instr_fault;
    e_out.invalid <= m_in.invalid;
    e_out.badtree <= m_in.badtree;
    e_out.perm_error <= m_in.perm_error;
    e_out.rc_error <= m_in.rc_error;
    e_out.segment_fault <= m_in.segerr;
    -- Data-side exceptions record the faulting address in DAR and, unless
    -- the MMU reported a segment error, the cause bits in DSISR.
    if exception = '1' and r.instr_fault = '0' then
        v.dar := addr;
        if m_in.segerr = '0' then
            v.dsisr := dsisr;
        end if;
    end if;

    stall_out <= stall;

    -- Update registers
    rin <= v;

end process;
522
-- Trace output: log_out is a one-clock-delayed snapshot of the main
-- handshake signals.  Bit layout, MSB first:
--   9 stall_out, 8 e_out.exception, 7 l_out.valid, 6 m_out.valid,
--   5 d_out.valid, 4 m_in.done, 3 r.dwords_done, 2..0 state_t'pos(r.state)
log_out <= log_data;

ls1_log: process(clk)
    variable state_code : std_ulogic_vector(2 downto 0);
begin
    if rising_edge(clk) then
        state_code := std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
        log_data <= stall_out &
                    e_out.exception &
                    l_out.valid &
                    m_out.valid &
                    d_out.valid &
                    m_in.done &
                    r.dwords_done &
                    state_code;
    end if;
end process ls1_log;
537 end;