Plumb attn instruction through to execute1
[microwatt.git] / execute1.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.decode_types.all;
7 use work.common.all;
8 use work.helpers.all;
9 use work.crhelpers.all;
10 use work.insn_helpers.all;
11 use work.ppc_fx_insns.all;
12
-- First execute stage.  Takes a decoded instruction from decode2 (e_in) and
-- produces a writeback record (e_out), a load/store request (l_out) and a
-- fetch redirect (f_out), plus pipeline control outputs.
entity execute1 is
    generic (
        -- When true, the operand inputs may be taken from the previous
        -- result (r.e.write_data) instead of the values read by decode2.
        EX1_BYPASS : boolean := true
    );
    port (
        clk, rst : in std_ulogic;

        -- asynchronous (driven combinationally)
        flush_out, stall_out : out std_ulogic;

        -- decoded instruction and operands from decode2
        e_in : in Decode2ToExecute1Type;

        -- asynchronous (driven combinationally)
        l_out : out Execute1ToLoadstore1Type;
        f_out : out Execute1ToFetch1Type;

        -- registered result towards writeback
        e_out : out Execute1ToWritebackType;

        -- icache_inval is raised for OP_ICBI; terminate_out for illegal,
        -- attn and unhandled instruction types
        icache_inval, terminate_out : out std_ulogic
    );
end entity execute1;
37
38 architecture behaviour of execute1 is
-- State carried from one clock cycle to the next by the execute stage.
type reg_type is record
    -- record presented to writeback on e_out
    e : Execute1ToWritebackType;
    -- a branch-and-link was executed; LR is written on the following cycle
    lr_update : std_ulogic;
    next_lr : std_ulogic_vector(63 downto 0);
    -- a multi-cycle operation was started and has not yet delivered its result
    mul_in_progress, div_in_progress, cntz_in_progress : std_ulogic;
    -- destination and flags saved when a multi-cycle operation is issued
    slow_op_dest : gpr_index_t;
    slow_op_rc, slow_op_oe : std_ulogic;
    slow_op_xerc : xer_common_t;
end record;

-- r is the registered state, rin the value it takes at the next clock edge
signal r : reg_type;
signal rin : reg_type;

-- operand values after the optional bypass selection
signal a_in : std_ulogic_vector(63 downto 0);
signal b_in : std_ulogic_vector(63 downto 0);
signal c_in : std_ulogic_vector(63 downto 0);

-- control registers (ctrl) and their next value (ctrl_tmp)
signal ctrl : ctrl_t := (others => (others => '0'));
signal ctrl_tmp : ctrl_t := (others => (others => '0'));

-- rotator control and results
signal right_shift : std_ulogic;
signal rot_clear_left : std_ulogic;
signal rot_clear_right : std_ulogic;
signal rotator_result : std_ulogic_vector(63 downto 0);
signal rotator_carry : std_ulogic;

-- results of the logical, count-zero, popcount and parity units
signal logical_result : std_ulogic_vector(63 downto 0);
signal countzero_result : std_ulogic_vector(63 downto 0);
signal popcnt_result : std_ulogic_vector(63 downto 0);
signal parity_result : std_ulogic_vector(63 downto 0);

-- multiply signals
signal x_to_multiply : Execute1ToMultiplyType;
signal multiply_to_x : MultiplyToExecute1Type;

-- divider signals
signal x_to_divider : Execute1ToDividerType;
signal divider_to_x : DividerToExecute1Type;
74
-- Store new carry flags in the writeback record and mark XER for update.
--   e       : writeback record being built (modified in place)
--   carry32 : value for XER CA32
--   carry   : value for XER CA
procedure set_carry(e: inout Execute1ToWritebackType;
                    carry32 : in std_ulogic;
                    carry : in std_ulogic) is
begin
    e.write_xerc_enable := '1';
    e.xerc.ca := carry;
    e.xerc.ca32 := carry32;
end procedure set_carry;
83
-- Store new overflow flags in the writeback record and mark XER for update.
-- SO is sticky: it is set when ov is '1' and otherwise left untouched.
--   e    : writeback record being built (modified in place)
--   ov   : value for XER OV
--   ov32 : value for XER OV32
-- Note the argument order (ov, ov32) is the reverse of set_carry's
-- (carry32, carry).
procedure set_ov(e: inout Execute1ToWritebackType;
                 ov : in std_ulogic;
                 ov32 : in std_ulogic) is
begin
    e.write_xerc_enable := '1';
    if ov = '1' then
        e.xerc.so := '1';
    end if;
    e.xerc.ov := ov;
    e.xerc.ov32 := ov32;
end procedure set_ov;
95
-- Signed-overflow detection for an addition.
--   msb_a, msb_b : sign bits of the two addends
--   ca           : carry out of the sign bit position
--   msb_r        : sign bit of the sum
-- Returns '1' when both addends have the same sign bit and ca differs
-- from msb_r.
function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic;
                 ca: std_ulogic; msb_r: std_ulogic) return std_ulogic is
    variable same_sign : std_ulogic;
    variable sign_flip : std_ulogic;
begin
    same_sign := msb_a xnor msb_b;
    sign_flip := ca xor msb_r;
    return sign_flip and same_sign;
end function calc_ov;
101
-- Select the carry-in bit for the adder.
--   ic   : carry-in selector from decode (ZERO, CA or ONE)
--   xerc : current XER common bits; only xerc.ca is read
-- Returns '0' for ZERO, '1' for ONE and xerc.ca for CA.
function decode_input_carry(ic : carry_in_t;
                            xerc : xer_common_t) return std_ulogic is
    variable cin : std_ulogic;
begin
    case ic is
        when ZERO =>
            cin := '0';
        when ONE =>
            cin := '1';
        when CA =>
            cin := xerc.ca;
    end case;
    return cin;
end function decode_input_carry;
114
115 begin
116
-- Rotate/shift unit.  The shift amount is the low 7 bits of b_in; the
-- right_shift / clear_left / clear_right controls are derived from the
-- instruction type in execute1_1.
rotator_0: entity work.rotator
    port map (
        insn => e_in.insn,
        is_32bit => e_in.is_32bit,
        arith => e_in.is_signed,
        right_shift => right_shift,
        clear_left => rot_clear_left,
        clear_right => rot_clear_right,
        rs => c_in,
        ra => a_in,
        shift => b_in(6 downto 0),
        result => rotator_result,
        carry_out => rotator_carry
    );

-- Logical unit; also delivers the popcount and parity results.
logical_0: entity work.logical
    port map (
        op => e_in.insn_type,
        invert_in => e_in.invert_a,
        invert_out => e_in.invert_out,
        datalen => e_in.data_len,
        rs => c_in,
        rb => b_in,
        result => logical_result,
        popcnt => popcnt_result,
        parity => parity_result
    );

-- Count-zeroes unit (clocked).  Instruction bit 10 selects the count_right
-- input.
countzero_0: entity work.zero_counter
    port map (
        clk => clk,
        is_32bit => e_in.is_32bit,
        count_right => e_in.insn(10),
        rs => c_in,
        result => countzero_result
    );

-- Multiplier (clocked); request and response records are driven and
-- consumed by execute1_1.
multiply_0: entity work.multiply
    port map (
        clk => clk,
        m_in => x_to_multiply,
        m_out => multiply_to_x
    );

-- Divider (clocked, with reset); request and response records are driven
-- and consumed by execute1_1.
divider_0: entity work.divider
    port map (
        clk => clk,
        rst => rst,
        d_in => x_to_divider,
        d_out => divider_to_x
    );
168
-- Operand selection.  When bypassing is enabled (EX1_BYPASS) and decode2
-- flags the operand as bypassed, take the result latched in r.e.write_data
-- instead of the value supplied by decode2.
a_in <= r.e.write_data when (EX1_BYPASS and (e_in.bypass_data1 = '1'))
        else e_in.read_data1;
b_in <= r.e.write_data when (EX1_BYPASS and (e_in.bypass_data2 = '1'))
        else e_in.read_data2;
c_in <= r.e.write_data when (EX1_BYPASS and (e_in.bypass_data3 = '1'))
        else e_in.read_data3;
172
-- Clocked process: latches the next state (rin) and next control registers
-- (ctrl_tmp) on every rising clock edge.  It also checks that no valid
-- instruction arrives in the cycle used for the delayed LR write, and logs
-- that write.
execute1_0: process(clk)
begin
    if rising_edge(clk) then
        ctrl <= ctrl_tmp;
        r <= rin;
        if r.lr_update = '1' then
            -- the LR write uses this cycle's result path, so a valid
            -- instruction arriving now would collide with it
            assert e_in.valid /= '1'
                report "LR update collision with valid in EX1"
                severity failure;
            report "LR update to " & to_hstring(r.next_lr);
        end if;
    end if;
end process execute1_0;
186
-- execute1_1: combinational process (sensitive to everything it reads).
-- It works on a variable copy v of the registered state r and either
-- executes the instruction presented on e_in, or completes a pending
-- delayed LR write / count-zero / multiply / divide.  It drives rin, the
-- multiplier and divider request records, f_out, l_out, e_out and the
-- flush_out / stall_out / terminate_out / icache_inval outputs.
187 execute1_1: process(all)
188 variable v : reg_type;
189 variable a_inv : std_ulogic_vector(63 downto 0);
190 variable result : std_ulogic_vector(63 downto 0);
191 variable newcrf : std_ulogic_vector(3 downto 0);
192 variable result_with_carry : std_ulogic_vector(64 downto 0);
193 variable result_en : std_ulogic;
194 variable crnum : crnum_t;
195 variable crbit : integer range 0 to 31;
196 variable scrnum : crnum_t;
197 variable lo, hi : integer;
-- NOTE(review): sh/mb/me and sh32/mb32/me32 are not referenced anywhere in
-- this process body; they look like leftovers.
198 variable sh, mb, me : std_ulogic_vector(5 downto 0);
199 variable sh32, mb32, me32 : std_ulogic_vector(4 downto 0);
200 variable bo, bi : std_ulogic_vector(4 downto 0);
201 variable bf, bfa : std_ulogic_vector(2 downto 0);
202 variable cr_op : std_ulogic_vector(9 downto 0);
203 variable cr_operands : std_ulogic_vector(1 downto 0);
204 variable bt, ba, bb : std_ulogic_vector(4 downto 0);
205 variable btnum, banum, bbnum : integer range 0 to 31;
206 variable crresult : std_ulogic;
207 variable l : std_ulogic;
208 variable next_nia : std_ulogic_vector(63 downto 0);
209 variable carry_32, carry_64 : std_ulogic;
210 variable sign1, sign2 : std_ulogic;
211 variable abs1, abs2 : signed(63 downto 0);
212 variable overflow : std_ulogic;
213 variable negative : std_ulogic;
214 variable zerohi, zerolo : std_ulogic;
215 variable msb_a, msb_b : std_ulogic;
216 variable a_lt : std_ulogic;
217 variable lv : Execute1ToLoadstore1Type;
218 begin
-- Default result values for this evaluation
219 result := (others => '0');
220 result_with_carry := (others => '0');
221 result_en := '0';
222 newcrf := (others => '0');
223
-- Start from the registered state, with a fresh (init) writeback record
224 v := r;
225 v.e := Execute1ToWritebackInit;
226
227 -- XER forwarding. To avoid having to track XER hazards, we
228 -- use the previously latched value.
229 --
230 -- If the XER was modified by a multiply or a divide, those are
231 -- single issue, we'll get the up to date value from decode2 from
232 -- the register file.
233 --
234 -- If it was modified by an instruction older than the previous
235 -- one in EX1, it will have also hit writeback and will be up
236 -- to date in decode2.
237 --
238 -- That leaves us with the case where it was updated by the previous
239 -- instruction in EX1. In that case, we can forward it back here.
240 --
241 -- This will break if we allow pipelining of multiply and divide,
242 -- but ideally, those should go via EX1 anyway and run as a state
243 -- machine from here.
244 --
245 -- One additional hazard to beware of is an XER:SO modifying instruction
246 -- in EX1 followed immediately by a store conditional. Due to our
247 -- writeback latency, the store will go down the LSU with the previous
248 -- XER value, thus the stcx. will set CR0:SO using an obsolete SO value.
249 --
250 -- We will need to handle that if we ever make stcx. not single issue
251 --
252 -- We always pass a valid XER value downto writeback even when
253 -- we aren't updating it, in order for XER:SO -> CR0:SO transfer
254 -- to work for RC instructions.
255 --
256 if r.e.write_xerc_enable = '1' then
257 v.e.xerc := r.e.xerc;
258 else
259 v.e.xerc := e_in.xerc;
260 end if;
261
-- These flags are cleared every cycle and re-asserted below when an
-- operation is started or still pending.
262 v.lr_update := '0';
263 v.mul_in_progress := '0';
264 v.div_in_progress := '0';
265 v.cntz_in_progress := '0';
266
267 -- signals to multiply unit
268 x_to_multiply <= Execute1ToMultiplyInit;
269 x_to_multiply.insn_type <= e_in.insn_type;
270 x_to_multiply.is_32bit <= e_in.is_32bit;
271
-- Operands are widened by one bit over the 64-bit inputs: sign-extended
-- for signed operations, zero-extended otherwise.  32-bit forms use only
-- the low word of each operand.
272 if e_in.is_32bit = '1' then
273 if e_in.is_signed = '1' then
274 x_to_multiply.data1 <= (others => a_in(31));
275 x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
276 x_to_multiply.data2 <= (others => b_in(31));
277 x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
278 else
279 x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
280 x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
281 end if;
282 else
283 if e_in.is_signed = '1' then
284 x_to_multiply.data1 <= a_in(63) & a_in;
285 x_to_multiply.data2 <= b_in(63) & b_in;
286 else
287 x_to_multiply.data1 <= '0' & a_in;
288 x_to_multiply.data2 <= '0' & b_in;
289 end if;
290 end if;
291
292 -- signals to divide unit
-- sign1/sign2 stay '0' for unsigned operations, so abs1/abs2 are then the
-- unmodified operands.
293 sign1 := '0';
294 sign2 := '0';
295 if e_in.is_signed = '1' then
296 if e_in.is_32bit = '1' then
297 sign1 := a_in(31);
298 sign2 := b_in(31);
299 else
300 sign1 := a_in(63);
301 sign2 := b_in(63);
302 end if;
303 end if;
304 -- take absolute values
305 if sign1 = '0' then
306 abs1 := signed(a_in);
307 else
308 abs1 := - signed(a_in);
309 end if;
310 if sign2 = '0' then
311 abs2 := signed(b_in);
312 else
313 abs2 := - signed(b_in);
314 end if;
315
316 x_to_divider <= Execute1ToDividerInit;
317 x_to_divider.is_signed <= e_in.is_signed;
318 x_to_divider.is_32bit <= e_in.is_32bit;
319 if e_in.insn_type = OP_MOD then
320 x_to_divider.is_modulus <= '1';
321 end if;
-- NOTE(review): this reads back the signal x_to_divider.is_modulus that is
-- assigned just above in this same process.  The newly scheduled value is
-- only visible after a delta cycle, so the expression relies on the
-- process being re-evaluated when x_to_divider changes -- confirm this is
-- intended (a local variable would avoid the dependency).
322 x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
323 if e_in.is_32bit = '0' then
324 -- 64-bit forms
325 if e_in.insn_type = OP_DIVE then
326 x_to_divider.is_extended <= '1';
327 end if;
328 x_to_divider.dividend <= std_ulogic_vector(abs1);
329 x_to_divider.divisor <= std_ulogic_vector(abs2);
330 else
331 -- 32-bit forms
332 x_to_divider.is_extended <= '0';
333 if e_in.insn_type = OP_DIVE then -- extended forms
-- the 32-bit dividend is placed in the upper word, low word zero
334 x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
335 else
336 x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
337 end if;
338 x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
339 end if;
340
-- Next value of the control registers: unchanged except for the timebase,
-- which is incremented by one (ctrl is loaded from ctrl_tmp on each clock
-- edge in execute1_0).
341 ctrl_tmp <= ctrl;
342 -- FIXME: run at 512MHz not core freq
343 ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
344
-- Default values of the combinational outputs
345 terminate_out <= '0';
346 icache_inval <= '0';
347 stall_out <= '0';
348 f_out <= Execute1ToFetch1TypeInit;
349
350 -- Next insn adder used in a couple of places
351 next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4);
352
353 -- rotator control signals
354 right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
355 rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0';
356 rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
357
-- Case 1: a valid instruction is presented this cycle
358 if e_in.valid = '1' then
359
360 v.e.valid := '1';
361 v.e.write_reg := e_in.write_reg;
-- Saved so that count-zero/multiply/divide can write their result in a
-- later cycle, when e_in no longer carries this instruction.
362 v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
363 v.slow_op_rc := e_in.rc;
364 v.slow_op_oe := e_in.oe;
365 v.slow_op_xerc := v.e.xerc;
366
367 case_0: case e_in.insn_type is
368
369 when OP_ILLEGAL =>
370 terminate_out <= '1';
371 report "illegal";
372 when OP_ATTN =>
-- attn raises terminate_out, the same output used for illegal instructions
373 terminate_out <= '1';
374 report "ATTN";
375 when OP_NOP =>
376 -- Do nothing
377 when OP_ADD | OP_CMP =>
-- Both share the adder: a_inv (optionally inverted RA) + b_in + carry-in
378 if e_in.invert_a = '0' then
379 a_inv := a_in;
380 else
381 a_inv := not a_in;
382 end if;
383 result_with_carry := ppc_adde(a_inv, b_in,
384 decode_input_carry(e_in.input_carry, v.e.xerc));
385 result := result_with_carry(63 downto 0);
-- carry into bit 32 (= carry out of bit 31), recovered from the sum bit
-- and the two operand bits at position 32
386 carry_32 := result(32) xor a_inv(32) xor b_in(32);
387 carry_64 := result_with_carry(64);
388 if e_in.insn_type = OP_ADD then
389 if e_in.output_carry = '1' then
390 set_carry(v.e, carry_32, carry_64);
391 end if;
392 if e_in.oe = '1' then
393 set_ov(v.e,
394 calc_ov(a_inv(63), b_in(63), carry_64, result_with_carry(63)),
395 calc_ov(a_inv(31), b_in(31), carry_32, result_with_carry(31)));
396 end if;
397 result_en := '1';
398 else
399 -- CMP and CMPL instructions
400 -- Note, we have done RB - RA, not RA - RB
401 bf := insn_bf(e_in.insn);
402 l := insn_l(e_in.insn);
403 v.e.write_cr_enable := '1';
404 crnum := to_integer(unsigned(bf));
405 v.e.write_cr_mask := num_to_fxm(crnum);
406 zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0)));
407 zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32)));
408 if zerolo = '1' and (l = '0' or zerohi = '1') then
409 -- values are equal
410 newcrf := "001" & v.e.xerc.so;
411 else
412 if l = '1' then
413 -- 64-bit comparison
414 msb_a := a_in(63);
415 msb_b := b_in(63);
416 else
417 -- 32-bit comparison
418 msb_a := a_in(31);
419 msb_b := b_in(31);
420 end if;
421 if msb_a /= msb_b then
422 -- Subtraction might overflow, but
423 -- comparison is clear from MSB difference.
424 -- for signed, 0 is greater; for unsigned, 1 is greater
425 a_lt := msb_a xnor e_in.is_signed;
426 else
427 -- Subtraction cannot overflow since MSBs are equal.
428 -- carry = 1 indicates RA is smaller (signed or unsigned)
429 a_lt := (not l and carry_32) or (l and carry_64);
430 end if;
-- field layout: LT, GT, EQ, SO
431 newcrf := a_lt & not a_lt & '0' & v.e.xerc.so;
432 end if;
-- The 4-bit field is replicated into all eight positions of write_cr_data;
-- presumably write_cr_mask selects the one actually written -- confirm in
-- writeback.
433 for i in 0 to 7 loop
434 lo := i*4;
435 hi := lo + 3;
436 v.e.write_cr_data(hi downto lo) := newcrf;
437 end loop;
438 end if;
439 when OP_AND | OP_OR | OP_XOR =>
440 result := logical_result;
441 result_en := '1';
442 when OP_B =>
-- Unconditional branch: AA set gives an absolute target (b_in), otherwise
-- b_in is added to e_in.nia
443 f_out.redirect <= '1';
444 if (insn_aa(e_in.insn)) then
445 f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
446 else
447 f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
448 end if;
449 when OP_BC =>
450 -- read_data1 is CTR
451 bo := insn_bo(e_in.insn);
452 bi := insn_bi(e_in.insn);
-- bo(2) = '0': the branch decrements CTR, so write CTR - 1 back
453 if bo(4-2) = '0' then
454 result := std_ulogic_vector(unsigned(a_in) - 1);
455 result_en := '1';
456 v.e.write_reg := fast_spr_num(SPR_CTR);
457 end if;
458 if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
459 f_out.redirect <= '1';
460 if (insn_aa(e_in.insn)) then
461 f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
462 else
463 f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
464 end if;
465 end if;
466 when OP_BCREG =>
467 -- read_data1 is CTR
468 -- read_data2 is target register (CTR, LR or TAR)
469 bo := insn_bo(e_in.insn);
470 bi := insn_bi(e_in.insn);
-- CTR is decremented only when bo(2) = '0' and instruction bit 10 is '0'
471 if bo(4-2) = '0' and e_in.insn(10) = '0' then
472 result := std_ulogic_vector(unsigned(a_in) - 1);
473 result_en := '1';
474 v.e.write_reg := fast_spr_num(SPR_CTR);
475 end if;
476 if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
477 f_out.redirect <= '1';
-- the two low bits of the target register are forced to zero
478 f_out.redirect_nia <= b_in(63 downto 2) & "00";
479 end if;
480 when OP_CMPB =>
481 result := ppc_cmpb(c_in, b_in);
482 result_en := '1';
483 when OP_CNTZ =>
-- Two-cycle operation: stall now, pick up countzero_result next cycle in
-- the cntz_in_progress branch below
484 v.e.valid := '0';
485 v.cntz_in_progress := '1';
486 stall_out <= '1';
487 when OP_EXTS =>
488 -- note data_len is a 1-hot encoding
489 negative := (e_in.data_len(0) and c_in(7)) or
490 (e_in.data_len(1) and c_in(15)) or
491 (e_in.data_len(2) and c_in(31));
-- fill with the sign bit, then copy in the bytes covered by data_len
492 result := (others => negative);
493 if e_in.data_len(2) = '1' then
494 result(31 downto 16) := c_in(31 downto 16);
495 end if;
496 if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
497 result(15 downto 8) := c_in(15 downto 8);
498 end if;
499 result(7 downto 0) := c_in(7 downto 0);
500 result_en := '1';
501 when OP_ISEL =>
-- select a_in or b_in according to the CR bit named by the BC field;
-- 31-crbit converts the instruction's bit number to the vector index
502 crbit := to_integer(unsigned(insn_bc(e_in.insn)));
503 if e_in.cr(31-crbit) = '1' then
504 result := a_in;
505 else
506 result := b_in;
507 end if;
508 result_en := '1';
509 when OP_MCRF =>
510 cr_op := insn_cr(e_in.insn);
511 report "CR OP " & to_hstring(cr_op);
512 if cr_op(0) = '0' then -- MCRF
513 bf := insn_bf(e_in.insn);
514 bfa := insn_bfa(e_in.insn);
515 v.e.write_cr_enable := '1';
516 crnum := to_integer(unsigned(bf));
517 scrnum := to_integer(unsigned(bfa));
518 v.e.write_cr_mask := num_to_fxm(crnum);
-- pick the source field BFA out of the CR ...
519 for i in 0 to 7 loop
520 lo := (7-i)*4;
521 hi := lo + 3;
522 if i = scrnum then
523 newcrf := e_in.cr(hi downto lo);
524 end if;
525 end loop;
-- ... and replicate it into every field position of write_cr_data
526 for i in 0 to 7 loop
527 lo := i*4;
528 hi := lo + 3;
529 v.e.write_cr_data(hi downto lo) := newcrf;
530 end loop;
531 else
-- CR logical operation on single bits BA and BB, result to bit BT
532 v.e.write_cr_enable := '1';
533 bt := insn_bt(e_in.insn);
534 ba := insn_ba(e_in.insn);
535 bb := insn_bb(e_in.insn);
536 btnum := 31 - to_integer(unsigned(bt));
537 banum := 31 - to_integer(unsigned(ba));
538 bbnum := 31 - to_integer(unsigned(bb));
539 -- Bits 5-8 of cr_op give the truth table of the requested
540 -- logical operation
541 cr_operands := e_in.cr(banum) & e_in.cr(bbnum);
542 crresult := cr_op(5 + to_integer(unsigned(cr_operands)));
543 v.e.write_cr_mask := num_to_fxm((31-btnum) / 4);
-- all other CR bits are passed through unchanged
544 for i in 0 to 31 loop
545 if i = btnum then
546 v.e.write_cr_data(i) := crresult;
547 else
548 v.e.write_cr_data(i) := e_in.cr(i);
549 end if;
550 end loop;
551 end if;
552 when OP_MFSPR =>
553 if is_fast_spr(e_in.read_reg1) then
554 result := a_in;
555 if decode_spr_num(e_in.insn) = SPR_XER then
556 -- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
-- the SO/OV/CA/OV32/CA32 bits come from the forwarded xerc rather than a_in
557 result(63 downto 32) := (others => '0');
558 result(63-32) := v.e.xerc.so;
559 result(63-33) := v.e.xerc.ov;
560 result(63-34) := v.e.xerc.ca;
561 result(63-35 downto 63-43) := "000000000";
562 result(63-44) := v.e.xerc.ov32;
563 result(63-45) := v.e.xerc.ca32;
564 end if;
565 else
-- slow SPRs: only the timebase is readable here, everything else reads 0
566 case decode_spr_num(e_in.insn) is
567 when SPR_TB =>
568 result := ctrl.tb;
569 when others =>
570 result := (others => '0');
571 end case;
572 end if;
573 result_en := '1';
574 when OP_MFCR =>
575 if e_in.insn(20) = '0' then
576 -- mfcr
577 result := x"00000000" & e_in.cr;
578 else
579 -- mfocrf
580 crnum := fxm_to_num(insn_fxm(e_in.insn));
581 result := (others => '0');
582 for i in 0 to 7 loop
583 lo := (7-i)*4;
584 hi := lo + 3;
585 if crnum = i then
586 result(hi downto lo) := e_in.cr(hi downto lo);
587 end if;
588 end loop;
589 end if;
590 result_en := '1';
591 when OP_MTCRF =>
592 v.e.write_cr_enable := '1';
593 if e_in.insn(20) = '0' then
594 -- mtcrf
595 v.e.write_cr_mask := insn_fxm(e_in.insn);
596 else
597 -- mtocrf: We require one hot priority encoding here
598 crnum := fxm_to_num(insn_fxm(e_in.insn));
599 v.e.write_cr_mask := num_to_fxm(crnum);
600 end if;
601 v.e.write_cr_data := c_in(31 downto 0);
602 when OP_MTSPR =>
603 report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
604 "=" & to_hstring(c_in);
605 if is_fast_spr(e_in.write_reg) then
606 result := c_in;
607 result_en := '1';
608 if decode_spr_num(e_in.insn) = SPR_XER then
-- writing XER also updates the xerc bits carried in the writeback record
609 v.e.xerc.so := c_in(63-32);
610 v.e.xerc.ov := c_in(63-33);
611 v.e.xerc.ca := c_in(63-34);
612 v.e.xerc.ov32 := c_in(63-44);
613 v.e.xerc.ca32 := c_in(63-45);
614 v.e.write_xerc_enable := '1';
615 end if;
616 else
617 -- TODO: Implement slow SPRs
618 -- case decode_spr_num(e_in.insn) is
619 -- when others =>
620 -- end case;
621 end if;
622 when OP_POPCNT =>
623 result := popcnt_result;
624 result_en := '1';
625 when OP_PRTY =>
626 result := parity_result;
627 result_en := '1';
628 when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
629 result := rotator_result;
630 if e_in.output_carry = '1' then
-- the rotator's single carry is used for both CA32 and CA
631 set_carry(v.e, rotator_carry, rotator_carry);
632 end if;
633 result_en := '1';
634 when OP_SIM_CONFIG =>
635 -- bit 0 was used to select the microwatt console, which
636 -- we no longer support.
637 result := x"0000000000000000";
638 result_en := '1';
639
640 when OP_TDI =>
641 -- Keep our test cases happy for now, ignore trap instructions
642 report "OP_TDI FIXME";
643
644 when OP_ISYNC =>
-- redirect fetch to the next sequential instruction
645 f_out.redirect <= '1';
646 f_out.redirect_nia <= next_nia;
647
648 when OP_ICBI =>
649 icache_inval <= '1';
650
651 when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
-- start the multiplier and stall; the result is collected in the
-- mul_in_progress branch below
652 v.e.valid := '0';
653 v.mul_in_progress := '1';
654 stall_out <= '1';
655 x_to_multiply.valid <= '1';
656
657 when OP_DIV | OP_DIVE | OP_MOD =>
-- start the divider and stall; the result is collected in the
-- div_in_progress branch below
658 v.e.valid := '0';
659 v.div_in_progress := '1';
660 stall_out <= '1';
661 x_to_divider.valid <= '1';
662
663 when OP_LOAD | OP_STORE =>
664 -- loadstore/dcache has its own port to writeback
665 v.e.valid := '0';
666
667 when others =>
668 terminate_out <= '1';
669 report "illegal";
670 end case;
671
672 v.e.rc := e_in.rc and e_in.valid;
673
674 -- Update LR on the next cycle after a branch link
675 --
676 -- WARNING: The LR update isn't tracked by our hazard tracker. This
677 -- will work (well I hope) because it only happens on branches
678 -- which will flush all decoded instructions. By the time
679 -- fetch catches up, we'll have the new LR. This will
680 -- *not* work properly however if we have a branch predictor,
681 -- in which case the solution would probably be to keep a
682 -- local cache of the updated LR in execute1 (flushed on
683 -- exceptions) that is used instead of the value from
684 -- decode when its content is valid.
685 if e_in.lr = '1' then
686 v.lr_update := '1';
687 v.next_lr := next_nia;
688 v.e.valid := '0';
689 report "Delayed LR update to " & to_hstring(next_nia);
690 stall_out <= '1';
691 end if;
-- Case 2: second cycle of a branch-and-link, write the saved address to LR
692 elsif r.lr_update = '1' then
693 result_en := '1';
694 result := r.next_lr;
695 v.e.write_reg := fast_spr_num(SPR_LR);
696 v.e.valid := '1';
-- Case 3: second cycle of a count-zero instruction.  v was copied from r
-- above, so the v.slow_op_* fields hold the values saved at issue.
697 elsif r.cntz_in_progress = '1' then
698 -- cnt[lt]z always takes two cycles
699 result := countzero_result;
700 result_en := '1';
701 v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
702 v.e.rc := v.slow_op_rc;
703 v.e.xerc := v.slow_op_xerc;
704 v.e.valid := '1';
-- Case 4: a multiply or divide is pending; complete it when the unit
-- reports valid, otherwise keep stalling
705 elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
706 if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
707 (r.div_in_progress = '1' and divider_to_x.valid = '1') then
708 if r.mul_in_progress = '1' then
709 result := multiply_to_x.write_reg_data;
710 overflow := multiply_to_x.overflow;
711 else
712 result := divider_to_x.write_reg_data;
713 overflow := divider_to_x.overflow;
714 end if;
715 result_en := '1';
716 v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
717 v.e.rc := v.slow_op_rc;
718 v.e.xerc := v.slow_op_xerc;
719 v.e.write_xerc_enable := v.slow_op_oe;
720 -- We must test oe because the RC update code in writeback
721 -- will use the xerc value to set CR0:SO so we must not clobber
722 -- xerc if OE wasn't set.
723 if v.slow_op_oe = '1' then
724 v.e.xerc.ov := overflow;
725 v.e.xerc.ov32 := overflow;
726 v.e.xerc.so := v.slow_op_xerc.so or overflow;
727 end if;
728 v.e.valid := '1';
729 else
-- result not ready: hold the in-progress flags and keep the pipeline stalled
730 stall_out <= '1';
731 v.mul_in_progress := r.mul_in_progress;
732 v.div_in_progress := r.div_in_progress;
733 end if;
734 end if;
735
-- Whatever branch ran above, publish its result into the writeback record
736 v.e.write_data := result;
737 v.e.write_enable := result_en;
738
739 -- Outputs to loadstore1 (async)
-- Only lv.valid is qualified by e_in.valid; the remaining fields are
-- driven from e_in and the operands unconditionally.
740 lv := Execute1ToLoadstore1Init;
741 if e_in.valid = '1' and (e_in.insn_type = OP_LOAD or e_in.insn_type = OP_STORE) then
742 lv.valid := '1';
743 end if;
744 if e_in.insn_type = OP_LOAD then
745 lv.load := '1';
746 end if;
747 lv.addr1 := a_in;
748 lv.addr2 := b_in;
749 lv.data := c_in;
750 lv.write_reg := gspr_to_gpr(e_in.write_reg);
751 lv.length := e_in.data_len;
752 lv.byte_reverse := e_in.byte_reverse;
753 lv.sign_extend := e_in.sign_extend;
754 lv.update := e_in.update;
755 lv.update_reg := gspr_to_gpr(e_in.read_reg1);
756 lv.xerc := v.e.xerc;
757 lv.reserve := e_in.reserve;
758 lv.rc := e_in.rc;
759 -- decode l*cix and st*cix instructions here
760 if e_in.insn(31 downto 26) = "011111" and e_in.insn(10 downto 9) = "11" and
761 e_in.insn(5 downto 1) = "10101" then
762 lv.ci := '1';
763 end if;
764
765 -- Update registers
766 rin <= v;
767
768 -- update outputs
769 --f_out <= r.f;
770 l_out <= lv;
-- e_out comes from the registered record, i.e. the result computed in the
-- previous cycle
771 e_out <= r.e;
-- flush_out follows the fetch redirect request computed above
772 flush_out <= f_out.redirect;
773 end process;
774 end architecture behaviour;