2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
6 use work.decode_types.all;
9 use work.crhelpers.all;
10 use work.insn_helpers.all;
11 use work.ppc_fx_insns.all;
15 EX1_BYPASS : boolean := true
22 flush_out : out std_ulogic;
23 stall_out : out std_ulogic;
25 e_in : in Decode2ToExecute1Type;
28 l_out : out Execute1ToLoadstore1Type;
29 f_out : out Execute1ToFetch1Type;
31 e_out : out Execute1ToWritebackType;
33 icache_inval : out std_ulogic;
34 terminate_out : out std_ulogic
38 architecture behaviour of execute1 is
-- State record for execute1 (signals r / rin): the result record produced
-- for writeback plus bookkeeping for operations that finish in a later cycle.
39 type reg_type is record
-- Result record; r.e.write_data is the source for the operand bypass and
-- r.e.xerc is forwarded when r.e.write_xerc_enable = '1'.
40 e : Execute1ToWritebackType;
-- When set in r, the current cycle targets SPR_LR as the write register.
41 lr_update : std_ulogic;
-- Loaded with next_nia (e_in.nia + 4) when e_in.lr = '1'.
42 next_lr : std_ulogic_vector(63 downto 0);
-- Set when an OP_MUL_* is issued; held until multiply_to_x.valid is seen.
43 mul_in_progress : std_ulogic;
-- Set when OP_DIV/OP_DIVE/OP_MOD is issued; held until divider_to_x.valid is seen.
44 div_in_progress : std_ulogic;
-- Set for a count-zero operation; countzero_result is taken on the following cycle.
45 cntz_in_progress : std_ulogic;
-- Destination, RC, OE and XER common bits captured whenever e_in.valid = '1',
-- then reused when a multi-cycle (mul/div/cntz) result is finally written.
46 slow_op_dest : gpr_index_t;
47 slow_op_rc : std_ulogic;
48 slow_op_oe : std_ulogic;
49 slow_op_xerc : xer_common_t;
52 signal r, rin : reg_type;
54 signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
56 signal ctrl: ctrl_t := (others => (others => '0'));
57 signal ctrl_tmp: ctrl_t := (others => (others => '0'));
59 signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
60 signal rotator_result: std_ulogic_vector(63 downto 0);
61 signal rotator_carry: std_ulogic;
62 signal logical_result: std_ulogic_vector(63 downto 0);
63 signal countzero_result: std_ulogic_vector(63 downto 0);
64 signal popcnt_result: std_ulogic_vector(63 downto 0);
65 signal parity_result: std_ulogic_vector(63 downto 0);
68 signal x_to_multiply: Execute1ToMultiplyType;
69 signal multiply_to_x: MultiplyToExecute1Type;
72 signal x_to_divider: Execute1ToDividerType;
73 signal divider_to_x: DividerToExecute1Type;
-- Record a carry result in the writeback record `e`.
--   e       : writeback record updated in place
--   carry32 : carry value stored into e.xerc.ca32
--   carry   : second carry value
-- Also asserts e.write_xerc_enable so the XER common bits get written.
-- NOTE(review): this listing omits some body lines; `carry` is presumably
-- stored into e.xerc.ca there -- confirm against the full file.
75 procedure set_carry(e: inout Execute1ToWritebackType;
76 carry32 : in std_ulogic;
77 carry : in std_ulogic) is
79 e.xerc.ca32 := carry32;
81 e.write_xerc_enable := '1';
-- Record an overflow result in the writeback record `e` and assert
-- e.write_xerc_enable so the XER common bits get written.
-- NOTE(review): part of the parameter list and most of the body are not
-- shown in this listing; how ov32 (and any other argument) is folded into
-- e.xerc must be confirmed against the full file.
84 procedure set_ov(e: inout Execute1ToWritebackType;
86 ov32 : in std_ulogic) is
93 e.write_xerc_enable := '1';
-- Overflow flag for an addition, computed from single bits:
--   msb_a, msb_b : most-significant bits of the two addends
--   ca           : carry value supplied by the caller
--   msb_r        : most-significant bit of the result
-- Returns '1' only when msb_a and msb_b are equal and ca differs from msb_r.
96 function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic;
97 ca: std_ulogic; msb_r: std_ulogic) return std_ulogic is
99 return (ca xor msb_r) and not (msb_a xor msb_b);
-- Produces a single std_ulogic from a carry_in_t selector and the XER
-- common bits. In the OP_ADD / OP_CMP path its result is passed as the
-- third argument of ppc_adde (presumably the adder carry-in -- confirm).
-- NOTE(review): the function body is not shown in this listing.
102 function decode_input_carry(ic : carry_in_t;
103 xerc : xer_common_t) return std_ulogic is
-- Rotate/shift unit. The shift count is the low 7 bits of operand B; the
-- right_shift / rot_clear_left / rot_clear_right controls are derived from
-- e_in.insn_type in execute1_1. Its result and carry_out are consumed by the
-- OP_RLC/OP_RLCL/OP_RLCR/OP_SHL/OP_SHR case.
-- (Some port associations are not shown in this listing.)
117 rotator_0: entity work.rotator
121 shift => b_in(6 downto 0),
123 is_32bit => e_in.is_32bit,
124 right_shift => right_shift,
125 arith => e_in.is_signed,
126 clear_left => rot_clear_left,
127 clear_right => rot_clear_right,
128 result => rotator_result,
129 carry_out => rotator_carry
-- Logical unit. Driven directly by the decoded instruction fields; it
-- produces logical_result (used for OP_AND/OP_OR/OP_XOR) as well as
-- popcnt_result and parity_result.
-- (Some port associations are not shown in this listing.)
132 logical_0: entity work.logical
136 op => e_in.insn_type,
137 invert_in => e_in.invert_a,
138 invert_out => e_in.invert_out,
139 result => logical_result,
140 datalen => e_in.data_len,
141 popcnt => popcnt_result,
142 parity => parity_result
-- Zero-counter unit. count_right is taken from instruction bit 10;
-- countzero_result is picked up by execute1_1 on the cycle after
-- cntz_in_progress was set.
-- (Some port associations are not shown in this listing.)
145 countzero_0: entity work.zero_counter
149 count_right => e_in.insn(10),
150 is_32bit => e_in.is_32bit,
151 result => countzero_result
-- Multiplier unit. Requests are driven on x_to_multiply by execute1_1;
-- completion is signalled through multiply_to_x.valid.
-- (Some port associations are not shown in this listing.)
154 multiply_0: entity work.multiply
157 m_in => x_to_multiply,
158 m_out => multiply_to_x
-- Divider unit. Requests are driven on x_to_divider by execute1_1;
-- completion is signalled through divider_to_x.valid.
-- (Some port associations are not shown in this listing.)
161 divider_0: entity work.divider
165 d_in => x_to_divider,
166 d_out => divider_to_x
-- Operand selection with result bypass: for each of the three operands, use
-- the previously latched result r.e.write_data when EX1_BYPASS is true and
-- the matching e_in.bypass_dataN flag is '1'; otherwise use the value
-- supplied in e_in.read_dataN.
169 a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
170 b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
171 c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;
-- Clocked process, sensitive to clk only. The lines visible here are
-- simulation checks evaluated on the rising edge:
--   * assert that a pending LR update (r.lr_update) never coincides with a
--     valid incoming instruction (e_in.valid);
--   * report the value in r.next_lr when r.lr_update = '1'.
-- NOTE(review): the remaining statements of this process (presumably the
-- register update of r) are not shown in this listing.
173 execute1_0: process(clk)
175 if rising_edge(clk) then
178 assert not (r.lr_update = '1' and e_in.valid = '1')
179 report "LR update collision with valid in EX1"
181 if r.lr_update = '1' then
182 report "LR update to " & to_hstring(r.next_lr);
187 execute1_1: process(all)
188 variable v : reg_type;
189 variable a_inv : std_ulogic_vector(63 downto 0);
190 variable result : std_ulogic_vector(63 downto 0);
191 variable newcrf : std_ulogic_vector(3 downto 0);
192 variable result_with_carry : std_ulogic_vector(64 downto 0);
193 variable result_en : std_ulogic;
194 variable crnum : crnum_t;
195 variable crbit : integer range 0 to 31;
196 variable scrnum : crnum_t;
197 variable lo, hi : integer;
198 variable sh, mb, me : std_ulogic_vector(5 downto 0);
199 variable sh32, mb32, me32 : std_ulogic_vector(4 downto 0);
200 variable bo, bi : std_ulogic_vector(4 downto 0);
201 variable bf, bfa : std_ulogic_vector(2 downto 0);
202 variable cr_op : std_ulogic_vector(9 downto 0);
203 variable cr_operands : std_ulogic_vector(1 downto 0);
204 variable bt, ba, bb : std_ulogic_vector(4 downto 0);
205 variable btnum, banum, bbnum : integer range 0 to 31;
206 variable crresult : std_ulogic;
207 variable l : std_ulogic;
208 variable next_nia : std_ulogic_vector(63 downto 0);
209 variable carry_32, carry_64 : std_ulogic;
210 variable sign1, sign2 : std_ulogic;
211 variable abs1, abs2 : signed(63 downto 0);
212 variable overflow : std_ulogic;
213 variable negative : std_ulogic;
214 variable zerohi, zerolo : std_ulogic;
215 variable msb_a, msb_b : std_ulogic;
216 variable a_lt : std_ulogic;
217 variable lv : Execute1ToLoadstore1Type;
219 result := (others => '0');
220 result_with_carry := (others => '0');
222 newcrf := (others => '0');
225 v.e := Execute1ToWritebackInit;
227 -- XER forwarding. To avoid having to track XER hazards, we
228 -- use the previously latched value.
230 -- If the XER was modified by a multiply or a divide, those are
231 -- single issue, so we'll get the up-to-date value from decode2
232 -- (read from the register file).
234 -- If it was modified by an instruction older than the previous
235 -- one in EX1, it will have also hit writeback and will be up
236 -- to date in decode2.
238 -- That leaves us with the case where it was updated by the previous
239 -- instruction in EX1. In that case, we can forward it back here.
241 -- This will break if we allow pipelining of multiply and divide,
242 -- but ideally, those should go via EX1 anyway and run as a state
243 -- machine from here.
245 -- One additional hazard to beware of is an XER:SO modifying instruction
246 -- in EX1 followed immediately by a store conditional. Due to our
247 -- writeback latency, the store will go down the LSU with the previous
248 -- XER value, thus the stcx. will set CR0:SO using an obsolete SO value.
250 -- We will need to handle that if we ever make stcx. not single-issue.
252 -- We always pass a valid XER value down to writeback even when
253 -- we aren't updating it, in order for XER:SO -> CR0:SO transfer
254 -- to work for RC instructions.
256 if r.e.write_xerc_enable = '1' then
257 v.e.xerc := r.e.xerc;
259 v.e.xerc := e_in.xerc;
263 v.mul_in_progress := '0';
264 v.div_in_progress := '0';
265 v.cntz_in_progress := '0';
267 -- signals to multiply unit
268 x_to_multiply <= Execute1ToMultiplyInit;
269 x_to_multiply.insn_type <= e_in.insn_type;
270 x_to_multiply.is_32bit <= e_in.is_32bit;
272 if e_in.is_32bit = '1' then
273 if e_in.is_signed = '1' then
274 x_to_multiply.data1 <= (others => a_in(31));
275 x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
276 x_to_multiply.data2 <= (others => b_in(31));
277 x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
279 x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
280 x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
283 if e_in.is_signed = '1' then
284 x_to_multiply.data1 <= a_in(63) & a_in;
285 x_to_multiply.data2 <= b_in(63) & b_in;
287 x_to_multiply.data1 <= '0' & a_in;
288 x_to_multiply.data2 <= '0' & b_in;
292 -- signals to divide unit
295 if e_in.is_signed = '1' then
296 if e_in.is_32bit = '1' then
304 -- take absolute values
306 abs1 := signed(a_in);
308 abs1 := - signed(a_in);
311 abs2 := signed(b_in);
313 abs2 := - signed(b_in);
316 x_to_divider <= Execute1ToDividerInit;
317 x_to_divider.is_signed <= e_in.is_signed;
318 x_to_divider.is_32bit <= e_in.is_32bit;
319 if e_in.insn_type = OP_MOD then
320 x_to_divider.is_modulus <= '1';
322 x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
323 if e_in.is_32bit = '0' then
325 if e_in.insn_type = OP_DIVE then
326 x_to_divider.is_extended <= '1';
328 x_to_divider.dividend <= std_ulogic_vector(abs1);
329 x_to_divider.divisor <= std_ulogic_vector(abs2);
332 x_to_divider.is_extended <= '0';
333 if e_in.insn_type = OP_DIVE then -- extended forms
334 x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
336 x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
338 x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
342 -- FIXME: run at 512MHz not core freq
343 ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
345 terminate_out <= '0';
348 f_out <= Execute1ToFetch1TypeInit;
350 -- Next insn adder used in a couple of places
351 next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4);
353 -- rotator control signals
354 right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
355 rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0';
356 rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
358 if e_in.valid = '1' then
361 v.e.write_reg := e_in.write_reg;
362 v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
363 v.slow_op_rc := e_in.rc;
364 v.slow_op_oe := e_in.oe;
365 v.slow_op_xerc := v.e.xerc;
367 case_0: case e_in.insn_type is
370 terminate_out <= '1';
373 terminate_out <= '1';
377 when OP_ADD | OP_CMP =>
378 if e_in.invert_a = '0' then
383 result_with_carry := ppc_adde(a_inv, b_in,
384 decode_input_carry(e_in.input_carry, v.e.xerc));
385 result := result_with_carry(63 downto 0);
386 carry_32 := result(32) xor a_inv(32) xor b_in(32);
387 carry_64 := result_with_carry(64);
388 if e_in.insn_type = OP_ADD then
389 if e_in.output_carry = '1' then
390 set_carry(v.e, carry_32, carry_64);
392 if e_in.oe = '1' then
394 calc_ov(a_inv(63), b_in(63), carry_64, result_with_carry(63)),
395 calc_ov(a_inv(31), b_in(31), carry_32, result_with_carry(31)));
399 -- CMP and CMPL instructions
400 -- Note, we have done RB - RA, not RA - RB
401 bf := insn_bf(e_in.insn);
402 l := insn_l(e_in.insn);
403 v.e.write_cr_enable := '1';
404 crnum := to_integer(unsigned(bf));
405 v.e.write_cr_mask := num_to_fxm(crnum);
406 zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0)));
407 zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32)));
408 if zerolo = '1' and (l = '0' or zerohi = '1') then
410 newcrf := "001" & v.e.xerc.so;
421 if msb_a /= msb_b then
422 -- Subtraction might overflow, but
423 -- comparison is clear from MSB difference.
424 -- for signed, 0 is greater; for unsigned, 1 is greater
425 a_lt := msb_a xnor e_in.is_signed;
427 -- Subtraction cannot overflow since MSBs are equal.
428 -- carry = 1 indicates RA is smaller (signed or unsigned)
429 a_lt := (not l and carry_32) or (l and carry_64);
431 newcrf := a_lt & not a_lt & '0' & v.e.xerc.so;
436 v.e.write_cr_data(hi downto lo) := newcrf;
439 when OP_AND | OP_OR | OP_XOR =>
440 result := logical_result;
443 f_out.redirect <= '1';
444 if (insn_aa(e_in.insn)) then
445 f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
447 f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
451 bo := insn_bo(e_in.insn);
452 bi := insn_bi(e_in.insn);
453 if bo(4-2) = '0' then
454 result := std_ulogic_vector(unsigned(a_in) - 1);
456 v.e.write_reg := fast_spr_num(SPR_CTR);
458 if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
459 f_out.redirect <= '1';
460 if (insn_aa(e_in.insn)) then
461 f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
463 f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
468 -- read_data2 is target register (CTR, LR or TAR)
469 bo := insn_bo(e_in.insn);
470 bi := insn_bi(e_in.insn);
471 if bo(4-2) = '0' and e_in.insn(10) = '0' then
472 result := std_ulogic_vector(unsigned(a_in) - 1);
474 v.e.write_reg := fast_spr_num(SPR_CTR);
476 if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
477 f_out.redirect <= '1';
478 f_out.redirect_nia <= b_in(63 downto 2) & "00";
481 result := ppc_cmpb(c_in, b_in);
485 v.cntz_in_progress := '1';
488 -- note data_len is a 1-hot encoding
489 negative := (e_in.data_len(0) and c_in(7)) or
490 (e_in.data_len(1) and c_in(15)) or
491 (e_in.data_len(2) and c_in(31));
492 result := (others => negative);
493 if e_in.data_len(2) = '1' then
494 result(31 downto 16) := c_in(31 downto 16);
496 if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
497 result(15 downto 8) := c_in(15 downto 8);
499 result(7 downto 0) := c_in(7 downto 0);
502 crbit := to_integer(unsigned(insn_bc(e_in.insn)));
503 if e_in.cr(31-crbit) = '1' then
510 cr_op := insn_cr(e_in.insn);
511 report "CR OP " & to_hstring(cr_op);
512 if cr_op(0) = '0' then -- MCRF
513 bf := insn_bf(e_in.insn);
514 bfa := insn_bfa(e_in.insn);
515 v.e.write_cr_enable := '1';
516 crnum := to_integer(unsigned(bf));
517 scrnum := to_integer(unsigned(bfa));
518 v.e.write_cr_mask := num_to_fxm(crnum);
523 newcrf := e_in.cr(hi downto lo);
529 v.e.write_cr_data(hi downto lo) := newcrf;
532 v.e.write_cr_enable := '1';
533 bt := insn_bt(e_in.insn);
534 ba := insn_ba(e_in.insn);
535 bb := insn_bb(e_in.insn);
536 btnum := 31 - to_integer(unsigned(bt));
537 banum := 31 - to_integer(unsigned(ba));
538 bbnum := 31 - to_integer(unsigned(bb));
539 -- Bits 5-8 of cr_op give the truth table of the requested CR logical operation,
539 -- indexed by the two source CR bits (BA, BB).
541 cr_operands := e_in.cr(banum) & e_in.cr(bbnum);
542 crresult := cr_op(5 + to_integer(unsigned(cr_operands)));
543 v.e.write_cr_mask := num_to_fxm((31-btnum) / 4);
544 for i in 0 to 31 loop
546 v.e.write_cr_data(i) := crresult;
548 v.e.write_cr_data(i) := e_in.cr(i);
553 if is_fast_spr(e_in.read_reg1) then
555 if decode_spr_num(e_in.insn) = SPR_XER then
556 -- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
557 result(63 downto 32) := (others => '0');
558 result(63-32) := v.e.xerc.so;
559 result(63-33) := v.e.xerc.ov;
560 result(63-34) := v.e.xerc.ca;
561 result(63-35 downto 63-43) := "000000000";
562 result(63-44) := v.e.xerc.ov32;
563 result(63-45) := v.e.xerc.ca32;
566 case decode_spr_num(e_in.insn) is
570 result := (others => '0');
575 if e_in.insn(20) = '0' then
577 result := x"00000000" & e_in.cr;
580 crnum := fxm_to_num(insn_fxm(e_in.insn));
581 result := (others => '0');
586 result(hi downto lo) := e_in.cr(hi downto lo);
592 v.e.write_cr_enable := '1';
593 if e_in.insn(20) = '0' then
595 v.e.write_cr_mask := insn_fxm(e_in.insn);
597 -- mtocrf: We require one-hot priority encoding here
598 crnum := fxm_to_num(insn_fxm(e_in.insn));
599 v.e.write_cr_mask := num_to_fxm(crnum);
601 v.e.write_cr_data := c_in(31 downto 0);
603 report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
604 "=" & to_hstring(c_in);
605 if is_fast_spr(e_in.write_reg) then
608 if decode_spr_num(e_in.insn) = SPR_XER then
609 v.e.xerc.so := c_in(63-32);
610 v.e.xerc.ov := c_in(63-33);
611 v.e.xerc.ca := c_in(63-34);
612 v.e.xerc.ov32 := c_in(63-44);
613 v.e.xerc.ca32 := c_in(63-45);
614 v.e.write_xerc_enable := '1';
617 -- TODO: Implement slow SPRs
618 -- case decode_spr_num(e_in.insn) is
623 result := popcnt_result;
626 result := parity_result;
628 when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
629 result := rotator_result;
630 if e_in.output_carry = '1' then
631 set_carry(v.e, rotator_carry, rotator_carry);
634 when OP_SIM_CONFIG =>
635 -- bit 0 was used to select the microwatt console, which
636 -- we no longer support.
637 result := x"0000000000000000";
641 -- Keep our test cases happy for now, ignore trap instructions
642 report "OP_TDI FIXME";
645 f_out.redirect <= '1';
646 f_out.redirect_nia <= next_nia;
651 when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
653 v.mul_in_progress := '1';
655 x_to_multiply.valid <= '1';
657 when OP_DIV | OP_DIVE | OP_MOD =>
659 v.div_in_progress := '1';
661 x_to_divider.valid <= '1';
663 when OP_LOAD | OP_STORE =>
664 -- loadstore/dcache has its own port to writeback
668 terminate_out <= '1';
672 v.e.rc := e_in.rc and e_in.valid;
674 -- Update LR on the next cycle after a branch link
676 -- WARNING: The LR update isn't tracked by our hazard tracker. This
677 -- will work (well I hope) because it only happens on branches
678 -- which will flush all decoded instructions. By the time
679 -- fetch catches up, we'll have the new LR. This will
680 -- *not* work properly however if we have a branch predictor,
681 -- in which case the solution would probably be to keep a
682 -- local cache of the updated LR in execute1 (flushed on
683 -- exceptions) that is used instead of the value from
684 -- decode when its content is valid.
685 if e_in.lr = '1' then
687 v.next_lr := next_nia;
689 report "Delayed LR update to " & to_hstring(next_nia);
692 elsif r.lr_update = '1' then
695 v.e.write_reg := fast_spr_num(SPR_LR);
697 elsif r.cntz_in_progress = '1' then
698 -- cnt[lt]z always takes two cycles
699 result := countzero_result;
701 v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
702 v.e.rc := v.slow_op_rc;
703 v.e.xerc := v.slow_op_xerc;
705 elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
706 if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
707 (r.div_in_progress = '1' and divider_to_x.valid = '1') then
708 if r.mul_in_progress = '1' then
709 result := multiply_to_x.write_reg_data;
710 overflow := multiply_to_x.overflow;
712 result := divider_to_x.write_reg_data;
713 overflow := divider_to_x.overflow;
716 v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
717 v.e.rc := v.slow_op_rc;
718 v.e.xerc := v.slow_op_xerc;
719 v.e.write_xerc_enable := v.slow_op_oe;
720 -- We must test oe because the RC update code in writeback
721 -- will use the xerc value to set CR0:SO so we must not clobber
722 -- xerc if OE wasn't set.
723 if v.slow_op_oe = '1' then
724 v.e.xerc.ov := overflow;
725 v.e.xerc.ov32 := overflow;
726 v.e.xerc.so := v.slow_op_xerc.so or overflow;
731 v.mul_in_progress := r.mul_in_progress;
732 v.div_in_progress := r.div_in_progress;
736 v.e.write_data := result;
737 v.e.write_enable := result_en;
739 -- Outputs to loadstore1 (async)
740 lv := Execute1ToLoadstore1Init;
741 if e_in.valid = '1' and (e_in.insn_type = OP_LOAD or e_in.insn_type = OP_STORE) then
744 if e_in.insn_type = OP_LOAD then
750 lv.write_reg := gspr_to_gpr(e_in.write_reg);
751 lv.length := e_in.data_len;
752 lv.byte_reverse := e_in.byte_reverse;
753 lv.sign_extend := e_in.sign_extend;
754 lv.update := e_in.update;
755 lv.update_reg := gspr_to_gpr(e_in.read_reg1);
757 lv.reserve := e_in.reserve;
759 -- decode l*cix and st*cix instructions here
760 if e_in.insn(31 downto 26) = "011111" and e_in.insn(10 downto 9) = "11" and
761 e_in.insn(5 downto 1) = "10101" then
772 flush_out <= f_out.redirect;
774 end architecture behaviour;