2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
6 use work.decode_types.all;
9 use work.crhelpers.all;
10 use work.insn_helpers.all;
11 use work.ppc_fx_insns.all;
15 EX1_BYPASS : boolean := true
22 flush_out : out std_ulogic;
23 stall_out : out std_ulogic;
25 e_in : in Decode2ToExecute1Type;
28 l_out : out Execute1ToLoadstore1Type;
29 f_out : out Execute1ToFetch1Type;
31 e_out : out Execute1ToWritebackType;
33 icache_inval : out std_ulogic;
34 terminate_out : out std_ulogic
38 architecture behaviour of execute1 is
39 type reg_type is record
40 e : Execute1ToWritebackType;
41 lr_update : std_ulogic;
42 next_lr : std_ulogic_vector(63 downto 0);
43 mul_in_progress : std_ulogic;
44 div_in_progress : std_ulogic;
45 cntz_in_progress : std_ulogic;
46 slow_op_dest : gpr_index_t;
47 slow_op_rc : std_ulogic;
48 slow_op_oe : std_ulogic;
49 slow_op_xerc : xer_common_t;
52 signal r, rin : reg_type;
54 signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
56 signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
57 signal ctrl_tmp: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
58 signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
59 signal rotator_result: std_ulogic_vector(63 downto 0);
60 signal rotator_carry: std_ulogic;
61 signal logical_result: std_ulogic_vector(63 downto 0);
62 signal countzero_result: std_ulogic_vector(63 downto 0);
63 signal popcnt_result: std_ulogic_vector(63 downto 0);
64 signal parity_result: std_ulogic_vector(63 downto 0);
67 signal x_to_multiply: Execute1ToMultiplyType;
68 signal multiply_to_x: MultiplyToExecute1Type;
71 signal x_to_divider: Execute1ToDividerType;
72 signal divider_to_x: DividerToExecute1Type;
-- set_carry: record new carry flags in the writeback record `e`.
--   e       : writeback record being built for this cycle (updated in place)
--   carry32 : value written to e.xerc.ca32
--   carry   : second carry input
-- Also sets e.write_xerc_enable to '1'.
-- NOTE(review): the statement that consumes `carry` is not visible in this
-- excerpt; presumably it is written to e.xerc.ca -- confirm.
74 procedure set_carry(e: inout Execute1ToWritebackType;
75 carry32 : in std_ulogic;
76 carry : in std_ulogic) is
78 e.xerc.ca32 := carry32;
80 e.write_xerc_enable := '1';
-- set_ov: overflow-flag counterpart of set_carry, operating on the writeback
-- record `e` (updated in place). Sets e.write_xerc_enable to '1'.
-- NOTE(review): only the `e` and `ov32` parameters and the final assignment
-- are visible in this excerpt; the call site passes two overflow values
-- (64-bit first, then 32-bit), so there is presumably an `ov` parameter and
-- assignments to the XER overflow fields -- confirm.
83 procedure set_ov(e: inout Execute1ToWritebackType;
85 ov32 : in std_ulogic) is
92 e.write_xerc_enable := '1';
-- calc_ov: signed-overflow predicate for an addition.
--   msb_a, msb_b : most significant bits of the two addends
--   ca           : carry associated with that bit position
--   msb_r        : most significant bit of the sum
-- Returns '1' only when both addends have the same top bit and the carry
-- differs from the top bit of the result; otherwise returns '0'.
function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic;
                 ca: std_ulogic; msb_r: std_ulogic) return std_ulogic is
    variable same_sign  : std_ulogic;
    variable carry_diff : std_ulogic;
begin
    -- Addends with different signs can never overflow.
    same_sign  := msb_a xnor msb_b;
    -- Carry disagreeing with the result's top bit flags the overflow.
    carry_diff := ca xor msb_r;
    return carry_diff and same_sign;
end;
-- decode_input_carry: produce the carry-in bit for the adder.
--   ic   : carry-in selector supplied by decode (carry_in_t)
--   xerc : current XER common bits
-- NOTE(review): the body is not visible in this excerpt; presumably `ic`
-- selects between constant carries and a carry held in `xerc` -- confirm
-- against the full function.
101 function decode_input_carry(ic : carry_in_t;
102 xerc : xer_common_t) return std_ulogic is
-- msr_copy: return a copy of `msr` that keeps only the "full function" MSR
-- bits; every other bit of the result is '0'.
--
-- The ISA says this:
--  Defined MSR bits are classified as either full func-
--  tion or partial function. Full function MSR bits are
--  saved in SRR1 or HSRR1 when an interrupt other
--  than a System Call Vectored interrupt occurs and
--  restored by rfscv, rfid, or hrfid, while partial func-
--  tion MSR bits are not saved or restored.
--  Full function MSR bits lie in the range 0:32, 37:41, and
--  48:63, and partial function MSR bits lie in the range
--  33:36 and 42:47.
--
-- Those ranges use IBM bit numbering (bit 0 is the most significant bit).
-- The vector here is declared (63 downto 0), so ISA bit n lives at index
-- 63 - n, the same conversion this file uses for MSR[EE] (63 - 48).
function msr_copy(msr: std_ulogic_vector(63 downto 0))
    return std_ulogic_vector is
    variable msr_out: std_ulogic_vector(63 downto 0);
begin
    msr_out := (others => '0');
    msr_out(63 downto 31) := msr(63 downto 31);  -- ISA bits 0:32
    msr_out(26 downto 22) := msr(26 downto 22);  -- ISA bits 37:41
    msr_out(15 downto 0)  := msr(15 downto 0);   -- ISA bits 48:63
    return msr_out;
end;
-- Rotate/shift unit. The shift amount comes from b_in(6 downto 0); the
-- right_shift / rot_clear_left / rot_clear_right control signals select the
-- operation. Outputs are rotator_result and rotator_carry.
137 rotator_0: entity work.rotator
141 shift => b_in(6 downto 0),
143 is_32bit => e_in.is_32bit,
144 right_shift => right_shift,
145 arith => e_in.is_signed,
146 clear_left => rot_clear_left,
147 clear_right => rot_clear_right,
148 result => rotator_result,
149 carry_out => rotator_carry
-- Logical unit. The operation is selected by e_in.insn_type, with input and
-- output inversion taken from the decode record. Besides logical_result it
-- also drives popcnt_result and parity_result.
152 logical_0: entity work.logical
156 op => e_in.insn_type,
157 invert_in => e_in.invert_a,
158 invert_out => e_in.invert_out,
159 result => logical_result,
160 datalen => e_in.data_len,
161 popcnt => popcnt_result,
162 parity => parity_result
-- Zero counter. Instruction bit 10 drives count_right; the result is
-- delivered on countzero_result.
165 countzero_0: entity work.zero_counter
169 count_right => e_in.insn(10),
170 is_32bit => e_in.is_32bit,
171 result => countzero_result
-- Multiplier: request on x_to_multiply, response on multiply_to_x.
174 multiply_0: entity work.multiply
177 m_in => x_to_multiply,
178 m_out => multiply_to_x
-- Divider: request on x_to_divider, response on divider_to_x.
181 divider_0: entity work.divider
185 d_in => x_to_divider,
186 d_out => divider_to_x
-- Operand selection. When the EX1_BYPASS generic is true and decode flags the
-- operand for bypass, use the write data latched in r.e; otherwise use the
-- value read by decode.
189 a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
190 b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
191 c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;
-- Clocked process, sensitive to clk only.
-- On each rising edge it asserts that a pending LR update (r.lr_update) never
-- coincides with a valid incoming instruction, and reports the new LR value
-- whenever an LR update is pending.
-- NOTE(review): the register update statements are not visible in this
-- excerpt; presumably r and ctrl are latched here -- confirm.
193 execute1_0: process(clk)
195 if rising_edge(clk) then
198 assert not (r.lr_update = '1' and e_in.valid = '1')
199 report "LR update collision with valid in EX1"
201 if r.lr_update = '1' then
202 report "LR update to " & to_hstring(r.next_lr);
207 execute1_1: process(all)
208 variable v : reg_type;
209 variable a_inv : std_ulogic_vector(63 downto 0);
210 variable result : std_ulogic_vector(63 downto 0);
211 variable newcrf : std_ulogic_vector(3 downto 0);
212 variable result_with_carry : std_ulogic_vector(64 downto 0);
213 variable result_en : std_ulogic;
214 variable crnum : crnum_t;
215 variable crbit : integer range 0 to 31;
216 variable scrnum : crnum_t;
217 variable lo, hi : integer;
218 variable sh, mb, me : std_ulogic_vector(5 downto 0);
219 variable sh32, mb32, me32 : std_ulogic_vector(4 downto 0);
220 variable bo, bi : std_ulogic_vector(4 downto 0);
221 variable bf, bfa : std_ulogic_vector(2 downto 0);
222 variable cr_op : std_ulogic_vector(9 downto 0);
223 variable cr_operands : std_ulogic_vector(1 downto 0);
224 variable bt, ba, bb : std_ulogic_vector(4 downto 0);
225 variable btnum, banum, bbnum : integer range 0 to 31;
226 variable crresult : std_ulogic;
227 variable l : std_ulogic;
228 variable next_nia : std_ulogic_vector(63 downto 0);
229 variable carry_32, carry_64 : std_ulogic;
230 variable sign1, sign2 : std_ulogic;
231 variable abs1, abs2 : signed(63 downto 0);
232 variable overflow : std_ulogic;
233 variable negative : std_ulogic;
234 variable zerohi, zerolo : std_ulogic;
235 variable msb_a, msb_b : std_ulogic;
236 variable a_lt : std_ulogic;
237 variable lv : Execute1ToLoadstore1Type;
238 variable irq_valid : std_ulogic;
239 variable exception : std_ulogic;
240 variable exception_nextpc : std_ulogic;
241 variable trapval : std_ulogic_vector(4 downto 0);
243 result := (others => '0');
244 result_with_carry := (others => '0');
246 newcrf := (others => '0');
249 v.e := Execute1ToWritebackInit;
251 -- XER forwarding. To avoid having to track XER hazards, we
252 -- use the previously latched value.
254 -- If the XER was modified by a multiply or a divide, those are
255 -- single issue, we'll get the up to date value from decode2 from
256 -- the register file.
258 -- If it was modified by an instruction older than the previous
259 -- one in EX1, it will have also hit writeback and will be up
260 -- to date in decode2.
262 -- That leaves us with the case where it was updated by the previous
263 -- instruction in EX1. In that case, we can forward it back here.
265 -- This will break if we allow pipelining of multiply and divide,
266 -- but ideally, those should go via EX1 anyway and run as a state
267 -- machine from here.
269 -- One additional hazard to beware of is an XER:SO modifying instruction
270 -- in EX1 followed immediately by a store conditional. Due to our
271 -- writeback latency, the store will go down the LSU with the previous
272 -- XER value, thus the stcx. will set CR0:SO using an obsolete SO value.
274 -- We will need to handle that if we ever make stcx. not single issue
276 -- We always pass a valid XER value down to writeback even when
277 -- we aren't updating it, in order for XER:SO -> CR0:SO transfer
278 -- to work for RC instructions.
280 if r.e.write_xerc_enable = '1' then
281 v.e.xerc := r.e.xerc;
283 v.e.xerc := e_in.xerc;
287 v.mul_in_progress := '0';
288 v.div_in_progress := '0';
289 v.cntz_in_progress := '0';
291 -- signals to multiply unit
292 x_to_multiply <= Execute1ToMultiplyInit;
293 x_to_multiply.insn_type <= e_in.insn_type;
294 x_to_multiply.is_32bit <= e_in.is_32bit;
296 if e_in.is_32bit = '1' then
297 if e_in.is_signed = '1' then
298 x_to_multiply.data1 <= (others => a_in(31));
299 x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
300 x_to_multiply.data2 <= (others => b_in(31));
301 x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
303 x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
304 x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
307 if e_in.is_signed = '1' then
308 x_to_multiply.data1 <= a_in(63) & a_in;
309 x_to_multiply.data2 <= b_in(63) & b_in;
311 x_to_multiply.data1 <= '0' & a_in;
312 x_to_multiply.data2 <= '0' & b_in;
316 -- signals to divide unit
319 if e_in.is_signed = '1' then
320 if e_in.is_32bit = '1' then
328 -- take absolute values
330 abs1 := signed(a_in);
332 abs1 := - signed(a_in);
335 abs2 := signed(b_in);
337 abs2 := - signed(b_in);
340 x_to_divider <= Execute1ToDividerInit;
341 x_to_divider.is_signed <= e_in.is_signed;
342 x_to_divider.is_32bit <= e_in.is_32bit;
343 if e_in.insn_type = OP_MOD then
344 x_to_divider.is_modulus <= '1';
346 x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
347 if e_in.is_32bit = '0' then
349 if e_in.insn_type = OP_DIVE then
350 x_to_divider.is_extended <= '1';
352 x_to_divider.dividend <= std_ulogic_vector(abs1);
353 x_to_divider.divisor <= std_ulogic_vector(abs2);
356 x_to_divider.is_extended <= '0';
357 if e_in.insn_type = OP_DIVE then -- extended forms
358 x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
360 x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
362 x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
366 -- FIXME: run at 512MHz not core freq
367 ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
368 ctrl_tmp.dec <= std_ulogic_vector(unsigned(ctrl.dec) - 1);
371 if ctrl.msr(63 - 48) = '1' and ctrl.dec(63) = '1' then
376 terminate_out <= '0';
379 f_out <= Execute1ToFetch1TypeInit;
381 -- Next insn adder used in a couple of places
382 next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4);
384 -- rotator control signals
385 right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
386 rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0';
387 rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
389 ctrl_tmp.irq_state <= WRITE_SRR0;
391 exception_nextpc := '0';
392 v.e.exc_write_enable := '0';
393 v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
394 v.e.exc_write_data := e_in.nia;
396 if ctrl.irq_state = WRITE_SRR1 then
397 v.e.exc_write_reg := fast_spr_num(SPR_SRR1);
398 v.e.exc_write_data := ctrl.srr1;
399 v.e.exc_write_enable := '1';
400 ctrl_tmp.msr(63 - 48) <= '0'; -- clear EE
401 f_out.redirect <= '1';
402 f_out.redirect_nia <= ctrl.irq_nia;
403 v.e.valid := e_in.valid;
404 report "Writing SRR1: " & to_hstring(ctrl.srr1);
406 elsif irq_valid = '1' then
407 -- we need two cycles to write srr0 and 1
408 -- will need more when we have to write DSISR, DAR and HEIR
409 -- Don't deliver the interrupt until we have a valid instruction
410 -- coming in, so we have a valid NIA to put in SRR0.
411 exception := e_in.valid;
412 ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#900#, 64));
413 ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
415 elsif e_in.valid = '1' then
418 v.e.write_reg := e_in.write_reg;
419 v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
420 v.slow_op_rc := e_in.rc;
421 v.slow_op_oe := e_in.oe;
422 v.slow_op_xerc := v.e.xerc;
424 case_0: case e_in.insn_type is
427 -- we need two cycles to write srr0 and 1
428 -- will need more when we have to write DSISR, DAR and HEIR
430 ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#700#, 64));
431 ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
432 -- Since we aren't doing Hypervisor emulation assist (0xe40) we
433 -- set bit 44 to indicate we have an illegal instruction
434 ctrl_tmp.srr1(63 - 44) <= '1';
437 -- FIXME Assume everything is SC (not SCV) for now
438 -- we need two cycles to write srr0 and 1
439 -- will need more when we have to write DSISR, DAR and HEIR
441 exception_nextpc := '1';
442 ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#C00#, 64));
443 ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
446 terminate_out <= '1';
450 when OP_ADD | OP_CMP | OP_TRAP =>
451 if e_in.invert_a = '0' then
456 result_with_carry := ppc_adde(a_inv, b_in,
457 decode_input_carry(e_in.input_carry, v.e.xerc));
458 result := result_with_carry(63 downto 0);
459 carry_32 := result(32) xor a_inv(32) xor b_in(32);
460 carry_64 := result_with_carry(64);
461 if e_in.insn_type = OP_ADD then
462 if e_in.output_carry = '1' then
463 set_carry(v.e, carry_32, carry_64);
465 if e_in.oe = '1' then
467 calc_ov(a_inv(63), b_in(63), carry_64, result_with_carry(63)),
468 calc_ov(a_inv(31), b_in(31), carry_32, result_with_carry(31)));
472 -- trap, CMP and CMPL instructions
473 -- Note, we have done RB - RA, not RA - RB
474 if e_in.insn_type = OP_CMP then
475 l := insn_l(e_in.insn);
477 l := not e_in.is_32bit;
479 zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0)));
480 zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32)));
481 if zerolo = '1' and (l = '0' or zerohi = '1') then
494 if msb_a /= msb_b then
495 -- Subtraction might overflow, but
496 -- comparison is clear from MSB difference.
497 -- for signed, 0 is greater; for unsigned, 1 is greater
498 trapval := msb_a & msb_b & '0' & msb_b & msb_a;
500 -- Subtraction cannot overflow since MSBs are equal.
501 -- carry = 1 indicates RA is smaller (signed or unsigned)
502 a_lt := (not l and carry_32) or (l and carry_64);
503 trapval := a_lt & not a_lt & '0' & a_lt & not a_lt;
506 if e_in.insn_type = OP_CMP then
507 if e_in.is_signed = '1' then
508 newcrf := trapval(4 downto 2) & v.e.xerc.so;
510 newcrf := trapval(1 downto 0) & trapval(2) & v.e.xerc.so;
512 bf := insn_bf(e_in.insn);
513 crnum := to_integer(unsigned(bf));
514 v.e.write_cr_enable := '1';
515 v.e.write_cr_mask := num_to_fxm(crnum);
519 v.e.write_cr_data(hi downto lo) := newcrf;
522 -- trap instructions (tw, twi, td, tdi)
523 if or (trapval and insn_to(e_in.insn)) = '1' then
524 -- generate trap-type program interrupt
526 ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#700#, 64));
527 ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
528 -- set bit 46 to say trap occurred
529 ctrl_tmp.srr1(63 - 46) <= '1';
534 when OP_AND | OP_OR | OP_XOR =>
535 result := logical_result;
538 f_out.redirect <= '1';
539 if (insn_aa(e_in.insn)) then
540 f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
542 f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
546 bo := insn_bo(e_in.insn);
547 bi := insn_bi(e_in.insn);
548 if bo(4-2) = '0' then
549 result := std_ulogic_vector(unsigned(a_in) - 1);
551 v.e.write_reg := fast_spr_num(SPR_CTR);
553 if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
554 f_out.redirect <= '1';
555 if (insn_aa(e_in.insn)) then
556 f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
558 f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
563 -- read_data2 is target register (CTR, LR or TAR)
564 bo := insn_bo(e_in.insn);
565 bi := insn_bi(e_in.insn);
566 if bo(4-2) = '0' and e_in.insn(10) = '0' then
567 result := std_ulogic_vector(unsigned(a_in) - 1);
569 v.e.write_reg := fast_spr_num(SPR_CTR);
571 if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
572 f_out.redirect <= '1';
573 f_out.redirect_nia <= b_in(63 downto 2) & "00";
577 f_out.redirect <= '1';
578 f_out.redirect_nia <= a_in(63 downto 2) & "00"; -- srr0
579 ctrl_tmp.msr <= msr_copy(std_ulogic_vector(signed(b_in))); -- srr1
581 result := ppc_cmpb(c_in, b_in);
585 v.cntz_in_progress := '1';
588 -- note data_len is a 1-hot encoding
589 negative := (e_in.data_len(0) and c_in(7)) or
590 (e_in.data_len(1) and c_in(15)) or
591 (e_in.data_len(2) and c_in(31));
592 result := (others => negative);
593 if e_in.data_len(2) = '1' then
594 result(31 downto 16) := c_in(31 downto 16);
596 if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
597 result(15 downto 8) := c_in(15 downto 8);
599 result(7 downto 0) := c_in(7 downto 0);
602 crbit := to_integer(unsigned(insn_bc(e_in.insn)));
603 if e_in.cr(31-crbit) = '1' then
610 cr_op := insn_cr(e_in.insn);
611 report "CR OP " & to_hstring(cr_op);
612 if cr_op(0) = '0' then -- MCRF
613 bf := insn_bf(e_in.insn);
614 bfa := insn_bfa(e_in.insn);
615 v.e.write_cr_enable := '1';
616 crnum := to_integer(unsigned(bf));
617 scrnum := to_integer(unsigned(bfa));
618 v.e.write_cr_mask := num_to_fxm(crnum);
623 newcrf := e_in.cr(hi downto lo);
629 v.e.write_cr_data(hi downto lo) := newcrf;
632 v.e.write_cr_enable := '1';
633 bt := insn_bt(e_in.insn);
634 ba := insn_ba(e_in.insn);
635 bb := insn_bb(e_in.insn);
636 btnum := 31 - to_integer(unsigned(bt));
637 banum := 31 - to_integer(unsigned(ba));
638 bbnum := 31 - to_integer(unsigned(bb));
639 -- Bits 5-8 of cr_op give the truth table of the requested
641 cr_operands := e_in.cr(banum) & e_in.cr(bbnum);
642 crresult := cr_op(5 + to_integer(unsigned(cr_operands)));
643 v.e.write_cr_mask := num_to_fxm((31-btnum) / 4);
644 for i in 0 to 31 loop
646 v.e.write_cr_data(i) := crresult;
648 v.e.write_cr_data(i) := e_in.cr(i);
653 result := msr_copy(ctrl.msr);
656 report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
657 "=" & to_hstring(a_in);
658 if is_fast_spr(e_in.read_reg1) then
660 if decode_spr_num(e_in.insn) = SPR_XER then
661 -- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
662 result(63 downto 32) := (others => '0');
663 result(63-32) := v.e.xerc.so;
664 result(63-33) := v.e.xerc.ov;
665 result(63-34) := v.e.xerc.ca;
666 result(63-35 downto 63-43) := "000000000";
667 result(63-44) := v.e.xerc.ov32;
668 result(63-45) := v.e.xerc.ca32;
671 case decode_spr_num(e_in.insn) is
677 result := (others => '0');
682 if e_in.insn(20) = '0' then
684 result := x"00000000" & e_in.cr;
687 crnum := fxm_to_num(insn_fxm(e_in.insn));
688 result := (others => '0');
693 result(hi downto lo) := e_in.cr(hi downto lo);
699 v.e.write_cr_enable := '1';
700 if e_in.insn(20) = '0' then
702 v.e.write_cr_mask := insn_fxm(e_in.insn);
704 -- mtocrf: We require one hot priority encoding here
705 crnum := fxm_to_num(insn_fxm(e_in.insn));
706 v.e.write_cr_mask := num_to_fxm(crnum);
708 v.e.write_cr_data := c_in(31 downto 0);
710 -- FIXME handle just the bits we need to.
711 ctrl_tmp.msr <= msr_copy(c_in);
713 report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
714 "=" & to_hstring(c_in);
715 if is_fast_spr(e_in.write_reg) then
718 if decode_spr_num(e_in.insn) = SPR_XER then
719 v.e.xerc.so := c_in(63-32);
720 v.e.xerc.ov := c_in(63-33);
721 v.e.xerc.ca := c_in(63-34);
722 v.e.xerc.ov32 := c_in(63-44);
723 v.e.xerc.ca32 := c_in(63-45);
724 v.e.write_xerc_enable := '1';
728 case decode_spr_num(e_in.insn) is
730 ctrl_tmp.dec <= c_in;
735 result := popcnt_result;
738 result := parity_result;
740 when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
741 result := rotator_result;
742 if e_in.output_carry = '1' then
743 set_carry(v.e, rotator_carry, rotator_carry);
746 when OP_SIM_CONFIG =>
747 -- bit 0 was used to select the microwatt console, which
748 -- we no longer support.
749 result := x"0000000000000000";
753 f_out.redirect <= '1';
754 f_out.redirect_nia <= next_nia;
759 when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
761 v.mul_in_progress := '1';
763 x_to_multiply.valid <= '1';
765 when OP_DIV | OP_DIVE | OP_MOD =>
767 v.div_in_progress := '1';
769 x_to_divider.valid <= '1';
771 when OP_LOAD | OP_STORE =>
772 -- loadstore/dcache has its own port to writeback
776 terminate_out <= '1';
780 v.e.rc := e_in.rc and e_in.valid;
782 -- Update LR on the next cycle after a branch link
784 -- WARNING: The LR update isn't tracked by our hazard tracker. This
785 -- will work (well I hope) because it only happens on branches
786 -- which will flush all decoded instructions. By the time
787 -- fetch catches up, we'll have the new LR. This will
788 -- *not* work properly however if we have a branch predictor,
789 -- in which case the solution would probably be to keep a
790 -- local cache of the updated LR in execute1 (flushed on
791 -- exceptions) that is used instead of the value from
792 -- decode when its content is valid.
793 if e_in.lr = '1' then
795 v.next_lr := next_nia;
797 report "Delayed LR update to " & to_hstring(next_nia);
800 elsif r.lr_update = '1' then
803 v.e.write_reg := fast_spr_num(SPR_LR);
805 elsif r.cntz_in_progress = '1' then
806 -- cnt[lt]z always takes two cycles
807 result := countzero_result;
809 v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
810 v.e.rc := v.slow_op_rc;
811 v.e.xerc := v.slow_op_xerc;
813 elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
814 if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
815 (r.div_in_progress = '1' and divider_to_x.valid = '1') then
816 if r.mul_in_progress = '1' then
817 result := multiply_to_x.write_reg_data;
818 overflow := multiply_to_x.overflow;
820 result := divider_to_x.write_reg_data;
821 overflow := divider_to_x.overflow;
824 v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
825 v.e.rc := v.slow_op_rc;
826 v.e.xerc := v.slow_op_xerc;
827 v.e.write_xerc_enable := v.slow_op_oe;
828 -- We must test oe because the RC update code in writeback
829 -- will use the xerc value to set CR0:SO so we must not clobber
830 -- xerc if OE wasn't set.
831 if v.slow_op_oe = '1' then
832 v.e.xerc.ov := overflow;
833 v.e.xerc.ov32 := overflow;
834 v.e.xerc.so := v.slow_op_xerc.so or overflow;
839 v.mul_in_progress := r.mul_in_progress;
840 v.div_in_progress := r.div_in_progress;
844 if exception = '1' then
845 v.e.exc_write_enable := '1';
846 if exception_nextpc = '1' then
847 v.e.exc_write_data := next_nia;
849 ctrl_tmp.irq_state <= WRITE_SRR1;
853 v.e.write_data := result;
854 v.e.write_enable := result_en;
856 -- Outputs to loadstore1 (async)
857 lv := Execute1ToLoadstore1Init;
858 if e_in.valid = '1' and (e_in.insn_type = OP_LOAD or e_in.insn_type = OP_STORE) then
861 if e_in.insn_type = OP_LOAD then
867 lv.write_reg := gspr_to_gpr(e_in.write_reg);
868 lv.length := e_in.data_len;
869 lv.byte_reverse := e_in.byte_reverse;
870 lv.sign_extend := e_in.sign_extend;
871 lv.update := e_in.update;
872 lv.update_reg := gspr_to_gpr(e_in.read_reg1);
874 lv.reserve := e_in.reserve;
876 -- decode l*cix and st*cix instructions here
877 if e_in.insn(31 downto 26) = "011111" and e_in.insn(10 downto 9) = "11" and
878 e_in.insn(5 downto 1) = "10101" then
889 flush_out <= f_out.redirect;
891 end architecture behaviour;