From: Paul Mackerras Date: Sat, 18 Jun 2022 06:24:30 +0000 (+1000) Subject: execute1: Restructure to separate out execution of side effects X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=813e2317bf1f1c10d988f660c0a4282da316a3b9;p=microwatt.git execute1: Restructure to separate out execution of side effects We now have a record that represents the actions taken in executing an instruction, and a process that computes that for the incoming instruction. We no longer have 'current' or 'r.cur_instr', instead things like the destination register are put into r.e in the first cycle of an instruction and not reinitialized in subsequent busy cycles. For mfspr and mtspr, we now decode "slow" SPR numbers (those SPRs that are not stored in the register file) to a new "spr_selector" record in decode1 (excluding those in the loadstore unit). With this, the result for mfspr is determined in the data path. Signed-off-by: Paul Mackerras --- diff --git a/common.vhdl b/common.vhdl index bab5aed..7ecf4e2 100644 --- a/common.vhdl +++ b/common.vhdl @@ -124,6 +124,23 @@ package common is end record; constant xerc_init : xer_common_t := (others => '0'); + subtype spr_selector is std_ulogic_vector(2 downto 0); + type spr_id is record + sel : spr_selector; + valid : std_ulogic; + ispmu : std_ulogic; + end record; + constant spr_id_init : spr_id := (sel => "000", others => '0'); + + constant SPRSEL_TB : spr_selector := 3x"0"; + constant SPRSEL_TBU : spr_selector := 3x"1"; + constant SPRSEL_DEC : spr_selector := 3x"2"; + constant SPRSEL_PVR : spr_selector := 3x"3"; + constant SPRSEL_LOGA : spr_selector := 3x"4"; + constant SPRSEL_LOGD : spr_selector := 3x"5"; + constant SPRSEL_CFAR : spr_selector := 3x"6"; + constant SPRSEL_XER : spr_selector := 3x"7"; + -- FPSCR bit numbers constant FPSCR_FX : integer := 63 - 32; constant FPSCR_FEX : integer := 63 - 33; @@ -235,11 +252,13 @@ package common is decode: decode_rom_t; br_pred: std_ulogic; -- Branch was predicted to be taken big_endian: std_ulogic; + spr_info : spr_id; end record; constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'), ispr1 => (others => '0'), ispr2 => (others => '0'), ispro => (others => '0'), - decode => decode_rom_init, br_pred => '0', big_endian => '0'); + decode => decode_rom_init, br_pred => '0', big_endian => '0', + spr_info => spr_id_init); type Decode1ToFetch1Type is record redirect : std_ulogic; @@ -299,6 +318,7 @@ package common is sub_select : std_ulogic_vector(2 downto 0); -- sub-result selection repeat : std_ulogic; -- set if instruction is cracked into two ops second : std_ulogic; -- set if this is the second op + spr_select : spr_id; end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init, @@ -311,7 +331,8 @@ package common is read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), result_sel => "000", sub_select => "000", - repeat => '0', second => '0', others => (others => '0')); + repeat => '0', second => '0', spr_select => spr_id_init, + others => (others => '0')); type MultiplyInputType is record valid: std_ulogic; diff --git a/cr_file.vhdl b/cr_file.vhdl index d1aedba..940b95b 100644 --- a/cr_file.vhdl +++ b/cr_file.vhdl @@ -66,7 +66,11 @@ begin crs <= crs_updated; end if; if w_in.write_xerc_enable = '1' then - report "Writing XERC"; + report "Writing XERC SO=" & std_ulogic'image(xerc_updated.so) & + " OV=" & std_ulogic'image(xerc_updated.ov) & + " CA=" & std_ulogic'image(xerc_updated.ca) & + " OV32=" & std_ulogic'image(xerc_updated.ov32) & + " CA32=" & std_ulogic'image(xerc_updated.ca32); xerc <= xerc_updated; end if; end if; diff --git a/decode1.vhdl b/decode1.vhdl index baf4347..fb92b9e 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -519,6 +519,40 @@ architecture behaviour of decode1 is constant nop_instr : decode_rom_t := (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE); constant fetch_fail_inst: decode_rom_t := (LDST, NONE, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE); + function map_spr(sprn : spr_num_t) return spr_id is + variable i : spr_id; + begin + i.sel := "000"; + i.valid := '1'; + i.ispmu := '0'; + case sprn is + when SPR_TB => + i.sel := SPRSEL_TB; + when SPR_TBU => + i.sel := SPRSEL_TBU; + when SPR_DEC => + i.sel := SPRSEL_DEC; + when SPR_PVR => + i.sel := SPRSEL_PVR; + when 724 => -- LOG_ADDR SPR + i.sel := SPRSEL_LOGA; + when 725 => -- LOG_DATA SPR + i.sel := SPRSEL_LOGD; + when SPR_UPMC1 | SPR_UPMC2 | SPR_UPMC3 | SPR_UPMC4 | SPR_UPMC5 | SPR_UPMC6 | + SPR_UMMCR0 | SPR_UMMCR1 | SPR_UMMCR2 | SPR_UMMCRA | SPR_USIER | SPR_USIAR | SPR_USDAR | + SPR_PMC1 | SPR_PMC2 | SPR_PMC3 | SPR_PMC4 | SPR_PMC5 | SPR_PMC6 | + SPR_MMCR0 | SPR_MMCR1 | SPR_MMCR2 | SPR_MMCRA | SPR_SIER | SPR_SIAR | SPR_SDAR => + i.ispmu := '1'; + when SPR_CFAR => + i.sel := SPRSEL_CFAR; + when SPR_XER => + i.sel := SPRSEL_XER; + when others => + i.valid := '0'; + end case; + return i; + end; + begin decode1_0: process(clk) begin @@ -586,6 +620,9 @@ begin majorop := unsigned(f_in.insn(31 downto 26)); v.decode := major_decode_rom_array(to_integer(majorop)); + sprn := decode_spr_num(f_in.insn); + v.spr_info := map_spr(sprn); + case to_integer(unsigned(majorop)) is when 4 => -- major opcode 4, mostly VMX/VSX stuff but also some integer ops (madd*) @@ -598,7 +635,6 @@ begin v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1)))); -- Work out ispr1/ispro independent of v.decode since they seem to be critical path - sprn := decode_spr_num(f_in.insn); v.ispr1 := fast_spr_num(sprn); v.ispro := fast_spr_num(sprn); diff --git a/decode2.vhdl b/decode2.vhdl index 5aa1a6f..8998f2b 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -228,13 +228,6 @@ architecture behaviour of decode2 is OP_SHR => "010", OP_EXTSWSLI => "010", OP_MUL_L64 => "011", -- muldiv_result - OP_MUL_H64 => "011", - OP_MUL_H32 => "011", - OP_DIV => "011", - OP_DIVE => "011", - OP_MOD => "011", - OP_CNTZ => "100", -- countbits_result - OP_POPCNT => "100", OP_MFSPR => "101", -- spr_result OP_B => "110", -- next_nia OP_BC => "110", @@ -440,6 +433,8 @@ begin decoded_reg_o.reg(0) := not r.repeat; end if; + v.e.spr_select := d_in.spr_info; + r_out.read1_enable <= decoded_reg_a.reg_valid and d_in.valid; r_out.read1_reg <= decoded_reg_a.reg; r_out.read2_enable <= decoded_reg_b.reg_valid and d_in.valid; @@ -496,6 +491,17 @@ begin v.e.result_sel := "000"; -- select adder output end if; end if; + if op = OP_MFSPR then + if is_fast_spr(d_in.ispr1) = '1' then + v.e.result_sel := "000"; -- adder_result, effectively a_in + elsif d_in.spr_info.valid = '0' then + -- Privileged mfspr to invalid/unimplemented SPR numbers + -- writes the contents of RT back to RT (i.e. it's a no-op) + v.e.result_sel := "001"; -- logical_result + elsif d_in.spr_info.ispmu = '1' then + v.e.result_sel := "100"; -- pmuspr_result + end if; + end if; -- See if any of the operands can get their value via the bypass path. case gpr_a_bypass is diff --git a/execute1.vhdl b/execute1.vhdl index b955b75..21f6f8f 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -62,7 +62,6 @@ end entity execute1; architecture behaviour of execute1 is type reg_type is record e : Execute1ToWritebackType; - cur_instr : Decode2ToExecute1Type; busy: std_ulogic; terminate: std_ulogic; intr_pending : std_ulogic; @@ -70,6 +69,8 @@ architecture behaviour of execute1 is trace_next : std_ulogic; prev_op : insn_type_t; br_taken : std_ulogic; + oe : std_ulogic; + mul_select : std_ulogic_vector(1 downto 0); mul_in_progress : std_ulogic; mul_finish : std_ulogic; div_in_progress : std_ulogic; @@ -83,15 +84,42 @@ architecture behaviour of execute1 is end record; constant reg_type_init : reg_type := (e => Execute1ToWritebackInit, - cur_instr => Decode2ToExecute1Init, busy => '0', terminate => '0', intr_pending => '0', fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, br_taken => '0', + oe => '0', mul_select => "00", mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0', no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0', taken_branch_event => '0', br_mispredict => '0', others => (others => '0')); + type actions_type is record + e : Execute1ToWritebackType; + complete : std_ulogic; + exception : std_ulogic; + trap : std_ulogic; + terminate : std_ulogic; + write_msr : std_ulogic; + new_msr : std_ulogic_vector(63 downto 0); + write_xerlow : std_ulogic; + write_pmuspr : std_ulogic; + write_dec : std_ulogic; + write_loga : std_ulogic; + inc_loga : std_ulogic; + write_cfar : std_ulogic; + take_branch : std_ulogic; + direct_branch : std_ulogic; + start_mul : std_ulogic; + start_div : std_ulogic; + start_cntz : std_ulogic; + do_trace : std_ulogic; + fp_intr : std_ulogic; + icache_inval : std_ulogic; + end record; + constant actions_type_init : actions_type := + (e => Execute1ToWritebackInit, new_msr => (others => '0'), others => '0'); + signal r, rin : reg_type; + signal actions : actions_type; signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0); signal cr_in : std_ulogic_vector(31 downto 0); @@ -112,9 +140,9 @@ architecture behaviour of execute1 is signal adder_result: std_ulogic_vector(63 downto 0); signal misc_result: std_ulogic_vector(63 downto 0); signal muldiv_result: std_ulogic_vector(63 downto 0); + signal shortmul_result: std_ulogic_vector(63 downto 0); signal spr_result: std_ulogic_vector(63 downto 0); signal next_nia : std_ulogic_vector(63 downto 0); - signal current: Decode2ToExecute1Type; signal carry_32 : std_ulogic; signal carry_64 : std_ulogic; @@ -369,7 +397,7 @@ begin br_taken_complete => r.taken_branch_event, br_mispredict => r.br_mispredict, others => '0'); - x_to_pmu.nia <= current.nia; + x_to_pmu.nia <= e_in.nia; x_to_pmu.addr <= (others => '0'); x_to_pmu.addr_v <= '0'; x_to_pmu.spr_num <= e_in.insn(20 downto 16); @@ -381,7 +409,7 @@ begin -- (SO, OV[32] and CA[32]) are only modified by instructions that are -- handled here, we can just forward the result being sent to -- writeback. - xerc_in <= r.e.xerc when r.e.write_xerc_enable = '1' or r.busy = '1' else e_in.xerc; + xerc_in <= r.e.xerc when (r.e.write_xerc_enable and r.e.valid) = '1' else e_in.xerc; with e_in.unit select busy_out <= l_in.busy or r.busy or fp_in.busy when LDST, @@ -391,15 +419,24 @@ begin terminate_out <= r.terminate; - current <= e_in when r.busy = '0' else r.cur_instr; + -- Slow SPR read mux + with e_in.spr_select.sel select spr_result <= + ctrl.tb when SPRSEL_TB, + 32x"0" & ctrl.tb(63 downto 32) when SPRSEL_TBU, + ctrl.dec when SPRSEL_DEC, + 32x"0" & PVR_MICROWATT when SPRSEL_PVR, + log_wr_addr & r.log_addr_spr when SPRSEL_LOGA, + log_rd_data when SPRSEL_LOGD, + ctrl.cfar when SPRSEL_CFAR, + assemble_xer(xerc_in, ctrl.xer_low) when others; -- Result mux - with current.result_sel select alu_result <= + with e_in.result_sel select alu_result <= adder_result when "000", logical_result when "001", rotator_result when "010", - muldiv_result when "011", - countbits_result when "100", + shortmul_result when "011", + pmu_to_x.spr_val when "100", spr_result when "101", next_nia when "110", misc_result when others; @@ -545,13 +582,10 @@ begin x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0)); end if; - case current.sub_select(1 downto 0) is + shortmul_result <= std_ulogic_vector(resize(signed(mshort_p), 64)); + case r.mul_select is when "00" => - if HAS_SHORT_MULT and r.mul_in_progress = '0' then - muldiv_result <= std_ulogic_vector(resize(signed(mshort_p), 64)); - else - muldiv_result <= multiply_to_x.result(63 downto 0); - end if; + muldiv_result <= multiply_to_x.result(63 downto 0); when "01" => muldiv_result <= multiply_to_x.result(127 downto 64); when "10" => @@ -562,7 +596,7 @@ begin end case; -- Compute misc_result - case current.sub_select is + case e_in.sub_select is when "000" => misc_result <= (others => '0'); when "001" => @@ -684,7 +718,7 @@ begin bf := insn_bf(e_in.insn); crnum := to_integer(unsigned(bf)); newcrf := (others => '0'); - case current.sub_select is + case e_in.sub_select is when "000" => -- CMP and CMPL instructions if e_in.is_signed = '1' then @@ -697,7 +731,7 @@ begin when "010" => newcrf := ppc_cmpeqb(a_in, b_in); when "011" => - if current.insn(1) = '1' then + if e_in.insn(1) = '1' then -- CR logical instructions j := (7 - crnum) * 4; newcrf := cr_in(j + 3 downto j); @@ -728,7 +762,7 @@ begin newcrf := xerc_in.ov & xerc_in.ov32 & xerc_in.ca & xerc_in.ca32; when others => end case; - if current.insn_type = OP_MTCRF then + if e_in.insn_type = OP_MTCRF then if e_in.insn(20) = '0' then -- mtcrf write_cr_mask <= insn_fxm(e_in.insn); @@ -737,201 +771,86 @@ begin crnum := fxm_to_num(insn_fxm(e_in.insn)); write_cr_mask <= num_to_fxm(crnum); end if; - write_cr_data <= c_in(31 downto 0); else write_cr_mask <= num_to_fxm(crnum); - write_cr_data <= newcrf & newcrf & newcrf & newcrf & - newcrf & newcrf & newcrf & newcrf; end if; + for i in 0 to 7 loop + if write_cr_mask(i) = '0' then + write_cr_data(i*4 + 3 downto i*4) <= cr_in(i*4 + 3 downto i*4); + elsif e_in.insn_type = OP_MTCRF then + write_cr_data(i*4 + 3 downto i*4) <= c_in(i*4 + 3 downto i*4); + else + write_cr_data(i*4 + 3 downto i*4) <= newcrf; + end if; + end loop; end process; - execute1_1: process(all) - variable v : reg_type; + execute1_actions: process(all) + variable v: actions_type; variable bo, bi : std_ulogic_vector(4 downto 0); - variable overflow : std_ulogic; - variable lv : Execute1ToLoadstore1Type; - variable irq_valid : std_ulogic; - variable exception : std_ulogic; variable illegal : std_ulogic; - variable is_branch : std_ulogic; - variable is_direct_branch : std_ulogic; - variable taken_branch : std_ulogic; - variable abs_branch : std_ulogic; - variable spr_val : std_ulogic_vector(63 downto 0); - variable do_trace : std_ulogic; - variable hold_wr_data : std_ulogic; - variable fv : Execute1ToFPUType; + variable privileged : std_ulogic; + variable slow_op : std_ulogic; begin - is_branch := '0'; - is_direct_branch := '0'; - taken_branch := '0'; - abs_branch := '0'; - hold_wr_data := '0'; - - v := r; - v.e := Execute1ToWritebackInit; + v := actions_type_init; + v.e.write_data := alu_result; + v.e.write_reg := e_in.write_reg; + v.e.write_enable := e_in.write_reg_enable; + v.e.rc := e_in.rc; + v.e.write_cr_data := write_cr_data; + v.e.write_cr_mask := write_cr_mask; + v.e.write_cr_enable := e_in.output_cr; + v.e.write_xerc_enable := e_in.output_xer; + v.e.xerc := xerc_in; + v.new_msr := ctrl.msr; + v.e.write_xerc_enable := e_in.output_xer; v.e.redir_mode := ctrl.msr(MSR_IR) & not ctrl.msr(MSR_PR) & not ctrl.msr(MSR_LE) & not ctrl.msr(MSR_SF); - v.e.xerc := xerc_in; - - lv := Execute1ToLoadstore1Init; - fv := Execute1ToFPUInit; - - x_to_multiply.valid <= '0'; - x_to_divider.valid <= '0'; - v.mul_in_progress := '0'; - v.div_in_progress := '0'; - v.cntz_in_progress := '0'; - v.mul_finish := '0'; - v.ext_interrupt := '0'; - v.taken_branch_event := '0'; - v.br_mispredict := '0'; - - x_to_pmu.mfspr <= '0'; - x_to_pmu.mtspr <= '0'; - x_to_pmu.tbbits(3) <= ctrl.tb(63 - 47); - x_to_pmu.tbbits(2) <= ctrl.tb(63 - 51); - x_to_pmu.tbbits(1) <= ctrl.tb(63 - 55); - x_to_pmu.tbbits(0) <= ctrl.tb(63 - 63); - x_to_pmu.pmm_msr <= ctrl.msr(MSR_PMM); - x_to_pmu.pr_msr <= ctrl.msr(MSR_PR); - - spr_result <= (others => '0'); - spr_val := (others => '0'); - - ctrl_tmp <= ctrl; - -- FIXME: run at 512MHz not core freq - ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1); - ctrl_tmp.dec <= std_ulogic_vector(unsigned(ctrl.dec) - 1); - - irq_valid := ctrl.msr(MSR_EE) and (pmu_to_x.intr or ctrl.dec(63) or ext_irq_in); - - v.terminate := '0'; - icache_inval <= '0'; - v.busy := '0'; - - -- Next insn adder used in a couple of places - next_nia <= std_ulogic_vector(unsigned(e_in.nia) + 4); - - -- rotator control signals - right_shift <= '1' when e_in.insn_type = OP_SHR else '0'; - rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0'; - rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0'; - rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0'; - - do_popcnt <= '1' when e_in.insn_type = OP_POPCNT else '0'; + v.e.intr_vec := 16#700#; + v.e.mode_32bit := not ctrl.msr(MSR_SF); + v.e.instr_tag := e_in.instr_tag; + v.e.last_nia := e_in.nia; + v.e.br_offset := 64x"4"; + + -- Note the difference between v.exception and v.trap: + -- v.exception signals a condition that prevents execution of the + -- instruction, and hence shouldn't depend on operand data, so as to + -- avoid timing chains through both data and control paths. + -- v.trap also means we want to generate an interrupt, but doesn't + -- cancel instruction execution (hence we need to avoid setting any + -- side-effect flags or write enables when generating a trap). + -- With v.trap = 1 we will assert both r.e.valid and r.e.interrupt + -- to writeback, and it will complete the instruction and take + -- and interrupt. It is OK for v.trap to depend on operand data. illegal := '0'; - if r.intr_pending = '1' then - v.e.srr1 := r.e.srr1; - v.e.intr_vec := r.e.intr_vec; - end if; - if valid_in = '1' then - v.e.last_nia := e_in.nia; - else - v.e.last_nia := r.e.last_nia; - end if; - - v.e.mode_32bit := not ctrl.msr(MSR_SF); - v.e.instr_tag := current.instr_tag; + privileged := '0'; + slow_op := '0'; - do_trace := valid_in and ctrl.msr(MSR_SE); - if valid_in = '1' then - v.cur_instr := e_in; - v.prev_op := e_in.insn_type; + if ctrl.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then + privileged := '1'; end if; - -- Determine if there is any interrupt to be taken - -- before/instead of executing this instruction - exception := r.intr_pending; - if valid_in = '1' and e_in.second = '0' and r.intr_pending = '0' then - if HAS_FPU and r.fp_exception_next = '1' then - -- This is used for FP-type program interrupts that - -- become pending due to MSR[FE0,FE1] changing from 00 to non-zero. - exception := '1'; - v.e.intr_vec := 16#700#; - v.e.srr1(47 - 43) := '1'; - v.e.srr1(47 - 47) := '1'; - elsif r.trace_next = '1' then - -- Generate a trace interrupt rather than executing the next instruction - -- or taking any asynchronous interrupt - exception := '1'; - v.e.intr_vec := 16#d00#; - v.e.srr1(47 - 33) := '1'; - if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or - r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then - v.e.srr1(47 - 35) := '1'; - elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then - v.e.srr1(47 - 36) := '1'; - end if; - - elsif irq_valid = '1' then - -- Don't deliver the interrupt until we have a valid instruction - -- coming in, so we have a valid NIA to put in SRR0. - if pmu_to_x.intr = '1' then - v.e.intr_vec := 16#f00#; - report "IRQ valid: PMU"; - elsif ctrl.dec(63) = '1' then - v.e.intr_vec := 16#900#; - report "IRQ valid: DEC"; - elsif ext_irq_in = '1' then - v.e.intr_vec := 16#500#; - report "IRQ valid: External"; - v.ext_interrupt := '1'; - end if; - exception := '1'; - - elsif ctrl.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then - -- generate a program interrupt - exception := '1'; - v.e.intr_vec := 16#700#; - -- set bit 45 to indicate privileged instruction type interrupt - v.e.srr1(47 - 45) := '1'; - report "privileged instruction"; - - elsif not HAS_FPU and e_in.fac = FPU then - -- make lfd/stfd/lfs/stfs etc. illegal in no-FPU implementations - illegal := '1'; - - elsif HAS_FPU and ctrl.msr(MSR_FP) = '0' and e_in.fac = FPU then - -- generate a floating-point unavailable interrupt - exception := '1'; - v.e.intr_vec := 16#800#; - report "FP unavailable interrupt"; - end if; + if (not HAS_FPU and e_in.fac = FPU) or e_in.unit = NONE then + -- make lfd/stfd/lfs/stfs etc. illegal in no-FPU implementations + illegal := '1'; end if; - if exception = '1' and l_in.in_progress = '1' then - -- We can't send this interrupt to writeback yet because there are - -- still instructions in loadstore1 that haven't completed. - v.intr_pending := '1'; - v.busy := '1'; - end if; - if l_in.interrupt = '1' then - v.intr_pending := '0'; - end if; - - v.no_instr_avail := not (e_in.valid or l_in.busy or l_in.in_progress or r.busy or fp_in.busy); - v.instr_dispatch := valid_in and not exception and not illegal; - - if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then - v.e.valid := '1'; - - case_0: case e_in.insn_type is + v.do_trace := ctrl.msr(MSR_SE); + case_0: case e_in.insn_type is when OP_ILLEGAL => - -- we need two cycles to write srr0 and 1 - -- will need more when we have to write HEIR illegal := '1'; when OP_SC => -- check bit 1 of the instruction is 1 so we know this is sc; -- 0 would mean scv, so generate an illegal instruction interrupt - -- we need two cycles to write srr0 and 1 if e_in.insn(1) = '1' then - exception := '1'; + v.trap := '1'; v.e.intr_vec := 16#C00#; v.e.last_nia := next_nia; - report "sc"; + if e_in.valid = '1' then + report "sc"; + end if; else illegal := '1'; end if; @@ -940,12 +859,14 @@ begin -- if not then it is illegal if e_in.insn(10 downto 1) = "0100000000" then v.terminate := '1'; - report "ATTN"; + if e_in.valid = '1' then + report "ATTN"; + end if; else illegal := '1'; end if; when OP_NOP | OP_DCBF | OP_DCBST | OP_DCBT | OP_DCBTST | OP_ICBT => - -- Do nothing + -- Do nothing when OP_ADD => if e_in.output_carry = '1' then if e_in.input_carry /= OV then @@ -966,27 +887,34 @@ begin v.e.srr1(47 - 46) := '1'; if or (trapval and insn_to(e_in.insn)) = '1' then -- generate trap-type program interrupt - exception := '1'; - report "trap"; + v.trap := '1'; + if e_in.valid = '1' then + report "trap"; + end if; end if; when OP_ADDG6S => when OP_CMPRB => when OP_CMPEQB => when OP_AND | OP_OR | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS | - OP_BPERM | OP_BCD => + OP_BPERM | OP_BCD => when OP_B => - is_branch := '1'; - taken_branch := '1'; - is_direct_branch := '1'; - abs_branch := e_in.br_abs; + v.take_branch := '1'; + v.direct_branch := '1'; + v.e.br_last := '1'; + v.e.br_taken := '1'; + v.e.br_offset := b_in; + v.e.abs_br := insn_aa(e_in.insn); + if e_in.br_pred = '0' then + -- should never happen + v.e.redirect := '1'; + end if; if ctrl.msr(MSR_BE) = '1' then - do_trace := '1'; + v.do_trace := '1'; end if; - v.taken_branch_event := '1'; - when OP_BC | OP_BCREG => + v.write_cfar := '1'; + when OP_BC => -- read_data1 is CTR - -- for OP_BCREG, read_data2 is target register (CTR, LR or TAR) -- If this instruction updates both CTR and LR, then it is -- doubled; the first instruction decrements CTR and determines -- whether the branch is taken, and the second does the @@ -994,21 +922,52 @@ begin bo := insn_bo(e_in.insn); bi := insn_bi(e_in.insn); if e_in.second = '0' then - taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in); + v.take_branch := ppc_bc_taken(bo, bi, cr_in, a_in); else - taken_branch := r.br_taken; + v.take_branch := r.br_taken; + end if; + if v.take_branch = '1' then + v.e.br_offset := b_in; + v.e.abs_br := insn_aa(e_in.insn); end if; - v.br_taken := taken_branch; - v.taken_branch_event := taken_branch; - abs_branch := e_in.br_abs; if e_in.repeat = '0' or e_in.second = '1' then - is_branch := '1'; - if e_in.insn_type = OP_BC then - is_direct_branch := '1'; + -- Mispredicted branches cause a redirect + if v.take_branch /= e_in.br_pred then + v.e.redirect := '1'; end if; + v.direct_branch := '1'; + v.e.br_last := '1'; + v.e.br_taken := v.take_branch; if ctrl.msr(MSR_BE) = '1' then - do_trace := '1'; + v.do_trace := '1'; end if; + v.write_cfar := v.take_branch; + end if; + when OP_BCREG => + -- read_data1 is CTR, read_data2 is target register (CTR, LR or TAR) + -- If this instruction updates both CTR and LR, then it is + -- doubled; the first instruction decrements CTR and determines + -- whether the branch is taken, and the second does the + -- redirect and the LR update. + bo := insn_bo(e_in.insn); + bi := insn_bi(e_in.insn); + if e_in.second = '0' then + v.take_branch := ppc_bc_taken(bo, bi, cr_in, a_in); + else + v.take_branch := r.br_taken; + end if; + if v.take_branch = '1' then + v.e.br_offset := b_in; + v.e.abs_br := '1'; + end if; + if e_in.repeat = '0' or e_in.second = '1' then + -- Indirect branches are never predicted taken + v.e.redirect := v.take_branch; + v.e.br_taken := v.take_branch; + if ctrl.msr(MSR_BE) = '1' then + v.do_trace := '1'; + end if; + v.write_cfar := v.take_branch; end if; when OP_RFID => @@ -1016,131 +975,115 @@ begin not a_in(MSR_LE) & not a_in(MSR_SF); -- Can't use msr_copy here because the partial function MSR -- bits should be left unchanged, not zeroed. - ctrl_tmp.msr(63 downto 31) <= a_in(63 downto 31); - ctrl_tmp.msr(26 downto 22) <= a_in(26 downto 22); - ctrl_tmp.msr(15 downto 0) <= a_in(15 downto 0); + v.new_msr(63 downto 31) := a_in(63 downto 31); + v.new_msr(26 downto 22) := a_in(26 downto 22); + v.new_msr(15 downto 0) := a_in(15 downto 0); if a_in(MSR_PR) = '1' then - ctrl_tmp.msr(MSR_EE) <= '1'; - ctrl_tmp.msr(MSR_IR) <= '1'; - ctrl_tmp.msr(MSR_DR) <= '1'; + v.new_msr(MSR_EE) := '1'; + v.new_msr(MSR_IR) := '1'; + v.new_msr(MSR_DR) := '1'; end if; - -- mark this as a branch so CFAR gets updated - is_branch := '1'; - taken_branch := '1'; - abs_branch := '1'; + v.write_msr := '1'; + v.e.br_offset := b_in; + v.e.abs_br := '1'; + v.e.redirect := '1'; + v.write_cfar := '1'; if HAS_FPU then - v.fp_exception_next := fp_in.exception and - (a_in(MSR_FE0) or a_in(MSR_FE1)); + v.fp_intr := fp_in.exception and + (a_in(MSR_FE0) or a_in(MSR_FE1)); end if; - do_trace := '0'; + v.do_trace := '0'; when OP_CNTZ | OP_POPCNT => - v.e.valid := '0'; - v.cntz_in_progress := '1'; - v.busy := '1'; + slow_op := '1'; + v.start_cntz := '1'; when OP_ISEL => when OP_CROP => when OP_MCRXRX => when OP_DARN => when OP_MFMSR => when OP_MFSPR => - report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & - "=" & to_hstring(a_in); if is_fast_spr(e_in.read_reg1) = '1' then - spr_val := a_in; - else - spr_val := c_in; - case decode_spr_num(e_in.insn) is - when SPR_XER => - spr_val := assemble_xer(xerc_in, ctrl.xer_low); - when SPR_TB => - spr_val := ctrl.tb; - when SPR_TBU => - spr_val(63 downto 32) := (others => '0'); - spr_val(31 downto 0) := ctrl.tb(63 downto 32); - when SPR_DEC => - spr_val := ctrl.dec; - when SPR_CFAR => - spr_val := ctrl.cfar; - when SPR_PVR => - spr_val(63 downto 32) := (others => '0'); - spr_val(31 downto 0) := PVR_MICROWATT; - when 724 => -- LOG_ADDR SPR - spr_val := log_wr_addr & r.log_addr_spr; - when 725 => -- LOG_DATA SPR - spr_val := log_rd_data; - v.log_addr_spr := std_ulogic_vector(unsigned(r.log_addr_spr) + 1); - when SPR_UPMC1 | SPR_UPMC2 | SPR_UPMC3 | SPR_UPMC4 | SPR_UPMC5 | SPR_UPMC6 | - SPR_UMMCR0 | SPR_UMMCR1 | SPR_UMMCR2 | SPR_UMMCRA | SPR_USIER | SPR_USIAR | SPR_USDAR | - SPR_PMC1 | SPR_PMC2 | SPR_PMC3 | SPR_PMC4 | SPR_PMC5 | SPR_PMC6 | - SPR_MMCR0 | SPR_MMCR1 | SPR_MMCR2 | SPR_MMCRA | SPR_SIER | SPR_SIAR | SPR_SDAR => - x_to_pmu.mfspr <= '1'; - spr_val := pmu_to_x.spr_val; - when others => - -- mfspr from unimplemented SPRs should be a nop in - -- supervisor mode and a program interrupt for user mode - if is_fast_spr(e_in.read_reg1) = '0' and ctrl.msr(MSR_PR) = '1' then - illegal := '1'; - end if; + if e_in.valid = '1' then + report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & + "=" & to_hstring(a_in); + end if; + elsif e_in.spr_select.valid = '1' then + if e_in.valid = '1' then + report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & + "=" & to_hstring(spr_result); + end if; + case e_in.spr_select.sel is + when SPRSEL_LOGD => + v.inc_loga := '1'; + when others => end case; + else + -- mfspr from unimplemented SPRs should be a nop in + -- supervisor mode and a program interrupt for user mode + if e_in.valid = '1' then + report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & + " invalid"; + end if; + if ctrl.msr(MSR_PR) = '1' then + illegal := '1'; + end if; end if; - spr_result <= spr_val; when OP_MFCR => when OP_MTCRF => when OP_MTMSRD => + v.write_msr := '1'; if e_in.insn(16) = '1' then -- just update EE and RI - ctrl_tmp.msr(MSR_EE) <= c_in(MSR_EE); - ctrl_tmp.msr(MSR_RI) <= c_in(MSR_RI); + v.new_msr(MSR_EE) := c_in(MSR_EE); + v.new_msr(MSR_RI) := c_in(MSR_RI); else -- Architecture says to leave out bits 3 (HV), 51 (ME) -- and 63 (LE) (IBM bit numbering) if e_in.is_32bit = '0' then - ctrl_tmp.msr(63 downto 61) <= c_in(63 downto 61); - ctrl_tmp.msr(59 downto 32) <= c_in(59 downto 32); + v.new_msr(63 downto 61) := c_in(63 downto 61); + v.new_msr(59 downto 32) := c_in(59 downto 32); end if; - ctrl_tmp.msr(31 downto 13) <= c_in(31 downto 13); - ctrl_tmp.msr(11 downto 1) <= c_in(11 downto 1); + v.new_msr(31 downto 13) := c_in(31 downto 13); + v.new_msr(11 downto 1) := c_in(11 downto 1); if c_in(MSR_PR) = '1' then - ctrl_tmp.msr(MSR_EE) <= '1'; - ctrl_tmp.msr(MSR_IR) <= '1'; - ctrl_tmp.msr(MSR_DR) <= '1'; + v.new_msr(MSR_EE) := '1'; + v.new_msr(MSR_IR) := '1'; + v.new_msr(MSR_DR) := '1'; end if; if HAS_FPU then - v.fp_exception_next := fp_in.exception and - (c_in(MSR_FE0) or c_in(MSR_FE1)); + v.fp_intr := fp_in.exception and + (c_in(MSR_FE0) or c_in(MSR_FE1)); end if; end if; when OP_MTSPR => - report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & - "=" & to_hstring(c_in); - if is_fast_spr(e_in.write_reg) = '0' then - -- slow spr - case decode_spr_num(e_in.insn) is - when SPR_XER => - v.e.xerc.so := c_in(63-32); - v.e.xerc.ov := c_in(63-33); - v.e.xerc.ca := c_in(63-34); - v.e.xerc.ov32 := c_in(63-44); - v.e.xerc.ca32 := c_in(63-45); - ctrl_tmp.xer_low <= c_in(17 downto 0); - when SPR_DEC => - ctrl_tmp.dec <= c_in; - when 724 => -- LOG_ADDR SPR - v.log_addr_spr := c_in(31 downto 0); - when SPR_UPMC1 | SPR_UPMC2 | SPR_UPMC3 | SPR_UPMC4 | SPR_UPMC5 | SPR_UPMC6 | - SPR_UMMCR0 | SPR_UMMCR2 | SPR_UMMCRA | - SPR_PMC1 | SPR_PMC2 | SPR_PMC3 | SPR_PMC4 | SPR_PMC5 | SPR_PMC6 | - SPR_MMCR0 | SPR_MMCR1 | SPR_MMCR2 | SPR_MMCRA | SPR_SIER | SPR_SIAR | SPR_SDAR => - x_to_pmu.mtspr <= '1'; - when others => - -- mtspr to unimplemented SPRs should be a nop in - -- supervisor mode and a program interrupt for user mode - if ctrl.msr(MSR_PR) = '1' then - illegal := '1'; - end if; - end case; + if e_in.valid = '1' then + report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & + "=" & to_hstring(c_in); + end if; + v.write_pmuspr := e_in.spr_select.ispmu; + if e_in.spr_select.valid = '1' and e_in.spr_select.ispmu = '0' then + case e_in.spr_select.sel is + when SPRSEL_XER => + v.e.xerc.so := c_in(63-32); + v.e.xerc.ov := c_in(63-33); + v.e.xerc.ca := c_in(63-34); + v.e.xerc.ov32 := c_in(63-44); + v.e.xerc.ca32 := c_in(63-45); + v.write_xerlow := '1'; + when SPRSEL_DEC => + v.write_dec := '1'; + when SPRSEL_LOGA => + v.write_loga := '1'; + when others => + end case; + elsif is_fast_spr(e_in.write_reg) = '0' then + -- mtspr to unimplemented SPRs should be a nop in + -- supervisor mode and a program interrupt for user mode + if ctrl.msr(MSR_PR) = '1' then + illegal := '1'; + end if; end if; when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR | OP_EXTSWSLI => if e_in.output_carry = '1' then @@ -1150,13 +1093,12 @@ begin when OP_ISYNC => v.e.redirect := '1'; - v.e.br_offset := std_ulogic_vector(to_unsigned(4, 64)); when OP_ICBI => - icache_inval <= '1'; + v.icache_inval := '1'; - when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 => - if HAS_SHORT_MULT and e_in.insn_type = OP_MUL_L64 and e_in.insn(26) = '1' and + when OP_MUL_L64 => + if HAS_SHORT_MULT and e_in.insn(26) = '1' and fits_in_n_bits(a_in, 16) and fits_in_n_bits(b_in, 16) then -- Operands fit into 16 bits, so use short multiplier if e_in.oe = '1' then @@ -1165,54 +1107,230 @@ begin end if; else -- Use standard multiplier - v.e.valid := '0'; - v.mul_in_progress := '1'; - v.busy := '1'; - x_to_multiply.valid <= '1'; + v.start_mul := '1'; + slow_op := '1'; end if; + when OP_MUL_H64 | OP_MUL_H32 => + v.start_mul := '1'; + slow_op := '1'; + when OP_DIV | OP_DIVE | OP_MOD => - v.e.valid := '0'; - v.div_in_progress := '1'; - v.busy := '1'; - x_to_divider.valid <= '1'; + v.start_div := '1'; + slow_op := '1'; + + when OP_FETCH_FAILED => + -- Handling an ITLB miss doesn't count as having executed an instruction + v.do_trace := '0'; when others => - v.terminate := '1'; - report "illegal"; - end case; - - -- Mispredicted branches cause a redirect - if is_branch = '1' then - if taken_branch = '1' then - ctrl_tmp.cfar <= e_in.nia; + if e_in.valid = '1' and e_in.unit = ALU then + report "unhandled insn_type " & insn_type_t'image(e_in.insn_type); end if; - if taken_branch = '1' then - v.e.br_offset := b_in; - v.e.abs_br := abs_branch; - else - v.e.br_offset := std_ulogic_vector(to_unsigned(4, 64)); + end case; + + if privileged = '1' then + -- generate a program interrupt + v.exception := '1'; + -- set bit 45 to indicate privileged instruction type interrupt + v.e.srr1(47 - 45) := '1'; + if e_in.valid = '1' then + report "privileged instruction"; + end if; + + elsif illegal = '1' then + v.exception := '1'; + -- Since we aren't doing Hypervisor emulation assist (0xe40) we + -- set bit 44 to indicate we have an illegal + v.e.srr1(47 - 44) := '1'; + if e_in.valid = '1' then + report "illegal instruction"; + end if; + + elsif HAS_FPU and ctrl.msr(MSR_FP) = '0' and e_in.fac = FPU then + -- generate a floating-point unavailable interrupt + v.exception := '1'; + v.e.intr_vec := 16#800#; + if e_in.valid = '1' then + report "FP unavailable interrupt"; + end if; + end if; + + if e_in.unit = ALU then + v.complete := e_in.valid and not v.exception and not slow_op; + end if; + + actions <= v; + end process; + + execute1_1: process(all) + variable v : reg_type; + variable overflow : std_ulogic; + variable lv : Execute1ToLoadstore1Type; + variable irq_valid : std_ulogic; + variable exception : std_ulogic; + variable fv : Execute1ToFPUType; + variable go : std_ulogic; + begin + v := r; + if r.busy = '0' then + v.e := actions.e; + v.oe := e_in.oe; + v.mul_select := e_in.sub_select(1 downto 0); + end if; + + lv := Execute1ToLoadstore1Init; + fv := Execute1ToFPUInit; + + x_to_multiply.valid <= '0'; + x_to_divider.valid <= '0'; + v.mul_in_progress := '0'; + v.div_in_progress := '0'; + v.cntz_in_progress := '0'; + v.mul_finish := '0'; + v.ext_interrupt := '0'; + v.taken_branch_event := '0'; + v.br_mispredict := '0'; + + x_to_pmu.mfspr <= '0'; + x_to_pmu.mtspr <= '0'; + x_to_pmu.tbbits(3) <= ctrl.tb(63 - 47); + x_to_pmu.tbbits(2) <= ctrl.tb(63 - 51); + x_to_pmu.tbbits(1) <= ctrl.tb(63 - 55); + x_to_pmu.tbbits(0) <= ctrl.tb(63 - 63); + x_to_pmu.pmm_msr <= ctrl.msr(MSR_PMM); + x_to_pmu.pr_msr <= ctrl.msr(MSR_PR); + + ctrl_tmp <= ctrl; + -- FIXME: run at 512MHz not core freq + ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1); + ctrl_tmp.dec <= std_ulogic_vector(unsigned(ctrl.dec) - 1); + + irq_valid := ctrl.msr(MSR_EE) and (pmu_to_x.intr or ctrl.dec(63) or ext_irq_in); + + v.terminate := '0'; + icache_inval <= '0'; + v.busy := '0'; + + -- Next insn adder used in a couple of places + next_nia <= std_ulogic_vector(unsigned(e_in.nia) + 4); + + -- rotator control signals + right_shift <= '1' when e_in.insn_type = OP_SHR else '0'; + rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0'; + rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0'; + rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0'; + + do_popcnt <= '1' when e_in.insn_type = OP_POPCNT else '0'; + + if r.intr_pending = '1' then + v.e.srr1 := r.e.srr1; + v.e.intr_vec := r.e.intr_vec; + end if; + + if valid_in = '1' then + v.prev_op := e_in.insn_type; + end if; + + -- Determine if there is any interrupt to be taken + -- before/instead of executing this instruction + exception := r.intr_pending or (valid_in and actions.exception); + if valid_in = '1' and e_in.second = '0' and r.intr_pending = '0' then + if HAS_FPU and r.fp_exception_next = '1' then + -- This is used for FP-type program interrupts that + -- become pending due to MSR[FE0,FE1] changing from 00 to non-zero. + exception := '1'; + v.e.intr_vec := 16#700#; + v.e.srr1 := (others => '0'); + v.e.srr1(47 - 43) := '1'; + v.e.srr1(47 - 47) := '1'; + elsif r.trace_next = '1' then + -- Generate a trace interrupt rather than executing the next instruction + -- or taking any asynchronous interrupt + exception := '1'; + v.e.intr_vec := 16#d00#; + v.e.srr1 := (others => '0'); + v.e.srr1(47 - 33) := '1'; + if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or + r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then + v.e.srr1(47 - 35) := '1'; + elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then + v.e.srr1(47 - 36) := '1'; end if; - if taken_branch /= e_in.br_pred then - v.e.redirect := '1'; - v.br_mispredict := is_direct_branch; + + elsif irq_valid = '1' then + -- Don't deliver the interrupt until we have a valid instruction + -- coming in, so we have a valid NIA to put in SRR0. + if pmu_to_x.intr = '1' then + v.e.intr_vec := 16#f00#; + report "IRQ valid: PMU"; + elsif ctrl.dec(63) = '1' then + v.e.intr_vec := 16#900#; + report "IRQ valid: DEC"; + elsif ext_irq_in = '1' then + v.e.intr_vec := 16#500#; + report "IRQ valid: External"; + v.ext_interrupt := '1'; end if; - v.e.br_last := is_direct_branch; - v.e.br_taken := taken_branch; + v.e.srr1 := (others => '0'); + exception := '1'; + end if; + end if; + if exception = '1' and l_in.in_progress = '1' then + -- We can't send this interrupt to writeback yet because there are + -- still instructions in loadstore1 that haven't completed. + v.intr_pending := '1'; + v.busy := '1'; + end if; + + v.no_instr_avail := not (e_in.valid or l_in.busy or l_in.in_progress or r.busy or fp_in.busy); + + go := valid_in and not exception; + v.instr_dispatch := go; + + if go = '1' then + v.e.valid := actions.complete; + v.taken_branch_event := actions.take_branch; + v.br_taken := actions.take_branch; + v.trace_next := actions.do_trace; + v.fp_exception_next := actions.fp_intr; + v.cntz_in_progress := actions.start_cntz; + + if actions.write_msr = '1' then + ctrl_tmp.msr <= actions.new_msr; + end if; + if actions.write_xerlow = '1' then + ctrl_tmp.xer_low <= c_in(17 downto 0); + end if; + if actions.write_dec = '1' then + ctrl_tmp.dec <= c_in; + end if; + if actions.write_cfar = '1' then + ctrl_tmp.cfar <= e_in.nia; + end if; + if actions.write_loga = '1' then + v.log_addr_spr := c_in(31 downto 0); + elsif actions.inc_loga = '1' then + v.log_addr_spr := std_ulogic_vector(unsigned(r.log_addr_spr) + 1); + end if; + x_to_pmu.mtspr <= actions.write_pmuspr; + icache_inval <= actions.icache_inval; + x_to_multiply.valid <= actions.start_mul; + v.mul_in_progress := actions.start_mul; + x_to_divider.valid <= actions.start_div; + v.div_in_progress := actions.start_div; + v.terminate := actions.terminate; + v.br_mispredict := v.e.redirect and actions.direct_branch; + v.busy := actions.start_cntz or actions.start_mul or actions.start_div; + exception := actions.trap; - elsif valid_in = '1' and exception = '0' and illegal = '0' then -- instruction for other units, i.e. LDST if e_in.unit = LDST then lv.valid := '1'; - elsif e_in.unit = NONE then - illegal := '1'; - elsif HAS_FPU and e_in.unit = FPU then - fv.valid := '1'; end if; - -- Handling an ITLB miss doesn't count as having executed an instruction - if e_in.insn_type = OP_FETCH_FAILED then - do_trace := '0'; + if HAS_FPU and e_in.unit = FPU then + fv.valid := '1'; end if; end if; @@ -1222,38 +1340,44 @@ begin if r.cntz_in_progress = '1' then -- cnt[lt]z and popcnt* always take two cycles v.e.valid := '1'; - elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then - if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or - (r.div_in_progress = '1' and divider_to_x.valid = '1') then - if r.mul_in_progress = '1' then - overflow := '0'; - else - overflow := divider_to_x.overflow; - end if; - if r.mul_in_progress = '1' and current.oe = '1' then + v.e.write_data := countbits_result; + end if; + if r.div_in_progress = '1' then + if divider_to_x.valid = '1' then + v.e.write_data := muldiv_result; + overflow := divider_to_x.overflow; + -- We must test oe because the RC update code in writeback + -- will use the xerc value to set CR0:SO so we must not clobber + -- xerc if OE wasn't set. + if r.oe = '1' then + v.e.xerc.ov := overflow; + v.e.xerc.ov32 := overflow; + if overflow = '1' then + v.e.xerc.so := '1'; + end if; + end if; + v.e.valid := '1'; + else + v.busy := '1'; + v.div_in_progress := '1'; + end if; + end if; + if r.mul_in_progress = '1' then + if multiply_to_x.valid = '1' then + v.e.write_data := muldiv_result; + if r.oe = '1' then -- have to wait until next cycle for overflow indication v.mul_finish := '1'; v.busy := '1'; else - -- We must test oe because the RC update code in writeback - -- will use the xerc value to set CR0:SO so we must not clobber - -- xerc if OE wasn't set. - if current.oe = '1' then - v.e.xerc.ov := overflow; - v.e.xerc.ov32 := overflow; - if overflow = '1' then - v.e.xerc.so := '1'; - end if; - end if; v.e.valid := '1'; end if; else v.busy := '1'; - v.mul_in_progress := r.mul_in_progress; - v.div_in_progress := r.div_in_progress; + v.mul_in_progress := '1'; end if; - elsif r.mul_finish = '1' then - hold_wr_data := '1'; + end if; + if r.mul_finish = '1' then v.e.xerc.ov := multiply_to_x.overflow; v.e.xerc.ov32 := multiply_to_x.overflow; if multiply_to_x.overflow = '1' then @@ -1262,24 +1386,11 @@ begin v.e.valid := '1'; end if; - if illegal = '1' then - exception := '1'; - v.e.intr_vec := 16#700#; - -- Since we aren't doing Hypervisor emulation assist (0xe40) we - -- set bit 44 to indicate we have an illegal - v.e.srr1(47 - 44) := '1'; - report "illegal"; - end if; - v.e.interrupt := exception and not (l_in.in_progress or l_in.interrupt); if v.e.interrupt = '1' then v.intr_pending := '0'; end if; - if do_trace = '1' then - v.trace_next := '1'; - end if; - if interrupt_in = '1' then ctrl_tmp.msr(MSR_SF) <= '1'; ctrl_tmp.msr(MSR_EE) <= '0'; @@ -1298,32 +1409,13 @@ begin v.intr_pending := '0'; end if; - if hold_wr_data = '0' then - v.e.write_data := alu_result; - else - v.e.write_data := r.e.write_data; - end if; - v.e.write_reg := current.write_reg; - v.e.write_enable := current.write_reg_enable and v.e.valid and not exception; - v.e.rc := current.rc and v.e.valid and not exception; - v.e.write_cr_data := write_cr_data; - v.e.write_cr_mask := write_cr_mask; - v.e.write_cr_enable := current.output_cr and v.e.valid and not exception; - v.e.write_xerc_enable := current.output_xer and v.e.valid and not exception; - - bypass_data.tag.valid <= current.instr_tag.valid and current.write_reg_enable and v.e.valid; - bypass_data.tag.tag <= current.instr_tag.tag; + bypass_data.tag.valid <= v.e.write_enable and v.e.valid; + bypass_data.tag.tag <= v.e.instr_tag.tag; bypass_data.data <= v.e.write_data; - bypass_cr_data.tag.valid <= current.instr_tag.valid and current.output_cr and v.e.valid; - bypass_cr_data.tag.tag <= current.instr_tag.tag; - for i in 0 to 7 loop - if v.e.write_cr_mask(i) = '1' then - bypass_cr_data.data(i*4 + 3 downto i*4) <= v.e.write_cr_data(i*4 + 3 downto i*4); - else - bypass_cr_data.data(i*4 + 3 downto i*4) <= cr_in(i*4 + 3 downto i*4); - end if; - end loop; + bypass_cr_data.tag.valid <= v.e.write_cr_enable and v.e.valid; + bypass_cr_data.tag.tag <= v.e.instr_tag.tag; + bypass_cr_data.data <= v.e.write_cr_data; -- Outputs to loadstore1 (async) lv.op := e_in.insn_type; @@ -1373,6 +1465,13 @@ begin -- update outputs l_out <= lv; e_out <= r.e; + if r.e.valid = '0' then + e_out.write_enable <= '0'; + e_out.write_cr_enable <= '0'; + e_out.write_xerc_enable <= '0'; + e_out.redirect <= '0'; + e_out.br_last <= '0'; + end if; e_out.msr <= msr_copy(ctrl.msr); fp_out <= fv; @@ -1394,7 +1493,7 @@ begin "000" & r.e.write_enable & r.e.valid & - (r.e.redirect or r.e.interrupt) & + ((r.e.redirect and r.e.valid) or r.e.interrupt) & r.busy & flush_in; end if;