From: Paul Mackerras
Date: Mon, 11 Jul 2022 22:52:05 +0000 (+1000)
Subject: Start removing SPRs from register file
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bc4d02cb0dcc5b502a45651953ac7bd34521f0b9;p=microwatt.git

Start removing SPRs from register file

This starts the process of removing SPRs from the register file by
moving SRR0/1, SPRG0-3, HSRR0/1 and HSPRG0/1 out of the register file
and putting them into execute1. They are stored in a pair of small
RAM arrays, referred to as "even" and "odd". The reason for having
two arrays is so that two values can be read and written in each
cycle. For example, SRR0 and SRR1 can be written in parallel by an
interrupt and read in parallel by the rfid instruction.

The addresses in the RAM which will be accessed are determined in the
decode2 stage. We have one write address for both sides, but two read
addresses, since in future we will want to be able to read CTR at the
same time as either LR or TAR.

We now have a connection from writeback to execute1 which carries the
partial SRR1 value for an interrupt. SRR0 comes from the execute
pipeline; we no longer need to carry instruction addresses along the
LSU and FPU pipelines. Since SRR0 and SRR1 can be written in the same
cycle now, we don't need the little state machine in writeback any
more.

Signed-off-by: Paul Mackerras --- diff --git a/common.vhdl b/common.vhdl index f846fb4..74341d1 100644 --- a/common.vhdl +++ b/common.vhdl @@ -124,6 +124,28 @@ package common is end record; constant xerc_init : xer_common_t := (others => '0'); + -- Some SPRs are stored in a pair of small RAMs in execute1 + -- Even half: + subtype ramspr_index is natural range 0 to 7; + constant RAMSPR_SRR0 : ramspr_index := 0; + constant RAMSPR_HSRR0 : ramspr_index := 1; + constant RAMSPR_SPRG0 : ramspr_index := 2; + constant RAMSPR_SPRG2 : ramspr_index := 3; + constant RAMSPR_HSPRG0 : ramspr_index := 4; + -- Odd half: + constant RAMSPR_SRR1 : ramspr_index := 0; + constant RAMSPR_HSRR1 : ramspr_index := 1; + constant RAMSPR_SPRG1 : ramspr_index := 2; + constant RAMSPR_SPRG3 : ramspr_index := 3; + constant RAMSPR_HSPRG1 : ramspr_index := 4; + + type ram_spr_info is record + index : ramspr_index; + isodd : std_ulogic; + valid : std_ulogic; + end record; + constant ram_spr_info_init: ram_spr_info := (index => 0, others => '0'); + subtype spr_selector is std_ulogic_vector(2 downto 0); type spr_id is record sel : spr_selector; @@ -253,12 +275,13 @@ package common is br_pred: std_ulogic; -- Branch was predicted to be taken big_endian: std_ulogic; spr_info : spr_id; + ram_spr : ram_spr_info; end record; constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'), ispr1 => (others => '0'), ispr2 => (others => '0'), ispro => (others => '0'), decode => decode_rom_init, br_pred => '0', big_endian => '0', - spr_info => spr_id_init); + spr_info => spr_id_init, ram_spr => ram_spr_info_init); type Decode1ToFetch1Type is record redirect : std_ulogic; @@ -320,6 +343,13 @@ package common is repeat : std_ulogic; -- set if instruction is cracked into two ops second : std_ulogic; -- set if this is the second op spr_select : spr_id; + spr_is_ram : std_ulogic; + ramspr_even_rdaddr : ramspr_index; + ramspr_odd_rdaddr : 
ramspr_index; + ramspr_rd_odd : std_ulogic; + ramspr_wraddr : ramspr_index; + ramspr_write_even : std_ulogic; + ramspr_write_odd : std_ulogic; end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init, @@ -333,6 +363,9 @@ package common is cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), result_sel => "000", sub_select => "000", repeat => '0', second => '0', spr_select => spr_id_init, + spr_is_ram => '0', + ramspr_even_rdaddr => 0, ramspr_odd_rdaddr => 0, ramspr_rd_odd => '0', + ramspr_wraddr => 0, ramspr_write_even => '0', ramspr_write_odd => '0', others => (others => '0')); type MultiplyInputType is record @@ -574,7 +607,6 @@ package common is store_done : std_ulogic; interrupt : std_ulogic; intr_vec : intr_vector_t; - srr0: std_ulogic_vector(63 downto 0); srr1: std_ulogic_vector(15 downto 0); end record; constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := @@ -582,7 +614,7 @@ package common is write_reg => (others => '0'), write_data => (others => '0'), xerc => xerc_init, rc => '0', store_done => '0', interrupt => '0', intr_vec => 0, - srr0 => (others => '0'), srr1 => (others => '0')); + srr1 => (others => '0')); type Loadstore1EventType is record load_complete : std_ulogic; @@ -675,7 +707,6 @@ package common is write_xerc : std_ulogic; xerc : xer_common_t; intr_vec : intr_vector_t; - srr0 : std_ulogic_vector(63 downto 0); srr1 : std_ulogic_vector(15 downto 0); end record; constant FPUToWritebackInit : FPUToWritebackType := @@ -731,6 +762,11 @@ package common is write_cr_mask => (others => '0'), write_cr_data => (others => '0')); + type WritebackToExecute1Type is record + intr : std_ulogic; + srr1 : std_ulogic_vector(15 downto 0); + end record; + type WritebackEventType is record instr_complete : std_ulogic; fp_complete : std_ulogic; @@ -755,26 +791,6 @@ package body common is n := 0; -- N.B. 
decode2 relies on this specific value when SPR_CTR => n := 1; -- N.B. decode2 relies on this specific value - when SPR_SRR0 => - n := 2; - when SPR_SRR1 => - n := 3; - when SPR_HSRR0 => - n := 4; - when SPR_HSRR1 => - n := 5; - when SPR_SPRG0 => - n := 6; - when SPR_SPRG1 => - n := 7; - when SPR_SPRG2 => - n := 8; - when SPR_SPRG3 | SPR_SPRG3U => - n := 9; - when SPR_HSPRG0 => - n := 10; - when SPR_HSPRG1 => - n := 11; when SPR_TAR => n := 13; when others => diff --git a/core.vhdl b/core.vhdl index ba8f0cc..b2f2704 100644 --- a/core.vhdl +++ b/core.vhdl @@ -102,6 +102,7 @@ architecture behave of core is -- Writeback signals signal writeback_bypass: bypass_data_t; + signal wb_interrupt: WritebackToExecute1Type; -- local signals signal fetch1_stall_in : std_ulogic; @@ -122,7 +123,6 @@ architecture behave of core is signal complete: instr_tag_t; signal terminate: std_ulogic; signal core_rst: std_ulogic; - signal do_interrupt: std_ulogic; -- Delayed/Latched resets and alt_reset signal rst_fetch1 : std_ulogic; @@ -361,7 +361,7 @@ begin l_in => loadstore1_to_execute1, fp_in => fpu_to_execute1, ext_irq_in => ext_irq, - interrupt_in => do_interrupt, + interrupt_in => wb_interrupt, l_out => execute1_to_loadstore1, fp_out => execute1_to_fpu, e_out => execute1_to_writeback, @@ -469,7 +469,7 @@ begin f_out => writeback_to_fetch1, wb_bypass => writeback_bypass, events => writeback_events, - interrupt_out => do_interrupt, + interrupt_out => wb_interrupt, complete_out => complete ); diff --git a/decode1.vhdl b/decode1.vhdl index 2e2a8e3..fd01d61 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -181,7 +181,7 @@ architecture behaviour of decode1 is -- isync 2#111# => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- rfid - 2#101# => (ALU, NONE, OP_RFID, SPR, SPR, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + 2#101# => (ALU, NONE, OP_RFID, 
NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), others => illegal_inst ); @@ -525,6 +525,42 @@ architecture behaviour of decode1 is constant nop_instr : decode_rom_t := (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE); constant fetch_fail_inst: decode_rom_t := (LDST, NONE, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE); + function decode_ram_spr(sprn : spr_num_t) return ram_spr_info is + variable ret : ram_spr_info; + begin + ret := (index => 0, isodd => '0', valid => '1'); + case sprn is + when SPR_SRR0 => + ret.index := RAMSPR_SRR0; + when SPR_SRR1 => + ret.index := RAMSPR_SRR1; + ret.isodd := '1'; + when SPR_HSRR0 => + ret.index := RAMSPR_HSRR0; + when SPR_HSRR1 => + ret.index := RAMSPR_HSRR1; + ret.isodd := '1'; + when SPR_SPRG0 => + ret.index := RAMSPR_SPRG0; + when SPR_SPRG1 => + ret.index := RAMSPR_SPRG1; + ret.isodd := '1'; + when SPR_SPRG2 => + ret.index := RAMSPR_SPRG2; + when SPR_SPRG3 | SPR_SPRG3U => + ret.index := RAMSPR_SPRG3; + ret.isodd := '1'; + when SPR_HSPRG0 => + ret.index := RAMSPR_HSPRG0; + when SPR_HSPRG1 => + ret.index := RAMSPR_HSPRG1; + ret.isodd := '1'; + when others => + ret.valid := '0'; + end case; + return ret; + end; + function map_spr(sprn : spr_num_t) return spr_id is variable i : spr_id; begin @@ -614,6 +650,7 @@ begin sprn := decode_spr_num(f_in.insn); v.spr_info := map_spr(sprn); + v.ram_spr := decode_ram_spr(sprn); case to_integer(unsigned(majorop)) is when 4 => @@ -632,17 +669,17 @@ begin if std_match(f_in.insn(10 downto 1), "01-1010011") then -- mfspr or mtspr - if is_fast_spr(v.ispr1) = '0' then - -- Make mtspr to slow SPRs single issue + -- Make mtspr to slow SPRs single issue + if v.spr_info.valid = '1' then vi.force_single := f_in.insn(8); - -- send MMU-related SPRs to loadstore1 - case sprn 
is - when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR => - vi.override_decode.unit := LDST; - vi.override_unit := '1'; - when others => - end case; end if; + -- send MMU-related SPRs to loadstore1 + case sprn is + when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR => + vi.override_decode.unit := LDST; + vi.override_unit := '1'; + when others => + end case; end if; when 16 => @@ -690,10 +727,6 @@ begin else v.ispr2 := fast_spr_num(SPR_TAR); end if; - else - -- Could be OP_RFID - v.ispr1 := fast_spr_num(SPR_SRR1); - v.ispr2 := fast_spr_num(SPR_SRR0); end if; when 24 => diff --git a/decode2.vhdl b/decode2.vhdl index a043ef9..c76b7f5 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -480,6 +480,23 @@ begin v.e.spr_select := d_in.spr_info; + case op is + when OP_MFSPR => + v.e.ramspr_even_rdaddr := d_in.ram_spr.index; + v.e.ramspr_odd_rdaddr := d_in.ram_spr.index; + v.e.ramspr_rd_odd := d_in.ram_spr.isodd; + v.e.spr_is_ram := d_in.ram_spr.valid; + when OP_MTSPR => + v.e.ramspr_wraddr := d_in.ram_spr.index; + v.e.ramspr_write_even := d_in.ram_spr.valid and not d_in.ram_spr.isodd; + v.e.ramspr_write_odd := d_in.ram_spr.valid and d_in.ram_spr.isodd; + v.e.spr_is_ram := d_in.ram_spr.valid; + when OP_RFID => + v.e.ramspr_even_rdaddr := RAMSPR_SRR0; + v.e.ramspr_odd_rdaddr := RAMSPR_SRR1; + when others => + end case; + case d_in.decode.length is when is1B => length := "0001"; @@ -530,6 +547,8 @@ begin if op = OP_MFSPR then if is_fast_spr(d_in.ispr1) = '1' then v.e.result_sel := "000"; -- adder_result, effectively a_in + elsif d_in.ram_spr.valid = '1' then + v.e.result_sel := "101"; -- ramspr_result elsif d_in.spr_info.valid = '0' then -- Privileged mfspr to invalid/unimplemented SPR numbers -- writes the contents of RT back to RT (i.e. 
it's a no-op) diff --git a/execute1.vhdl b/execute1.vhdl index 2efe439..b0b2f98 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -31,7 +31,7 @@ entity execute1 is fp_in : in FPUToExecute1Type; ext_irq_in : std_ulogic; - interrupt_in : std_ulogic; + interrupt_in : WritebackToExecute1Type; -- asynchronous l_out : out Execute1ToLoadstore1Type; @@ -72,6 +72,8 @@ architecture behaviour of execute1 is write_loga : std_ulogic; inc_loga : std_ulogic; write_pmuspr : std_ulogic; + ramspr_write_even : std_ulogic; + ramspr_write_odd : std_ulogic; end record; constant side_effect_init : side_effect_type := (others => '0'); @@ -119,6 +121,7 @@ architecture behaviour of execute1 is msr : std_ulogic_vector(63 downto 0); xerc : xer_common_t; xerc_valid : std_ulogic; + ramspr_wraddr : ramspr_index; end record; constant reg_stage1_type_init : reg_stage1_type := (e => Execute1ToWritebackInit, se => side_effect_init, @@ -130,7 +133,8 @@ architecture behaviour of execute1 is no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0', taken_branch_event => '0', br_mispredict => '0', msr => 64x"0", - xerc => xerc_init, xerc_valid => '0'); + xerc => xerc_init, xerc_valid => '0', + ramspr_wraddr => 0); type reg_stage2_type is record e : Execute1ToWritebackType; @@ -203,6 +207,20 @@ architecture behaviour of execute1 is signal exception_log : std_ulogic; signal irq_valid_log : std_ulogic; + -- SPR-related signals + type ramspr_half_t is array(ramspr_index) of std_ulogic_vector(63 downto 0); + signal even_sprs : ramspr_half_t := (others => (others => '0')); + signal odd_sprs : ramspr_half_t := (others => (others => '0')); + signal ramspr_even : std_ulogic_vector(63 downto 0); + signal ramspr_odd : std_ulogic_vector(63 downto 0); + signal ramspr_result : std_ulogic_vector(63 downto 0); + signal ramspr_rd_odd : std_ulogic; + signal ramspr_wr_addr : ramspr_index; + signal ramspr_even_wr_data : std_ulogic_vector(63 downto 0); + signal ramspr_even_wr_enab : std_ulogic; + signal 
ramspr_odd_wr_data : std_ulogic_vector(63 downto 0); + signal ramspr_odd_wr_enab : std_ulogic; + signal stage2_stall : std_ulogic; type privilege_level is (USER, SUPER); @@ -289,6 +307,18 @@ architecture behaviour of execute1 is return msr_out; end; + function intr_srr1(msr: std_ulogic_vector; flags: std_ulogic_vector) + return std_ulogic_vector is + variable srr1: std_ulogic_vector(63 downto 0); + begin + srr1(63 downto 31) := msr(63 downto 31); + srr1(30 downto 27) := flags(14 downto 11); + srr1(26 downto 22) := msr(26 downto 22); + srr1(21 downto 16) := flags(5 downto 0); + srr1(15 downto 0) := msr(15 downto 0); + return srr1; + end; + -- Work out whether a signed value fits into n bits, -- that is, see if it is in the range -2^(n-1) .. 2^(n-1) - 1 function fits_in_n_bits(val: std_ulogic_vector; n: integer) return boolean is @@ -456,6 +486,78 @@ begin valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt); + -- SPRs stored in two small RAM arrays (two so that we can read and write + -- two SPRs in each cycle). 
+ + ramspr_read: process(all) + variable even_rd_data, odd_rd_data : std_ulogic_vector(63 downto 0); + variable wr_addr : ramspr_index; + variable even_wr_enab, odd_wr_enab : std_ulogic; + variable even_wr_data, odd_wr_data : std_ulogic_vector(63 downto 0); + variable doit : std_ulogic; + begin + -- Read address mux and async RAM reading + even_rd_data := even_sprs(e_in.ramspr_even_rdaddr); + odd_rd_data := odd_sprs(e_in.ramspr_odd_rdaddr); + + -- Write address and data muxes + doit := ex1.e.valid and not stage2_stall and not flush_in; + even_wr_enab := (ex1.se.ramspr_write_even and doit) or interrupt_in.intr; + odd_wr_enab := (ex1.se.ramspr_write_odd and doit) or interrupt_in.intr; + if interrupt_in.intr = '1' then + wr_addr := RAMSPR_SRR0; + else + wr_addr := ex1.ramspr_wraddr; + end if; + if interrupt_in.intr = '1' then + even_wr_data := ex2.e.last_nia; + odd_wr_data := intr_srr1(ctrl.msr, interrupt_in.srr1); + else + even_wr_data := ex1.e.write_data; + odd_wr_data := ex1.e.write_data; + end if; + ramspr_wr_addr <= wr_addr; + ramspr_even_wr_data <= even_wr_data; + ramspr_even_wr_enab <= even_wr_enab; + ramspr_odd_wr_data <= odd_wr_data; + ramspr_odd_wr_enab <= odd_wr_enab; + + -- SPR RAM read with write data bypass + -- We assume no instruction executes in the cycle immediately following + -- an interrupt, so we don't need to bypass interrupt data + if ex1.se.ramspr_write_even = '1' and e_in.ramspr_even_rdaddr = ex1.ramspr_wraddr then + ramspr_even <= ex1.e.write_data; + else + ramspr_even <= even_rd_data; + end if; + if ex1.se.ramspr_write_odd = '1' and e_in.ramspr_odd_rdaddr = ex1.ramspr_wraddr then + ramspr_odd <= ex1.e.write_data; + else + ramspr_odd <= odd_rd_data; + end if; + if e_in.ramspr_rd_odd = '0' then + ramspr_result <= ramspr_even; + else + ramspr_result <= ramspr_odd; + end if; + end process; + + ramspr_write: process(clk) + begin + if rising_edge(clk) then + if ramspr_even_wr_enab = '1' then + even_sprs(ramspr_wr_addr) <= ramspr_even_wr_data; + 
report "writing even spr " & integer'image(ramspr_wr_addr) & " data=" & + to_hstring(ramspr_even_wr_data); + end if; + if ramspr_odd_wr_enab = '1' then + odd_sprs(ramspr_wr_addr) <= ramspr_odd_wr_data; + report "writing odd spr " & integer'image(ramspr_wr_addr) & " data=" & + to_hstring(ramspr_odd_wr_data); + end if; + end if; + end process; + -- First stage result mux s1_sel <= e_in.result_sel when ex1.busy = '0' else "100"; with s1_sel select alu_result <= @@ -464,6 +566,7 @@ begin rotator_result when "010", shortmul_result when "011", muldiv_result when "100", + ramspr_result when "101", next_nia when "110", misc_result when others; @@ -830,6 +933,7 @@ begin variable privileged : std_ulogic; variable slow_op : std_ulogic; variable owait : std_ulogic; + variable srr1 : std_ulogic_vector(63 downto 0); begin v := actions_type_init; v.e.write_data := alu_result; @@ -850,6 +954,9 @@ begin v.e.last_nia := e_in.nia; v.e.br_offset := 64x"4"; + v.se.ramspr_write_even := e_in.ramspr_write_even; + v.se.ramspr_write_odd := e_in.ramspr_write_odd; + -- Note the difference between v.exception and v.trap: -- v.exception signals a condition that prevents execution of the -- instruction, and hence shouldn't depend on operand data, so as to @@ -1009,26 +1116,27 @@ begin end if; when OP_RFID => - v.e.redir_mode := (a_in(MSR_IR) or a_in(MSR_PR)) & not a_in(MSR_PR) & - not a_in(MSR_LE) & not a_in(MSR_SF); + srr1 := ramspr_odd; + v.e.redir_mode := (srr1(MSR_IR) or srr1(MSR_PR)) & not srr1(MSR_PR) & + not srr1(MSR_LE) & not srr1(MSR_SF); -- Can't use msr_copy here because the partial function MSR -- bits should be left unchanged, not zeroed. 
- v.new_msr(63 downto 31) := a_in(63 downto 31); - v.new_msr(26 downto 22) := a_in(26 downto 22); - v.new_msr(15 downto 0) := a_in(15 downto 0); - if a_in(MSR_PR) = '1' then + v.new_msr(63 downto 31) := srr1(63 downto 31); + v.new_msr(26 downto 22) := srr1(26 downto 22); + v.new_msr(15 downto 0) := srr1(15 downto 0); + if srr1(MSR_PR) = '1' then v.new_msr(MSR_EE) := '1'; v.new_msr(MSR_IR) := '1'; v.new_msr(MSR_DR) := '1'; end if; v.se.write_msr := '1'; - v.e.br_offset := b_in; + v.e.br_offset := ramspr_even; v.e.abs_br := '1'; v.e.redirect := '1'; v.se.write_cfar := '1'; if HAS_FPU then v.fp_intr := fp_in.exception and - (a_in(MSR_FE0) or a_in(MSR_FE1)); + (srr1(MSR_FE0) or srr1(MSR_FE1)); end if; v.do_trace := '0'; @@ -1041,10 +1149,10 @@ begin when OP_DARN => when OP_MFMSR => when OP_MFSPR => - if is_fast_spr(e_in.read_reg1) = '1' then + if is_fast_spr(e_in.read_reg1) = '1' or e_in.spr_is_ram = '1' then if e_in.valid = '1' then report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & - "=" & to_hstring(a_in); + "=" & to_hstring(alu_result); end if; elsif e_in.spr_select.valid = '1' then if e_in.valid = '1' then @@ -1121,7 +1229,9 @@ begin v.se.write_loga := '1'; when others => end case; - elsif is_fast_spr(e_in.write_reg) = '0' then + end if; + if e_in.spr_select.valid = '0' and is_fast_spr(e_in.write_reg) = '0' and + e_in.spr_is_ram = '0' then -- mtspr to unimplemented SPRs should be a nop in -- supervisor mode and a program interrupt for user mode if ex1.msr(MSR_PR) = '1' then @@ -1232,6 +1342,7 @@ begin v.pmu_spr_num := e_in.insn(20 downto 16); v.mul_select := e_in.sub_select(1 downto 0); v.se := side_effect_init; + v.ramspr_wraddr := e_in.ramspr_wraddr; end if; lv := Execute1ToLoadstore1Init; @@ -1402,10 +1513,10 @@ begin v.mul_finish := '0'; v.xerc_valid := '0'; end if; - if flush_in = '1' or interrupt_in = '1' then + if flush_in = '1' or interrupt_in.intr = '1' then v.msr := ctrl_tmp.msr; end if; - if interrupt_in = '1' then + if 
interrupt_in.intr = '1' then v.trace_next := '0'; v.fp_exception_next := '0'; end if; @@ -1449,7 +1560,6 @@ begin -- Outputs to FPU fv.op := e_in.insn_type; - fv.nia := e_in.nia; fv.insn := e_in.insn; fv.itag := e_in.instr_tag; fv.single := e_in.is_32bit; @@ -1607,7 +1717,7 @@ begin x_to_pmu.mtspr <= ex1.se.write_pmuspr; end if; - if interrupt_in = '1' then + if interrupt_in.intr = '1' then ctrl_tmp.msr(MSR_SF) <= '1'; ctrl_tmp.msr(MSR_EE) <= '0'; ctrl_tmp.msr(MSR_PR) <= '0'; @@ -1659,7 +1769,7 @@ begin ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) & exception_log & irq_valid_log & - interrupt_in & + interrupt_in.intr & "000" & ex2.e.write_enable & ex2.e.valid & diff --git a/fpu.vhdl b/fpu.vhdl index 90e04b3..2dd221e 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -99,7 +99,6 @@ architecture behaviour of fpu is illegal : std_ulogic; op : insn_type_t; insn : std_ulogic_vector(31 downto 0); - nia : std_ulogic_vector(63 downto 0); instr_tag : instr_tag_t; dest_fpr : gspr_index_t; fe_mode : std_ulogic; @@ -669,7 +668,6 @@ begin w_out.xerc <= r.xerc_result; w_out.interrupt <= r.do_intr; w_out.intr_vec <= 16#700#; - w_out.srr0 <= r.nia; w_out.srr1 <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0'); fpu_1: process(all) @@ -756,7 +754,6 @@ begin -- capture incoming instruction if e_in.valid = '1' then v.insn := e_in.insn; - v.nia := e_in.nia; v.op := e_in.op; v.instr_tag := e_in.itag; v.fe_mode := or (e_in.fe_mode); diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 7fad454..b556211 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -90,7 +90,6 @@ architecture behave of loadstore1 is dword_index : std_ulogic; two_dwords : std_ulogic; incomplete : std_ulogic; - nia : std_ulogic_vector(63 downto 0); end record; constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0', dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0', @@ -105,8 +104,7 @@ architecture behave of loadstore1 is atomic => '0', atomic_last => '0', rc => 
'0', nc => '0', virt_mode => '0', priv_mode => '0', load_sp => '0', sprn => 10x"0", is_slbia => '0', align_intr => '0', - dword_index => '0', two_dwords => '0', incomplete => '0', - nia => (others => '0')); + dword_index => '0', two_dwords => '0', incomplete => '0'); type reg_stage1_t is record req : request_t; @@ -146,7 +144,6 @@ architecture behave of loadstore1 is stage1_en : std_ulogic; interrupt : std_ulogic; intr_vec : integer range 0 to 16#fff#; - nia : std_ulogic_vector(63 downto 0); srr1 : std_ulogic_vector(15 downto 0); events : Loadstore1EventType; end record; @@ -412,7 +409,6 @@ begin v.virt_mode := l_in.virt_mode; v.priv_mode := l_in.priv_mode; v.sprn := sprn; - v.nia := l_in.nia; lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)); @@ -866,7 +862,6 @@ begin -- or ISI or ISegI for instruction fetch exceptions v.interrupt := exception; if exception = '1' then - v.nia := r2.req.nia; if r2.req.align_intr = '1' then v.intr_vec := 16#600#; v.dar := r2.req.addr; @@ -962,7 +957,6 @@ begin l_out.store_done <= d_in.store_done; l_out.interrupt <= r3.interrupt; l_out.intr_vec <= r3.intr_vec; - l_out.srr0 <= r3.nia; l_out.srr1 <= r3.srr1; -- update busy signal back to execute1 diff --git a/writeback.vhdl b/writeback.vhdl index 5b384c6..2f6af2c 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -25,20 +25,12 @@ entity writeback is events : out WritebackEventType; flush_out : out std_ulogic; - interrupt_out: out std_ulogic; + interrupt_out: out WritebackToExecute1Type; complete_out : out instr_tag_t ); end entity writeback; architecture behaviour of writeback is - type irq_state_t is (WRITE_SRR0, WRITE_SRR1); - - type reg_type is record - state : irq_state_t; - srr1 : std_ulogic_vector(63 downto 0); - end record; - - signal r, rin : reg_type; begin writeback_0: process(clk) @@ -47,13 +39,6 @@ begin variable w : std_ulogic_vector(0 downto 0); begin if rising_edge(clk) then - if rst = '1' then - r.state <= WRITE_SRR0; - r.srr1 <= (others => 
'0'); - else - r <= rin; - end if; - -- Do consistency checks only on the clock edge x(0) := e_in.valid; y(0) := l_in.valid; @@ -82,7 +67,6 @@ begin end process; writeback_1: process(all) - variable v : reg_type; variable f : WritebackToFetch1Type; variable scf : std_ulogic_vector(3 downto 0); variable vec : integer range 0 to 16#fff#; @@ -92,9 +76,7 @@ begin w_out <= WritebackToRegisterFileInit; c_out <= WritebackToCrFileInit; f := WritebackToFetch1Init; - interrupt_out <= '0'; vec := 0; - v := r; complete_out <= instr_tag_init; if e_in.valid = '1' then @@ -108,37 +90,21 @@ begin events.fp_complete <= fp_in.valid; intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt; + interrupt_out.intr <= intr; - if r.state = WRITE_SRR1 then - w_out.write_reg <= fast_spr_num(SPR_SRR1); - w_out.write_data <= r.srr1; - w_out.write_enable <= '1'; - interrupt_out <= '1'; - v.state := WRITE_SRR0; - - elsif intr = '1' then - w_out.write_reg <= fast_spr_num(SPR_SRR0); - w_out.write_enable <= '1'; - v.state := WRITE_SRR1; + if intr = '1' then srr1 := (others => '0'); if e_in.interrupt = '1' then vec := e_in.intr_vec; - w_out.write_data <= e_in.last_nia; srr1 := e_in.srr1; elsif l_in.interrupt = '1' then vec := l_in.intr_vec; - w_out.write_data <= l_in.srr0; srr1 := l_in.srr1; elsif fp_in.interrupt = '1' then vec := fp_in.intr_vec; - w_out.write_data <= fp_in.srr0; srr1 := fp_in.srr1; end if; - v.srr1(63 downto 31) := e_in.msr(63 downto 31); - v.srr1(30 downto 27) := srr1(14 downto 11); - v.srr1(26 downto 22) := e_in.msr(26 downto 22); - v.srr1(21 downto 16) := srr1(5 downto 0); - v.srr1(15 downto 0) := e_in.msr(15 downto 0); + interrupt_out.srr1 <= srr1; else if e_in.write_enable = '1' then @@ -229,6 +195,5 @@ begin wb_bypass.tag.valid <= complete_out.valid and w_out.write_enable; wb_bypass.data <= w_out.write_data; - rin <= v; end process; end;