write_enable: std_ulogic;
write_reg : gpr_index_t;
write_data : std_ulogic_vector(63 downto 0);
- write_len : std_ulogic_vector(3 downto 0);
- write_shift : std_ulogic_vector(2 downto 0);
- sign_extend : std_ulogic;
- byte_reverse : std_ulogic;
- second_word : std_ulogic;
xerc : xer_common_t;
rc : std_ulogic;
store_done : std_ulogic;
end record;
-- Initial value for Loadstore1ToWritebackType.
-- The scalar flags (valid, write_enable, rc, store_done) are cleared, xerc
-- takes xerc_init, and every remaining field is zero-filled through the
-- trailing `others => (others => '0')` association.
-- The new form of the aggregate no longer names sign_extend, byte_reverse
-- and second_word, matching their removal from the record.
- constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0',
- byte_reverse => '0', second_word => '0', xerc => xerc_init,
+ constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := (valid => '0', write_enable => '0', xerc => xerc_init,
rc => '0', store_done => '0', others => (others => '0'));
type Execute1ToWritebackType is record
-- latch most of the input request
load : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
- data : std_ulogic_vector(63 downto 0);
+ store_data : std_ulogic_vector(63 downto 0);
+ load_data : std_ulogic_vector(63 downto 0);
write_reg : gpr_index_t;
length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
second_bytes : std_ulogic_vector(7 downto 0);
end record;
-- One flag per byte lane of a 64-bit dword (8 lanes). Used by use_second,
-- which stores bit 3 (the carry) of the per-byte source index sum.
+ type byte_sel_t is array(0 to 7) of std_ulogic;
-- 2-bit per-byte source selector used when building data_trimmed:
--   "11" byte taken from r.load_data
--   "10" byte taken from data_permuted
--   "01" byte filled with x"FF"
--   others: byte filled with x"00"
+ subtype byte_trim_t is std_ulogic_vector(1 downto 0);
-- One selector per byte lane of the 64-bit result.
+ type trim_ctl_t is array(0 to 7) of byte_trim_t;
+
signal r, rin : reg_stage_t;
signal lsu_sum : std_ulogic_vector(63 downto 0);
variable byte_offset : unsigned(2 downto 0);
variable j : integer;
variable k : unsigned(2 downto 0);
+ variable kk : unsigned(3 downto 0);
variable long_sel : std_ulogic_vector(15 downto 0);
variable byte_sel : std_ulogic_vector(7 downto 0);
variable req : std_ulogic;
variable wdata : std_ulogic_vector(63 downto 0);
variable write_enable : std_ulogic;
variable do_update : std_ulogic;
- variable second_dword : std_ulogic;
+ variable two_dwords : std_ulogic;
variable done : std_ulogic;
+ variable data_permuted : std_ulogic_vector(63 downto 0);
+ variable data_trimmed : std_ulogic_vector(63 downto 0);
+ variable use_second : byte_sel_t;
+ variable trim_ctl : trim_ctl_t;
+ variable negative : std_ulogic;
begin
v := r;
req := '0';
write_enable := '0';
do_update := '0';
- second_dword := '0';
+ two_dwords := or (r.second_bytes);
+
+ -- Load data formatting.
+ --
+ -- Converts the dword returned by the dcache (d_in.data) into the value
+ -- handed to writeback:
+ --   1. data_permuted: bytes rotated by the low address bits and, for
+ --      byte-reversed loads, mirrored within the access length.
+ --   2. negative: sign bit of the loaded value, selected by r.length.
+ --   3. data_trimmed: per byte lane, either the current permuted byte,
+ --      the byte saved earlier in r.load_data, or x"FF"/x"00" fill.
+ --
+ -- data_permuted and data_trimmed are process variables, so they keep
+ -- their previous value across activations unless assigned. data_trimmed
+ -- is read after this block regardless of r.load (it drives
+ -- l_out.write_data), so both are given an explicit default here; without
+ -- it the combinational process would imply storage (an inferred latch)
+ -- whenever r.load = '0'. The defaults are never architecturally visible
+ -- because the register write enable is derived from r.load.
+ data_permuted := (others => '0');
+ data_trimmed := (others => '0');
+ if r.load = '1' then
+     byte_offset := unsigned(r.addr(2 downto 0));
+     -- For byte-reversed loads, XORing the lane index with (length - 1)
+     -- mirrors the byte order within the access; otherwise XOR with 0.
+     brev_lenm1 := "000";
+     if r.byte_reverse = '1' then
+         brev_lenm1 := unsigned(r.length(2 downto 0)) - 1;
+     end if;
+
+     -- shift and byte-reverse data bytes
+     for i in 0 to 7 loop
+         -- 4-bit sum: the low 3 bits pick the source byte inside d_in.data,
+         -- the carry (bit 3) is recorded in use_second for the trim step.
+         kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
+         use_second(i) := kk(3);
+         j := to_integer(kk(2 downto 0)) * 8;
+         data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
+     end loop;
+
+     -- Work out the sign bit for sign extension.
+     -- Assumes we are not doing both sign extension and byte reversal,
+     -- in that for unaligned loads crossing two dwords we end up
+     -- using a bit from the second dword, whereas for a byte-reversed
+     -- (i.e. big-endian) load the sign bit would be in the first dword.
+     negative := (r.length(3) and data_permuted(63)) or
+                 (r.length(2) and data_permuted(31)) or
+                 (r.length(1) and data_permuted(15)) or
+                 (r.length(0) and data_permuted(7));
+
+     -- trim and sign-extend
+     for i in 0 to 7 loop
+         if i < to_integer(unsigned(r.length)) then
+             -- Lane is inside the access length: choose which dword
+             -- supplies it.
+             if two_dwords = '1' then
+                 -- "11" (saved r.load_data) when use_second(i) = '0',
+                 -- "10" (current data_permuted) when use_second(i) = '1'.
+                 trim_ctl(i) := '1' & not use_second(i);
+             else
+                 -- "10" (current data_permuted) when use_second(i) = '0',
+                 -- "00" (zero) when use_second(i) = '1'.
+                 trim_ctl(i) := not use_second(i) & '0';
+             end if;
+         else
+             -- Lane is beyond the access length: fill with the sign
+             -- ("01" -> x"FF") or with zeros ("00").
+             trim_ctl(i) := '0' & (negative and r.sign_extend);
+         end if;
+         case trim_ctl(i) is
+             when "11" =>
+                 data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
+             when "10" =>
+                 data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
+             when "01" =>
+                 data_trimmed(i * 8 + 7 downto i * 8) := x"FF";
+             when others =>
+                 data_trimmed(i * 8 + 7 downto i * 8) := x"00";
+         end case;
+     end loop;
+ end if;
case r.state is
when IDLE =>
if l_in.valid = '1' then
v.load := l_in.load;
v.addr := lsu_sum;
- v.data := l_in.data;
v.write_reg := l_in.write_reg;
v.length := l_in.length;
v.byte_reverse := l_in.byte_reverse;
for i in 0 to 7 loop
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
j := to_integer(k) * 8;
- v.data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
+ v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
end loop;
end if;
when FIRST_ACK_WAIT =>
stall := '1';
if d_in.valid = '1' then
- write_enable := r.load;
v.state := LAST_ACK_WAIT;
+ if r.load = '1' then
+ v.load_data := data_permuted;
+ end if;
end if;
when LAST_ACK_WAIT =>
stall := '1';
- second_dword := or (r.second_bytes);
if d_in.valid = '1' then
write_enable := r.load;
if r.load = '1' and r.update = '1' then
done := '1';
end case;
- -- Update registers
- rin <= v;
-
-- Update outputs to dcache
d_out.valid <= req;
d_out.load <= v.load;
d_out.nc <= v.nc;
d_out.reserve <= v.reserve;
d_out.addr <= addr;
- d_out.data <= v.data;
+ d_out.data <= v.store_data;
d_out.byte_sel <= byte_sel;
-- Update outputs to writeback
l_out.write_enable <= '1';
l_out.write_reg <= r.update_reg;
l_out.write_data <= r.addr;
- l_out.write_len <= x"8";
- l_out.write_shift <= "000";
- l_out.sign_extend <= '0';
- l_out.byte_reverse <= '0';
- l_out.second_word <= '0';
- l_out.rc <= '0';
- l_out.store_done <= '0';
else
l_out.write_enable <= write_enable;
l_out.write_reg <= r.write_reg;
- l_out.write_data <= d_in.data;
- l_out.write_len <= r.length;
- l_out.write_shift <= r.addr(2 downto 0);
- l_out.sign_extend <= r.sign_extend;
- l_out.byte_reverse <= r.byte_reverse;
- l_out.second_word <= second_dword;
- l_out.rc <= r.rc and done;
- l_out.store_done <= d_in.store_done;
+ l_out.write_data <= data_trimmed;
end if;
l_out.xerc <= r.xerc;
+ l_out.rc <= r.rc and done;
+ l_out.store_done <= d_in.store_done;
stall_out <= stall;
+ -- Update registers
+ rin <= v;
+
end process;
+
end;
end entity writeback;
architecture behaviour of writeback is
- subtype byte_index_t is unsigned(2 downto 0);
- type permutation_t is array(0 to 7) of byte_index_t;
- subtype byte_trim_t is std_ulogic_vector(1 downto 0);
- type trim_ctl_t is array(0 to 7) of byte_trim_t;
- type byte_sel_t is array(0 to 7) of std_ulogic;
-
- signal data_len : unsigned(3 downto 0);
- signal data_in : std_ulogic_vector(63 downto 0);
- signal data_permuted : std_ulogic_vector(63 downto 0);
- signal data_trimmed : std_ulogic_vector(63 downto 0);
- signal data_latched : std_ulogic_vector(63 downto 0);
- signal perm : permutation_t;
- signal use_second : byte_sel_t;
- signal byte_offset : unsigned(2 downto 0);
- signal brev_lenm1 : unsigned(2 downto 0);
- signal trim_ctl : trim_ctl_t;
- signal rc : std_ulogic;
- signal partial_write : std_ulogic;
- signal sign_extend : std_ulogic;
- signal negative : std_ulogic;
- signal second_word : std_ulogic;
begin
- writeback_0: process(clk)
- begin
- if rising_edge(clk) then
- if partial_write = '1' then
- data_latched <= data_permuted;
- end if;
- end if;
- end process;
-
writeback_1: process(all)
variable x : std_ulogic_vector(0 downto 0);
variable y : std_ulogic_vector(0 downto 0);
- variable z : std_ulogic_vector(0 downto 0);
variable w : std_ulogic_vector(0 downto 0);
- variable j : integer;
- variable k : unsigned(3 downto 0);
variable cf: std_ulogic_vector(3 downto 0);
- variable xe: xer_common_t;
variable zero : std_ulogic;
variable sign : std_ulogic;
variable scf : std_ulogic_vector(3 downto 0);
complete_out <= '1';
end if;
- rc <= '0';
- brev_lenm1 <= "000";
- partial_write <= '0';
- second_word <= '0';
- xe := e_in.xerc;
- data_in <= (others => '0');
-
if e_in.write_enable = '1' then
w_out.write_reg <= e_in.write_reg;
+ w_out.write_data <= e_in.write_data;
w_out.write_enable <= '1';
- rc <= e_in.rc;
end if;
if e_in.write_cr_enable = '1' then
c_out.write_xerc_data <= e_in.xerc;
end if;
- sign_extend <= l_in.sign_extend;
- data_len <= unsigned(l_in.write_len);
- byte_offset <= unsigned(l_in.write_shift);
-- Register write requested by loadstore (l_in.write_enable).
-- The removed lines derived byte-reversal, partial-write and second-word
-- control here; the replacement forwards l_in.write_data unchanged and
-- always asserts the write enable for this request.
if l_in.write_enable = '1' then
w_out.write_reg <= gpr_to_gspr(l_in.write_reg);
- if l_in.byte_reverse = '1' then
- brev_lenm1 <= unsigned(l_in.write_len(2 downto 0)) - 1;
- end if;
- second_word <= l_in.second_word;
- if l_in.valid = '0' and (data_len + byte_offset > 8) then
- partial_write <= '1';
- end if;
- xe := l_in.xerc;
- w_out.write_enable <= not partial_write or second_word;
+ w_out.write_data <= l_in.write_data;
+ w_out.write_enable <= '1';
end if;
-- CR update requested by loadstore (l_in.rc).
-- Builds a 4-bit field: the two upper bits are forced to '0', bit 1 carries
-- l_in.store_done and bit 0 carries the SO flag from the XER copy.
-- The field is placed in bits 31..28 of write_cr_data, with the write mask
-- produced by num_to_fxm(0).
-- NOTE(review): presumably this targets CR field 0 -- confirm against the
-- definition of num_to_fxm.
if l_in.rc = '1' then
scf(3) := '0';
scf(2) := '0';
scf(1) := l_in.store_done;
- scf(0) := xe.so;
+ scf(0) := l_in.xerc.so;
c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= num_to_fxm(0);
c_out.write_cr_data(31 downto 28) <= scf;
end if;
- -- shift and byte-reverse data bytes
- for i in 0 to 7 loop
- k := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
- perm(i) <= k(2 downto 0);
- use_second(i) <= k(3);
- end loop;
- for i in 0 to 7 loop
- j := to_integer(perm(i)) * 8;
- data_permuted(i * 8 + 7 downto i * 8) <= l_in.write_data(j + 7 downto j);
- end loop;
-
- -- If the data can arrive split over two cycles, this will be correct
- -- provided we don't have both sign extension and byte reversal.
- negative <= (data_len(3) and data_permuted(63)) or
- (data_len(2) and data_permuted(31)) or
- (data_len(1) and data_permuted(15)) or
- (data_len(0) and data_permuted(7));
-
- -- trim and sign-extend
- for i in 0 to 7 loop
- if i < to_integer(data_len) then
- if second_word = '1' then
- trim_ctl(i) <= '1' & not use_second(i);
- else
- trim_ctl(i) <= not use_second(i) & '0';
- end if;
- else
- trim_ctl(i) <= '0' & (negative and sign_extend);
- end if;
- end loop;
- for i in 0 to 7 loop
- case trim_ctl(i) is
- when "11" =>
- data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8);
- when "10" =>
- data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8);
- when "01" =>
- data_trimmed(i * 8 + 7 downto i * 8) <= x"FF";
- when others =>
- data_trimmed(i * 8 + 7 downto i * 8) <= x"00";
- end case;
- end loop;
-
- -- deliver to regfile
- if l_in.write_enable = '1' then
- w_out.write_data <= data_trimmed;
- else
- w_out.write_data <= e_in.write_data;
- end if;
-
-- Perform CR0 update for RC forms
-- Note that loads never have a form with an RC bit, therefore this can test e_in.write_data
-- The new condition reads e_in.rc directly and additionally requires
-- e_in.write_enable, replacing the removed intermediate rc signal.
- if rc = '1' then
+ if e_in.rc = '1' and e_in.write_enable = '1' then
-- Classify the 64-bit result: sign is bit 63, zero is set when no bit is set.
sign := e_in.write_data(63);
zero := not (or e_in.write_data);
c_out.write_cr_enable <= '1';
-- cf(3): result negative; cf(2): result positive (neither negative nor
-- zero); cf(1): result zero; cf(0): SO flag taken from the XER copy.
cf(3) := sign;
cf(2) := not sign and not zero;
cf(1) := zero;
- cf(0) := xe.so;
+ cf(0) := e_in.xerc.so;
c_out.write_cr_data(31 downto 28) <= cf;
end if;
end process;