loadstore1: Move logic from dcache to loadstore1
[microwatt.git] / loadstore1.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.common.all;
7 use work.helpers.all;
8
9 -- 2 cycle LSU
10 -- We calculate the address in the first cycle
11
-- Load/store unit front end.
-- Accepts a load/store request, issues one or two requests to the dcache
-- and hands the result to writeback.
entity loadstore1 is
    port (
        -- Clock and synchronous reset
        clk       : in  std_ulogic;
        rst       : in  std_ulogic;

        -- Incoming load/store request and outgoing writeback result
        l_in      : in  Execute1ToLoadstore1Type;
        l_out     : out Loadstore1ToWritebackType;

        -- Request to, and response from, the data cache
        d_out     : out Loadstore1ToDcacheType;
        d_in      : in  DcacheToLoadstore1Type;

        -- Stall indication from the dcache (not read by this architecture)
        dc_stall  : in  std_ulogic;
        -- Driven high while a load/store is still in progress
        stall_out : out std_ulogic
        );
end loadstore1;
27
-- Note: we don't currently use the stall output from the dcache.
-- We are the dcache's only user, and we know that when it is idle it
-- never stalls and can take two requests without stalling.
31
32 architecture behave of loadstore1 is
33
-- States of the load/store sequencer.  Unaligned accesses that span two
-- doublewords need a second dcache request and therefore a second ack.
type state_t is (
    IDLE,            -- ready for instruction
    SECOND_REQ,      -- send 2nd request of unaligned xfer
    FIRST_ACK_WAIT,  -- waiting for 1st ack from dcache
    LAST_ACK_WAIT,   -- waiting for last ack from dcache
    LD_UPDATE        -- writing rA with computed addr on load
    );
41
-- Registered state of the unit: a copy of the request being processed
-- plus the sequencer state.
type reg_stage_t is record
    -- Fields latched from the incoming request
    load         : std_ulogic;                     -- '1' for a load, '0' for a store
    addr         : std_ulogic_vector(63 downto 0); -- computed effective address
    data         : std_ulogic_vector(63 downto 0); -- data sent to the dcache
    write_reg    : gpr_index_t;                    -- register written with the load result
    length       : std_ulogic_vector(3 downto 0);  -- access size in bytes
    byte_reverse : std_ulogic;
    sign_extend  : std_ulogic;
    update       : std_ulogic;                     -- write addr back to update_reg
    update_reg   : gpr_index_t;
    xerc         : xer_common_t;
    reserve      : std_ulogic;
    rc           : std_ulogic;
    nc           : std_ulogic;                     -- non-cacheable access
    -- Sequencer state
    state        : state_t;
    second_bytes : std_ulogic_vector(7 downto 0);  -- byte selects for the 2nd doubleword
end record;
60
-- Current (r) and next (rin) value of the registered state
signal r       : reg_stage_t;
signal rin     : reg_stage_t;
-- Effective address computed from the two address operands
signal lsu_sum : std_ulogic_vector(63 downto 0);
63
-- Convert an access size into byte enables.
-- The size is expected to be 1, 2, 4 or 8 (one-hot); the result has that
-- many low-order bits set.  Any other size value yields no byte enables.
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin
    if length = "0001" then
        return "00000001";
    elsif length = "0010" then
        return "00000011";
    elsif length = "0100" then
        return "00001111";
    elsif length = "1000" then
        return "11111111";
    else
        return "00000000";
    end if;
end function length_to_sel;
80
-- Compute the byte enables for an access of the given size starting at
-- the given byte offset within a doubleword.
-- The result is 16 bits wide: bits 7..0 select bytes in the first
-- doubleword and bits 15..8 select bytes in the following one, which is
-- only non-zero for an unaligned access that crosses into it.
function xfer_data_sel(size : in std_logic_vector(3 downto 0);
                       address : in std_logic_vector(2 downto 0))
    return std_ulogic_vector is
    variable first_byte : natural range 0 to 7;
    variable sel16 : std_ulogic_vector(15 downto 0);
begin
    first_byte := to_integer(unsigned(address));
    sel16 := (others => '0');
    -- Drop the 8-bit mask into place, starting at the addressed byte
    sel16(first_byte + 7 downto first_byte) := length_to_sel(size);
    return sel16;
end function xfer_data_sel;
93
94 begin
-- Effective address: sum of the two address operands of a valid request,
-- forced to zero when there is no valid request.
lsu_sum <= (others => '0') when l_in.valid /= '1' else
           std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));
97
-- State register.  On each rising clock edge the next state computed by
-- the combinational process is loaded; a synchronous reset only forces
-- the sequencer back to IDLE and leaves the other fields as they are.
loadstore1_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            r <= rin;
        else
            r.state <= IDLE;
        end if;
    end if;
end process;
108
-- Combinational logic of the load/store unit.
-- Computes the next registered state (rin), the request presented to the
-- dcache (d_out), the result presented to writeback (l_out) and the stall
-- output, from the current state (r), the incoming request (l_in) and the
-- dcache response (d_in).
loadstore1_1: process(all)
    variable v : reg_stage_t;
    variable brev_lenm1 : unsigned(2 downto 0);
    variable byte_offset : unsigned(2 downto 0);
    variable j : integer;
    variable k : unsigned(2 downto 0);
    variable long_sel : std_ulogic_vector(15 downto 0);
    variable byte_sel : std_ulogic_vector(7 downto 0);
    variable req : std_ulogic;
    variable stall : std_ulogic;
    variable addr : std_ulogic_vector(63 downto 0);
    variable write_enable : std_ulogic;
    variable do_update : std_ulogic;
    variable second_dword : std_ulogic;
    variable done : std_ulogic;
begin
    -- Defaults: keep the registered state, no dcache request, no stall,
    -- nothing completed, nothing written back.
    v := r;
    req := '0';
    stall := '0';
    done := '0';
    byte_sel := (others => '0');
    addr := lsu_sum;

    write_enable := '0';
    do_update := '0';
    second_dword := '0';

    case r.state is
        when IDLE =>
            if l_in.valid = '1' then
                -- Latch the request
                v.load := l_in.load;
                v.addr := lsu_sum;
                v.data := l_in.data;
                v.write_reg := l_in.write_reg;
                v.length := l_in.length;
                v.byte_reverse := l_in.byte_reverse;
                v.sign_extend := l_in.sign_extend;
                v.update := l_in.update;
                v.update_reg := l_in.update_reg;
                v.xerc := l_in.xerc;
                v.reserve := l_in.reserve;
                v.rc := l_in.rc;

                -- XXX Temporary hack. Mark the op as non-cacheable if the
                -- address is of the form 0xc------- (only bits 31..28 are
                -- examined).
                --
                -- This will have to be replaced by a combination of
                -- implementing the proper HV CI load/store instructions and
                -- having an MMU to get the I bit otherwise.
                if lsu_sum(31 downto 28) = "1100" then
                    v.nc := '1';
                else
                    v.nc := '0';
                end if;

                -- Work out the byte selects for the first doubleword and,
                -- if the access is unaligned, for the second one.
                long_sel := xfer_data_sel(l_in.length, lsu_sum(2 downto 0));
                byte_sel := long_sel(7 downto 0);
                v.second_bytes := long_sel(15 downto 8);

                -- Do byte reversing and rotating for stores in the first cycle.
                -- Source byte i goes to byte lane
                -- ((i xor brev_lenm1) + byte_offset) modulo 8, where
                -- brev_lenm1 is length - 1 for byte-reversed stores and 0
                -- otherwise.
                if v.load = '0' then
                    byte_offset := unsigned(lsu_sum(2 downto 0));
                    brev_lenm1 := "000";
                    if l_in.byte_reverse = '1' then
                        brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
                    end if;
                    for i in 0 to 7 loop
                        k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
                        j := to_integer(k) * 8;
                        v.data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8);
                    end loop;
                end if;

                -- Issue the first (or only) request to the dcache
                req := '1';
                stall := '1';
                if long_sel(15 downto 8) = "00000000" then
                    -- everything fits in one doubleword: a single ack to wait for
                    v.state := LAST_ACK_WAIT;
                else
                    v.state := SECOND_REQ;
                end if;
            end if;

        when SECOND_REQ =>
            -- compute (addr + 8) & ~7 for the second doubleword when unaligned
            addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
            byte_sel := r.second_bytes;
            req := '1';
            stall := '1';
            v.state := FIRST_ACK_WAIT;

        when FIRST_ACK_WAIT =>
            stall := '1';
            if d_in.valid = '1' then
                -- first half of an unaligned load goes to writeback now
                write_enable := r.load;
                v.state := LAST_ACK_WAIT;
            end if;

        when LAST_ACK_WAIT =>
            stall := '1';
            -- flag data for the second doubleword of an unaligned access
            second_dword := or (r.second_bytes);
            if d_in.valid = '1' then
                write_enable := r.load;
                if r.load = '1' and r.update = '1' then
                    -- loads with rA update need an extra cycle
                    v.state := LD_UPDATE;
                else
                    -- stores write back rA update in this cycle
                    do_update := r.update;
                    stall := '0';
                    done := '1';
                    v.state := IDLE;
                end if;
            end if;

        when LD_UPDATE =>
            do_update := '1';
            v.state := IDLE;
            done := '1';
    end case;

    -- Update registers
    rin <= v;

    -- Update outputs to dcache
    d_out.valid <= req;
    d_out.load <= v.load;
    d_out.nc <= v.nc;
    d_out.reserve <= v.reserve;
    d_out.addr <= addr;
    d_out.data <= v.data;
    d_out.byte_sel <= byte_sel;

    -- Update outputs to writeback
    -- Multiplex either cache data to the destination GPR or
    -- the address for the rA update.
    l_out.valid <= done;
    if do_update = '1' then
        l_out.write_enable <= '1';
        l_out.write_reg <= r.update_reg;
        l_out.write_data <= r.addr;
        l_out.write_len <= x"8";
        l_out.write_shift <= "000";
        l_out.sign_extend <= '0';
        l_out.byte_reverse <= '0';
        l_out.second_word <= '0';
        l_out.rc <= '0';
        l_out.store_done <= '0';
    else
        l_out.write_enable <= write_enable;
        l_out.write_reg <= r.write_reg;
        l_out.write_data <= d_in.data;
        l_out.write_len <= r.length;
        l_out.write_shift <= r.addr(2 downto 0);
        l_out.sign_extend <= r.sign_extend;
        l_out.byte_reverse <= r.byte_reverse;
        l_out.second_word <= second_dword;
        l_out.rc <= r.rc and done;
        l_out.store_done <= d_in.store_done;
    end if;
    l_out.xerc <= r.xerc;

    stall_out <= stall;

end process;
277 end;