execute1: Implement trap instructions properly
[microwatt.git] / execute1.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.decode_types.all;
7 use work.common.all;
8 use work.helpers.all;
9 use work.crhelpers.all;
10 use work.insn_helpers.all;
11 use work.ppc_fx_insns.all;
12
entity execute1 is
    generic (
        -- When true, an operand whose e_in.bypass_dataN flag is set is taken
        -- from the previously latched result (r.e.write_data) rather than
        -- from e_in.read_dataN.
        EX1_BYPASS : boolean := true
    );
    port (
        clk, rst : in std_ulogic;

        -- asynchronous (driven combinationally by the execute1_1 process)
        flush_out, stall_out : out std_ulogic;

        -- instruction, operands and control fields to execute
        e_in : in Decode2ToExecute1Type;

        -- asynchronous (driven combinationally by the execute1_1 process)
        l_out : out Execute1ToLoadstore1Type;
        f_out : out Execute1ToFetch1Type;

        -- registered result record (copy of r.e)
        e_out : out Execute1ToWritebackType;

        -- icache_inval is raised for OP_ICBI; terminate_out for OP_ATTN and
        -- for instruction types the main case statement does not handle.
        icache_inval, terminate_out : out std_ulogic
    );
end entity execute1;
37
38 architecture behaviour of execute1 is
-- Registered state of the execute stage, updated on every rising clock edge.
type reg_type is record
    -- result record presented on e_out
    e : Execute1ToWritebackType;
    -- a branch-and-link was executed last cycle; next_lr must be written to LR
    lr_update : std_ulogic;
    next_lr : std_ulogic_vector(63 downto 0);
    -- a multi-cycle operation was started and has not yet delivered its result
    mul_in_progress, div_in_progress, cntz_in_progress : std_ulogic;
    -- destination and flag settings captured when the instruction was issued,
    -- used when a multi-cycle operation completes
    slow_op_dest : gpr_index_t;
    slow_op_rc, slow_op_oe : std_ulogic;
    slow_op_xerc : xer_common_t;
end record reg_type;
51
-- current registered state and its combinationally computed successor
signal r : reg_type;
signal rin : reg_type;

-- operands after the optional result bypass
signal a_in : std_ulogic_vector(63 downto 0);
signal b_in : std_ulogic_vector(63 downto 0);
signal c_in : std_ulogic_vector(63 downto 0);

-- control state (MSR, TB, DEC, interrupt sequencing) and its next value
signal ctrl, ctrl_tmp : ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));

-- rotator controls and results
signal right_shift : std_ulogic;
signal rot_clear_left : std_ulogic;
signal rot_clear_right : std_ulogic;
signal rotator_result : std_ulogic_vector(63 downto 0);
signal rotator_carry : std_ulogic;

-- results from the logical and count-zero units
signal logical_result : std_ulogic_vector(63 downto 0);
signal countzero_result : std_ulogic_vector(63 downto 0);
signal popcnt_result : std_ulogic_vector(63 downto 0);
signal parity_result : std_ulogic_vector(63 downto 0);

-- multiply signals
signal x_to_multiply : Execute1ToMultiplyType;
signal multiply_to_x : MultiplyToExecute1Type;

-- divider signals
signal x_to_divider : Execute1ToDividerType;
signal divider_to_x : DividerToExecute1Type;
73
-- Record new carry values in the writeback record and mark XER as modified.
--   e       : writeback record to update in place
--   carry32 : new value for XER CA32
--   carry   : new value for XER CA
procedure set_carry(e: inout Execute1ToWritebackType;
                    carry32 : in std_ulogic;
                    carry : in std_ulogic) is
begin
    e.write_xerc_enable := '1';
    e.xerc.ca := carry;
    e.xerc.ca32 := carry32;
end procedure set_carry;
82
-- Record new overflow values in the writeback record and mark XER as modified.
--   e    : writeback record to update in place
--   ov   : new value for XER OV
--   ov32 : new value for XER OV32
-- SO is sticky: it is set when ov is '1' and otherwise left untouched.
procedure set_ov(e: inout Execute1ToWritebackType;
                 ov : in std_ulogic;
                 ov32 : in std_ulogic) is
begin
    e.write_xerc_enable := '1';
    e.xerc.ov := ov;
    e.xerc.ov32 := ov32;
    if ov = '1' then
        e.xerc.so := '1';
    end if;
end procedure set_ov;
94
-- Signed-overflow detection for an addition.
--   msb_a, msb_b : sign bits of the two addends
--   ca           : carry out of the sign bit position
--   msb_r        : sign bit of the sum
-- Returns '1' when the addends have equal sign bits and the carry out
-- differs from the sign bit of the result.
function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic;
                 ca: std_ulogic; msb_r: std_ulogic) return std_ulogic is
    variable same_sign : std_ulogic;
    variable carry_mismatch : std_ulogic;
begin
    same_sign := msb_a xnor msb_b;
    carry_mismatch := ca xor msb_r;
    return carry_mismatch and same_sign;
end function calc_ov;
100
-- Select the carry-in value for the adder.
--   ic   : which carry source the instruction requests
--   xerc : current XER common bits (supplies CA)
-- Returns '0' for ZERO, xerc.ca for CA and '1' for ONE.
function decode_input_carry(ic : carry_in_t;
                            xerc : xer_common_t) return std_ulogic is
    variable cin : std_ulogic;
begin
    case ic is
        when ZERO => cin := '0';
        when CA   => cin := xerc.ca;
        when ONE  => cin := '1';
    end case;
    return cin;
end function decode_input_carry;
113
-- Return a copy of an MSR image with only the full-function bits retained;
-- all partial-function bits are forced to '0'.
--   msr : 64-bit MSR value, indexed 63 downto 0 (VHDL index 63 is the MSB)
function msr_copy(msr: std_ulogic_vector(63 downto 0))
    return std_ulogic_vector is
    variable msr_out: std_ulogic_vector(63 downto 0);
begin
    -- ISA says this:
    --  Defined MSR bits are classified as either full func-
    --  tion or partial function. Full function MSR bits are
    --  saved in SRR1 or HSRR1 when an interrupt other
    --  than a System Call Vectored interrupt occurs and
    --  restored by rfscv, rfid, or hrfid, while partial func-
    --  tion MSR bits are not saved or restored.
    --  Full function MSR bits lie in the range 0:32, 37:41, and
    --  48:63, and partial function MSR bits lie in the range
    --  33:36 and 42:47.
    --
    -- The ISA numbers bits from the MSB (bit 0) whereas this vector is
    -- indexed 63 downto 0, so ISA bit n is VHDL index 63 - n:
    --   ISA  0:32  ->  63 downto 31
    --   ISA 37:41  ->  26 downto 22
    --   ISA 48:63  ->  15 downto 0
    msr_out := (others => '0');
    msr_out(63 downto 31) := msr(63 downto 31);
    msr_out(26 downto 22) := msr(26 downto 22);
    msr_out(15 downto 0) := msr(15 downto 0);
    return msr_out;
end function msr_copy;
134
135 begin
136
-- Rotate/shift unit: rotates c_in by b_in(6 downto 0), with a_in as the
-- second data input.
rotator_0: entity work.rotator
    port map (
        insn => e_in.insn,
        is_32bit => e_in.is_32bit,
        arith => e_in.is_signed,
        right_shift => right_shift,
        clear_left => rot_clear_left,
        clear_right => rot_clear_right,
        rs => c_in,
        ra => a_in,
        shift => b_in(6 downto 0),
        result => rotator_result,
        carry_out => rotator_carry
    );

-- Logical unit: also supplies the popcnt and parity results.
logical_0: entity work.logical
    port map (
        op => e_in.insn_type,
        invert_in => e_in.invert_a,
        invert_out => e_in.invert_out,
        datalen => e_in.data_len,
        rs => c_in,
        rb => b_in,
        result => logical_result,
        popcnt => popcnt_result,
        parity => parity_result
    );

-- Count leading/trailing zeros unit (clocked).
countzero_0: entity work.zero_counter
    port map (
        clk => clk,
        is_32bit => e_in.is_32bit,
        count_right => e_in.insn(10),
        rs => c_in,
        result => countzero_result
    );

-- Multiplier (clocked).
multiply_0: entity work.multiply
    port map (
        clk => clk,
        m_out => multiply_to_x,
        m_in => x_to_multiply
    );

-- Divider (clocked, with reset).
divider_0: entity work.divider
    port map (
        clk => clk,
        rst => rst,
        d_out => divider_to_x,
        d_in => x_to_divider
    );

-- Operand selection: use the register-file value unless bypassing is
-- compiled in and requested for that operand, in which case the result
-- latched last cycle is used instead.
a_in <= e_in.read_data1 when (not EX1_BYPASS) or (e_in.bypass_data1 /= '1') else r.e.write_data;
b_in <= e_in.read_data2 when (not EX1_BYPASS) or (e_in.bypass_data2 /= '1') else r.e.write_data;
c_in <= e_in.read_data3 when (not EX1_BYPASS) or (e_in.bypass_data3 /= '1') else r.e.write_data;
192
-- Sequential part: latch the next-state values computed by execute1_1.
execute1_0: process(clk)
begin
    if rising_edge(clk) then
        -- A delayed LR write occupies the writeback slot this cycle, so a
        -- valid incoming instruction at the same time is a pipeline error.
        assert r.lr_update /= '1' or e_in.valid /= '1'
            report "LR update collision with valid in EX1"
            severity failure;
        if r.lr_update = '1' then
            report "LR update to " & to_hstring(r.next_lr);
        end if;

        ctrl <= ctrl_tmp;
        r <= rin;
    end if;
end process execute1_0;
206
-- Combinational part of the execute stage.
--
-- Computes, from the registered state (r, ctrl), the incoming instruction
-- (e_in) and the operand values (a_in, b_in, c_in):
--   * the next registered state (rin, ctrl_tmp),
--   * the requests to the multiply and divide units,
--   * the asynchronous outputs (f_out, l_out, stall_out, flush_out,
--     icache_inval, terminate_out).
-- e_out is a copy of the registered result r.e.
execute1_1: process(all)
    variable v : reg_type;
    variable a_inv : std_ulogic_vector(63 downto 0);
    variable result : std_ulogic_vector(63 downto 0);
    variable newcrf : std_ulogic_vector(3 downto 0);
    variable result_with_carry : std_ulogic_vector(64 downto 0);
    variable result_en : std_ulogic;
    variable crnum : crnum_t;
    variable crbit : integer range 0 to 31;
    variable scrnum : crnum_t;
    variable lo, hi : integer;
    variable bo, bi : std_ulogic_vector(4 downto 0);
    variable bf, bfa : std_ulogic_vector(2 downto 0);
    variable cr_op : std_ulogic_vector(9 downto 0);
    variable cr_operands : std_ulogic_vector(1 downto 0);
    variable bt, ba, bb : std_ulogic_vector(4 downto 0);
    variable btnum, banum, bbnum : integer range 0 to 31;
    variable crresult : std_ulogic;
    variable l : std_ulogic;
    variable next_nia : std_ulogic_vector(63 downto 0);
    variable carry_32, carry_64 : std_ulogic;
    variable sign1, sign2 : std_ulogic;
    variable abs1, abs2 : signed(63 downto 0);
    variable overflow : std_ulogic;
    variable negative : std_ulogic;
    variable zerohi, zerolo : std_ulogic;
    variable msb_a, msb_b : std_ulogic;
    variable a_lt : std_ulogic;
    variable lv : Execute1ToLoadstore1Type;
    variable irq_valid : std_ulogic;
    variable exception : std_ulogic;
    variable exception_nextpc : std_ulogic;
    variable trapval : std_ulogic_vector(4 downto 0);
begin
    result := (others => '0');
    result_with_carry := (others => '0');
    result_en := '0';
    newcrf := (others => '0');

    v := r;
    v.e := Execute1ToWritebackInit;

    -- XER forwarding. To avoid having to track XER hazards, we
    -- use the previously latched value.
    --
    -- If the XER was modified by a multiply or a divide, those are
    -- single issue, we'll get the up to date value from decode2 from
    -- the register file.
    --
    -- If it was modified by an instruction older than the previous
    -- one in EX1, it will have also hit writeback and will be up
    -- to date in decode2.
    --
    -- That leaves us with the case where it was updated by the previous
    -- instruction in EX1. In that case, we can forward it back here.
    --
    -- This will break if we allow pipelining of multiply and divide,
    -- but ideally, those should go via EX1 anyway and run as a state
    -- machine from here.
    --
    -- One additional hazard to beware of is an XER:SO modifying instruction
    -- in EX1 followed immediately by a store conditional. Due to our
    -- writeback latency, the store will go down the LSU with the previous
    -- XER value, thus the stcx. will set CR0:SO using an obsolete SO value.
    --
    -- We will need to handle that if we ever make stcx. not single issue
    --
    -- We always pass a valid XER value down to writeback even when
    -- we aren't updating it, in order for XER:SO -> CR0:SO transfer
    -- to work for RC instructions.
    --
    if r.e.write_xerc_enable = '1' then
        v.e.xerc := r.e.xerc;
    else
        v.e.xerc := e_in.xerc;
    end if;

    v.lr_update := '0';
    v.mul_in_progress := '0';
    v.div_in_progress := '0';
    v.cntz_in_progress := '0';

    -- signals to multiply unit
    x_to_multiply <= Execute1ToMultiplyInit;
    x_to_multiply.insn_type <= e_in.insn_type;
    x_to_multiply.is_32bit <= e_in.is_32bit;

    if e_in.is_32bit = '1' then
        if e_in.is_signed = '1' then
            -- sign-extend the low 32 bits of each operand
            x_to_multiply.data1 <= (others => a_in(31));
            x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
            x_to_multiply.data2 <= (others => b_in(31));
            x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
        else
            -- zero-extend the low 32 bits of each operand
            x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
            x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
        end if;
    else
        if e_in.is_signed = '1' then
            x_to_multiply.data1 <= a_in(63) & a_in;
            x_to_multiply.data2 <= b_in(63) & b_in;
        else
            x_to_multiply.data1 <= '0' & a_in;
            x_to_multiply.data2 <= '0' & b_in;
        end if;
    end if;

    -- signals to divide unit
    sign1 := '0';
    sign2 := '0';
    if e_in.is_signed = '1' then
        if e_in.is_32bit = '1' then
            sign1 := a_in(31);
            sign2 := b_in(31);
        else
            sign1 := a_in(63);
            sign2 := b_in(63);
        end if;
    end if;
    -- take absolute values
    if sign1 = '0' then
        abs1 := signed(a_in);
    else
        abs1 := - signed(a_in);
    end if;
    if sign2 = '0' then
        abs2 := signed(b_in);
    else
        abs2 := - signed(b_in);
    end if;

    x_to_divider <= Execute1ToDividerInit;
    x_to_divider.is_signed <= e_in.is_signed;
    x_to_divider.is_32bit <= e_in.is_32bit;
    if e_in.insn_type = OP_MOD then
        x_to_divider.is_modulus <= '1';
    end if;
    -- Note: this reads the current value of the x_to_divider signal, not the
    -- value scheduled just above; it settles when the process re-runs.
    x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
    if e_in.is_32bit = '0' then
        -- 64-bit forms
        if e_in.insn_type = OP_DIVE then
            x_to_divider.is_extended <= '1';
        end if;
        x_to_divider.dividend <= std_ulogic_vector(abs1);
        x_to_divider.divisor <= std_ulogic_vector(abs2);
    else
        -- 32-bit forms
        x_to_divider.is_extended <= '0';
        if e_in.insn_type = OP_DIVE then   -- extended forms
            x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
        else
            x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
        end if;
        x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
    end if;

    ctrl_tmp <= ctrl;
    -- FIXME: run at 512MHz not core freq
    ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
    ctrl_tmp.dec <= std_ulogic_vector(unsigned(ctrl.dec) - 1);

    -- A decrementer interrupt is pending when MSR[EE] (ISA bit 48) is set
    -- and the top bit of DEC is set.
    irq_valid := '0';
    if ctrl.msr(63 - 48) = '1' and ctrl.dec(63) = '1' then
        report "IRQ valid";
        irq_valid := '1';
    end if;

    terminate_out <= '0';
    icache_inval <= '0';
    stall_out <= '0';
    f_out <= Execute1ToFetch1TypeInit;

    -- Next insn adder used in a couple of places
    next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4);

    -- rotator control signals
    right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
    rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0';
    rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';

    -- Defaults for interrupt delivery: SRR0 <= address of this instruction
    ctrl_tmp.irq_state <= WRITE_SRR0;
    exception := '0';
    exception_nextpc := '0';
    v.e.exc_write_enable := '0';
    v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
    v.e.exc_write_data := e_in.nia;

    if ctrl.irq_state = WRITE_SRR1 then
        -- Second cycle of interrupt delivery: write SRR1, clear EE and
        -- redirect fetch to the interrupt vector.
        v.e.exc_write_reg := fast_spr_num(SPR_SRR1);
        v.e.exc_write_data := ctrl.srr1;
        v.e.exc_write_enable := '1';
        ctrl_tmp.msr(63 - 48) <= '0'; -- clear EE
        f_out.redirect <= '1';
        f_out.redirect_nia <= ctrl.irq_nia;
        v.e.valid := e_in.valid;
        report "Writing SRR1: " & to_hstring(ctrl.srr1);

    elsif irq_valid = '1' and e_in.valid = '1' then
        -- we need two cycles to write srr0 and 1
        -- will need more when we have to write DSISR, DAR and HIER
        -- Don't deliver the interrupt until we have a valid instruction
        -- coming in, so we have a valid NIA to put in SRR0.
        --
        -- The e_in.valid test must be part of this condition rather than
        -- only gating 'exception': otherwise a pending interrupt would
        -- shadow the branches below that complete a delayed LR write or a
        -- multi-cycle cntz/multiply/divide, and their results would be lost.
        exception := '1';
        ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#900#, 64));
        ctrl_tmp.srr1 <= msr_copy(ctrl.msr);

    elsif e_in.valid = '1' then

        v.e.valid := '1';
        v.e.write_reg := e_in.write_reg;
        -- remember destination and flags in case this is a multi-cycle op
        v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
        v.slow_op_rc := e_in.rc;
        v.slow_op_oe := e_in.oe;
        v.slow_op_xerc := v.e.xerc;

        case_0: case e_in.insn_type is

            when OP_ILLEGAL =>
                -- we need two cycles to write srr0 and 1
                -- will need more when we have to write DSISR, DAR and HIER
                exception := '1';
                ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#700#, 64));
                ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
                -- Since we aren't doing Hypervisor emulation assist (0xe40) we
                -- set bit 44 to indicate we have an illegal
                ctrl_tmp.srr1(63 - 44) <= '1';
                report "illegal";
            when OP_SC =>
                -- FIXME Assume everything is SC (not SCV) for now
                -- we need two cycles to write srr0 and 1
                -- will need more when we have to write DSISR, DAR and HIER
                exception := '1';
                exception_nextpc := '1';
                ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#C00#, 64));
                ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
                report "sc";
            when OP_ATTN =>
                terminate_out <= '1';
                report "ATTN";
            when OP_NOP =>
                -- Do nothing
            when OP_ADD | OP_CMP | OP_TRAP =>
                if e_in.invert_a = '0' then
                    a_inv := a_in;
                else
                    a_inv := not a_in;
                end if;
                result_with_carry := ppc_adde(a_inv, b_in,
                                              decode_input_carry(e_in.input_carry, v.e.xerc));
                result := result_with_carry(63 downto 0);
                -- carry out of bit 31, recovered from the sum at bit 32
                carry_32 := result(32) xor a_inv(32) xor b_in(32);
                carry_64 := result_with_carry(64);
                if e_in.insn_type = OP_ADD then
                    if e_in.output_carry = '1' then
                        set_carry(v.e, carry_32, carry_64);
                    end if;
                    if e_in.oe = '1' then
                        set_ov(v.e,
                               calc_ov(a_inv(63), b_in(63), carry_64, result_with_carry(63)),
                               calc_ov(a_inv(31), b_in(31), carry_32, result_with_carry(31)));
                    end if;
                    result_en := '1';
                else
                    -- trap, CMP and CMPL instructions
                    -- Note, we have done RB - RA, not RA - RB
                    if e_in.insn_type = OP_CMP then
                        l := insn_l(e_in.insn);
                    else
                        l := not e_in.is_32bit;
                    end if;
                    zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0)));
                    zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32)));
                    -- trapval bits, from 4 down to 0:
                    --   signed <, signed >, =, unsigned <, unsigned >
                    if zerolo = '1' and (l = '0' or zerohi = '1') then
                        -- values are equal
                        trapval := "00100";
                    else
                        if l = '1' then
                            -- 64-bit comparison
                            msb_a := a_in(63);
                            msb_b := b_in(63);
                        else
                            -- 32-bit comparison
                            msb_a := a_in(31);
                            msb_b := b_in(31);
                        end if;
                        if msb_a /= msb_b then
                            -- Subtraction might overflow, but
                            -- comparison is clear from MSB difference.
                            -- for signed, 0 is greater; for unsigned, 1 is greater
                            trapval := msb_a & msb_b & '0' & msb_b & msb_a;
                        else
                            -- Subtraction cannot overflow since MSBs are equal.
                            -- carry = 1 indicates RA is smaller (signed or unsigned)
                            a_lt := (not l and carry_32) or (l and carry_64);
                            trapval := a_lt & not a_lt & '0' & a_lt & not a_lt;
                        end if;
                    end if;
                    if e_in.insn_type = OP_CMP then
                        if e_in.is_signed = '1' then
                            newcrf := trapval(4 downto 2) & v.e.xerc.so;
                        else
                            newcrf := trapval(1 downto 0) & trapval(2) & v.e.xerc.so;
                        end if;
                        bf := insn_bf(e_in.insn);
                        crnum := to_integer(unsigned(bf));
                        v.e.write_cr_enable := '1';
                        v.e.write_cr_mask := num_to_fxm(crnum);
                        -- replicate the new field everywhere; the mask picks one
                        for i in 0 to 7 loop
                            lo := i*4;
                            hi := lo + 3;
                            v.e.write_cr_data(hi downto lo) := newcrf;
                        end loop;
                    else
                        -- trap instructions (tw, twi, td, tdi)
                        if or (trapval and insn_to(e_in.insn)) = '1' then
                            -- generate trap-type program interrupt
                            exception := '1';
                            ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#700#, 64));
                            ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
                            -- set bit 46 to say trap occurred
                            ctrl_tmp.srr1(63 - 46) <= '1';
                            report "trap";
                        end if;
                    end if;
                end if;
            when OP_AND | OP_OR | OP_XOR =>
                result := logical_result;
                result_en := '1';
            when OP_B =>
                f_out.redirect <= '1';
                if (insn_aa(e_in.insn)) then
                    f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
                else
                    f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
                end if;
            when OP_BC =>
                -- read_data1 is CTR
                bo := insn_bo(e_in.insn);
                bi := insn_bi(e_in.insn);
                if bo(4-2) = '0' then
                    result := std_ulogic_vector(unsigned(a_in) - 1);
                    result_en := '1';
                    v.e.write_reg := fast_spr_num(SPR_CTR);
                end if;
                if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
                    f_out.redirect <= '1';
                    if (insn_aa(e_in.insn)) then
                        f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
                    else
                        f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
                    end if;
                end if;
            when OP_BCREG =>
                -- read_data1 is CTR
                -- read_data2 is target register (CTR, LR or TAR)
                bo := insn_bo(e_in.insn);
                bi := insn_bi(e_in.insn);
                if bo(4-2) = '0' and e_in.insn(10) = '0' then
                    result := std_ulogic_vector(unsigned(a_in) - 1);
                    result_en := '1';
                    v.e.write_reg := fast_spr_num(SPR_CTR);
                end if;
                if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
                    f_out.redirect <= '1';
                    f_out.redirect_nia <= b_in(63 downto 2) & "00";
                end if;

            when OP_RFID =>
                f_out.redirect <= '1';
                f_out.redirect_nia <= a_in(63 downto 2) & "00"; -- srr0
                ctrl_tmp.msr <= msr_copy(std_ulogic_vector(signed(b_in))); -- srr1
            when OP_CMPB =>
                result := ppc_cmpb(c_in, b_in);
                result_en := '1';
            when OP_CNTZ =>
                -- result is picked up next cycle from the zero counter
                v.e.valid := '0';
                v.cntz_in_progress := '1';
                stall_out <= '1';
            when OP_EXTS =>
                -- note data_len is a 1-hot encoding
                negative := (e_in.data_len(0) and c_in(7)) or
                            (e_in.data_len(1) and c_in(15)) or
                            (e_in.data_len(2) and c_in(31));
                result := (others => negative);
                if e_in.data_len(2) = '1' then
                    result(31 downto 16) := c_in(31 downto 16);
                end if;
                if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
                    result(15 downto 8) := c_in(15 downto 8);
                end if;
                result(7 downto 0) := c_in(7 downto 0);
                result_en := '1';
            when OP_ISEL =>
                crbit := to_integer(unsigned(insn_bc(e_in.insn)));
                if e_in.cr(31-crbit) = '1' then
                    result := a_in;
                else
                    result := b_in;
                end if;
                result_en := '1';
            when OP_CROP =>
                cr_op := insn_cr(e_in.insn);
                report "CR OP " & to_hstring(cr_op);
                if cr_op(0) = '0' then -- MCRF
                    bf := insn_bf(e_in.insn);
                    bfa := insn_bfa(e_in.insn);
                    v.e.write_cr_enable := '1';
                    crnum := to_integer(unsigned(bf));
                    scrnum := to_integer(unsigned(bfa));
                    v.e.write_cr_mask := num_to_fxm(crnum);
                    -- pick out the source field
                    for i in 0 to 7 loop
                        lo := (7-i)*4;
                        hi := lo + 3;
                        if i = scrnum then
                            newcrf := e_in.cr(hi downto lo);
                        end if;
                    end loop;
                    -- replicate it everywhere; the mask picks the destination
                    for i in 0 to 7 loop
                        lo := i*4;
                        hi := lo + 3;
                        v.e.write_cr_data(hi downto lo) := newcrf;
                    end loop;
                else
                    v.e.write_cr_enable := '1';
                    bt := insn_bt(e_in.insn);
                    ba := insn_ba(e_in.insn);
                    bb := insn_bb(e_in.insn);
                    btnum := 31 - to_integer(unsigned(bt));
                    banum := 31 - to_integer(unsigned(ba));
                    bbnum := 31 - to_integer(unsigned(bb));
                    -- Bits 5-8 of cr_op give the truth table of the requested
                    -- logical operation
                    cr_operands := e_in.cr(banum) & e_in.cr(bbnum);
                    crresult := cr_op(5 + to_integer(unsigned(cr_operands)));
                    v.e.write_cr_mask := num_to_fxm((31-btnum) / 4);
                    for i in 0 to 31 loop
                        if i = btnum then
                            v.e.write_cr_data(i) := crresult;
                        else
                            v.e.write_cr_data(i) := e_in.cr(i);
                        end if;
                    end loop;
                end if;
            when OP_MFMSR =>
                result := msr_copy(ctrl.msr);
                result_en := '1';
            when OP_MFSPR =>
                report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
                    "=" & to_hstring(a_in);
                if is_fast_spr(e_in.read_reg1) then
                    result := a_in;
                    if decode_spr_num(e_in.insn) = SPR_XER then
                        -- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
                        result(63 downto 32) := (others => '0');
                        result(63-32) := v.e.xerc.so;
                        result(63-33) := v.e.xerc.ov;
                        result(63-34) := v.e.xerc.ca;
                        result(63-35 downto 63-43) := "000000000";
                        result(63-44) := v.e.xerc.ov32;
                        result(63-45) := v.e.xerc.ca32;
                    end if;
                else
                    case decode_spr_num(e_in.insn) is
                        when SPR_TB =>
                            result := ctrl.tb;
                        when SPR_DEC =>
                            result := ctrl.dec;
                        when others =>
                            result := (others => '0');
                    end case;
                end if;
                result_en := '1';
            when OP_MFCR =>
                if e_in.insn(20) = '0' then
                    -- mfcr
                    result := x"00000000" & e_in.cr;
                else
                    -- mfocrf
                    crnum := fxm_to_num(insn_fxm(e_in.insn));
                    result := (others => '0');
                    for i in 0 to 7 loop
                        lo := (7-i)*4;
                        hi := lo + 3;
                        if crnum = i then
                            result(hi downto lo) := e_in.cr(hi downto lo);
                        end if;
                    end loop;
                end if;
                result_en := '1';
            when OP_MTCRF =>
                v.e.write_cr_enable := '1';
                if e_in.insn(20) = '0' then
                    -- mtcrf
                    v.e.write_cr_mask := insn_fxm(e_in.insn);
                else
                    -- mtocrf: We require one hot priority encoding here
                    crnum := fxm_to_num(insn_fxm(e_in.insn));
                    v.e.write_cr_mask := num_to_fxm(crnum);
                end if;
                v.e.write_cr_data := c_in(31 downto 0);
            when OP_MTMSRD =>
                -- FIXME handle just the bits we need to.
                ctrl_tmp.msr <= msr_copy(c_in);
            when OP_MTSPR =>
                report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
                    "=" & to_hstring(c_in);
                if is_fast_spr(e_in.write_reg) then
                    result := c_in;
                    result_en := '1';
                    if decode_spr_num(e_in.insn) = SPR_XER then
                        v.e.xerc.so := c_in(63-32);
                        v.e.xerc.ov := c_in(63-33);
                        v.e.xerc.ca := c_in(63-34);
                        v.e.xerc.ov32 := c_in(63-44);
                        v.e.xerc.ca32 := c_in(63-45);
                        v.e.write_xerc_enable := '1';
                    end if;
                else
                    -- slow spr
                    case decode_spr_num(e_in.insn) is
                        when SPR_DEC =>
                            -- overrides the decrement scheduled above
                            ctrl_tmp.dec <= c_in;
                        when others =>
                    end case;
                end if;
            when OP_POPCNT =>
                result := popcnt_result;
                result_en := '1';
            when OP_PRTY =>
                result := parity_result;
                result_en := '1';
            when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
                result := rotator_result;
                if e_in.output_carry = '1' then
                    set_carry(v.e, rotator_carry, rotator_carry);
                end if;
                result_en := '1';
            when OP_SIM_CONFIG =>
                -- bit 0 was used to select the microwatt console, which
                -- we no longer support.
                result := x"0000000000000000";
                result_en := '1';

            when OP_ISYNC =>
                f_out.redirect <= '1';
                f_out.redirect_nia <= next_nia;

            when OP_ICBI =>
                icache_inval <= '1';

            when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
                v.e.valid := '0';
                v.mul_in_progress := '1';
                stall_out <= '1';
                x_to_multiply.valid <= '1';

            when OP_DIV | OP_DIVE | OP_MOD =>
                v.e.valid := '0';
                v.div_in_progress := '1';
                stall_out <= '1';
                x_to_divider.valid <= '1';

            when OP_LOAD | OP_STORE =>
                -- loadstore/dcache has its own port to writeback
                v.e.valid := '0';

            when others =>
                terminate_out <= '1';
                report "illegal";
        end case;

        v.e.rc := e_in.rc and e_in.valid;

        -- Update LR on the next cycle after a branch link
        --
        -- WARNING: The LR update isn't tracked by our hazard tracker. This
        --          will work (well I hope) because it only happens on branches
        --          which will flush all decoded instructions. By the time
        --          fetch catches up, we'll have the new LR. This will
        --          *not* work properly however if we have a branch predictor,
        --          in which case the solution would probably be to keep a
        --          local cache of the updated LR in execute1 (flushed on
        --          exceptions) that is used instead of the value from
        --          decode when its content is valid.
        if e_in.lr = '1' then
            v.lr_update := '1';
            v.next_lr := next_nia;
            v.e.valid := '0';
            report "Delayed LR update to " & to_hstring(next_nia);
            stall_out <= '1';
        end if;
    elsif r.lr_update = '1' then
        -- second cycle of a branch-and-link: write the saved return address
        result_en := '1';
        result := r.next_lr;
        v.e.write_reg := fast_spr_num(SPR_LR);
        v.e.valid := '1';
    elsif r.cntz_in_progress = '1' then
        -- cnt[lt]z always takes two cycles
        result := countzero_result;
        result_en := '1';
        v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
        v.e.rc := v.slow_op_rc;
        v.e.xerc := v.slow_op_xerc;
        v.e.valid := '1';
    elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
        if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
            (r.div_in_progress = '1' and divider_to_x.valid = '1') then
            if r.mul_in_progress = '1' then
                result := multiply_to_x.write_reg_data;
                overflow := multiply_to_x.overflow;
            else
                result := divider_to_x.write_reg_data;
                overflow := divider_to_x.overflow;
            end if;
            result_en := '1';
            v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
            v.e.rc := v.slow_op_rc;
            v.e.xerc := v.slow_op_xerc;
            v.e.write_xerc_enable := v.slow_op_oe;
            -- We must test oe because the RC update code in writeback
            -- will use the xerc value to set CR0:SO so we must not clobber
            -- xerc if OE wasn't set.
            if v.slow_op_oe = '1' then
                v.e.xerc.ov := overflow;
                v.e.xerc.ov32 := overflow;
                v.e.xerc.so := v.slow_op_xerc.so or overflow;
            end if;
            v.e.valid := '1';
        else
            -- still waiting for the unit: keep stalling
            stall_out <= '1';
            v.mul_in_progress := r.mul_in_progress;
            v.div_in_progress := r.div_in_progress;
        end if;
    end if;

    -- First cycle of interrupt delivery: write SRR0 now, SRR1 next cycle
    if exception = '1' then
        v.e.exc_write_enable := '1';
        if exception_nextpc = '1' then
            v.e.exc_write_data := next_nia;
        end if;
        ctrl_tmp.irq_state <= WRITE_SRR1;
        v.e.valid := '1';
    end if;

    v.e.write_data := result;
    v.e.write_enable := result_en;

    -- Outputs to loadstore1 (async)
    lv := Execute1ToLoadstore1Init;
    if e_in.valid = '1' and (e_in.insn_type = OP_LOAD or e_in.insn_type = OP_STORE) then
        lv.valid := '1';
    end if;
    if e_in.insn_type = OP_LOAD then
        lv.load := '1';
    end if;
    lv.addr1 := a_in;
    lv.addr2 := b_in;
    lv.data := c_in;
    lv.write_reg := gspr_to_gpr(e_in.write_reg);
    lv.length := e_in.data_len;
    lv.byte_reverse := e_in.byte_reverse;
    lv.sign_extend := e_in.sign_extend;
    lv.update := e_in.update;
    lv.update_reg := gspr_to_gpr(e_in.read_reg1);
    lv.xerc := v.e.xerc;
    lv.reserve := e_in.reserve;
    lv.rc := e_in.rc;
    -- decode l*cix and st*cix instructions here
    if e_in.insn(31 downto 26) = "011111" and e_in.insn(10 downto 9) = "11" and
        e_in.insn(5 downto 1) = "10101" then
        lv.ci := '1';
    end if;

    -- Update registers
    rin <= v;

    -- update outputs
    l_out <= lv;
    e_out <= r.e;
    flush_out <= f_out.redirect;
end process;
891 end architecture behaviour;