execute1.vhdl

   1 library ieee;
   2 use ieee.std_logic_1164.all;
   3 use ieee.numeric_std.all;
   4
   5 library work;
   6 use work.decode_types.all;
   7 use work.common.all;
   8 use work.helpers.all;
   9 use work.crhelpers.all;
  10 use work.insn_helpers.all;
  11 use work.ppc_fx_insns.all;
  12
  13 entity execute1 is
     -- First execute stage. Takes one decoded instruction per cycle from
     -- decode2 (e_in), computes its result, and hands it to writeback (e_out),
     -- to loadstore1 (l_out) or to fetch1 as a redirect (f_out).
  14     generic (
             -- When true, an operand whose e_in.bypass_dataN flag is set is taken
             -- from the previously latched result (r.e.write_data) instead of
             -- from e_in.read_dataN.
  15         EX1_BYPASS : boolean := true
  16         );
  17     port (
  18         clk   : in std_ulogic;
  19         rst   : in std_ulogic;
  20
  21         -- asynchronous (driven combinationally, not registered)
             -- flush_out mirrors f_out.redirect.
  22         flush_out : out std_ulogic;
             -- stall_out is raised while this stage cannot accept a new
             -- instruction: when a multiply, divide or cnt[lt]z is issued or
             -- still pending, and for the delayed LR update of a linking branch.
  23         stall_out : out std_ulogic;
  24
             -- Decoded instruction, operands and control flags from decode2.
  25         e_in  : in Decode2ToExecute1Type;
  26
  27         -- asynchronous (driven combinationally, not registered)
             -- l_out: request to loadstore1; f_out: redirect request to fetch1.
  28         l_out : out Execute1ToLoadstore1Type;
  29         f_out : out Execute1ToFetch1Type;
  30
             -- Registered result bundle for writeback (the latched r.e).
  31         e_out : out Execute1ToWritebackType;
  32
             -- Raised (combinationally) while a valid OP_ICBI is being executed.
  33         icache_inval : out std_ulogic;
             -- Raised (combinationally) for OP_ILLEGAL, OP_ATTN and any
             -- instruction type this stage does not handle.
  34         terminate_out : out std_ulogic
  35         );
  36 end entity execute1;
  37
  38 architecture behaviour of execute1 is
  39     type reg_type is record
             -- State of this stage that is latched on every rising clock edge.
             -- Result bundle presented to writeback on e_out.
  40         e : Execute1ToWritebackType;
             -- Set for one cycle after an instruction with e_in.lr = '1'; the
             -- following cycle writes next_lr (that instruction's address + 4)
             -- to LR.
  41         lr_update : std_ulogic;
  42         next_lr : std_ulogic_vector(63 downto 0);
             -- Set while a multiply / divide / count-zeros result is awaited.
  43         mul_in_progress : std_ulogic;
  44         div_in_progress : std_ulogic;
  45         cntz_in_progress : std_ulogic;
             -- Destination register, Rc, OE and XER bits captured when an
             -- instruction is accepted, replayed when a multi-cycle operation
             -- completes.
  46         slow_op_dest : gpr_index_t;
  47         slow_op_rc : std_ulogic;
  48         slow_op_oe : std_ulogic;
  49         slow_op_xerc : xer_common_t;
  50     end record;
  51
         -- r is the registered state, rin the next-state value computed
         -- combinationally by execute1_1.
  52     signal r, rin : reg_type;
  53
         -- Operands after the optional result bypass (see EX1_BYPASS).
  54     signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
  55
         -- ctrl is the registered copy of ctrl_tmp; its tb field is incremented
         -- every clock cycle and is returned by mfspr from SPR_TB.
  56     signal ctrl: ctrl_t := (others => (others => '0'));
  57     signal ctrl_tmp: ctrl_t := (others => (others => '0'));
  58
         -- Rotator controls (derived from the instruction type) and the
         -- combinational results of the rotator and logical units.
  59     signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
  60     signal rotator_result: std_ulogic_vector(63 downto 0);
  61     signal rotator_carry: std_ulogic;
  62     signal logical_result: std_ulogic_vector(63 downto 0);
         -- Output of the clocked zero counter; consumed the cycle after OP_CNTZ
         -- is issued.
  63     signal countzero_result: std_ulogic_vector(63 downto 0);
  64     signal popcnt_result: std_ulogic_vector(63 downto 0);
  65     signal parity_result: std_ulogic_vector(63 downto 0);
  66
  67     -- multiply signals
  68     signal x_to_multiply: Execute1ToMultiplyType;
  69     signal multiply_to_x: MultiplyToExecute1Type;
  70
  71     -- divider signals
  72     signal x_to_divider: Execute1ToDividerType;
  73     signal divider_to_x: DividerToExecute1Type;
  74
  75     procedure set_carry(e: inout Execute1ToWritebackType;
  76                         carry32 : in std_ulogic;
  77                         carry : in std_ulogic) is
         -- Store new carry bits in the writeback bundle and flag XER for update.
         --   e       : writeback bundle being built for this instruction
         --   carry32 : value for XER[CA32]
         --   carry   : value for XER[CA]
  78     begin
  79         e.write_xerc_enable := '1';
  80         e.xerc.ca := carry;
  81         e.xerc.ca32 := carry32;
  82     end procedure set_carry;
  83
  84     procedure set_ov(e: inout Execute1ToWritebackType;
  85                      ov   : in std_ulogic;
  86                      ov32 : in std_ulogic) is
         -- Store new overflow bits in the writeback bundle and flag XER for
         -- update.
         --   e    : writeback bundle being built for this instruction
         --   ov   : value for XER[OV]
         --   ov32 : value for XER[OV32]
  87     begin
  88         e.write_xerc_enable := '1';
  89         e.xerc.ov := ov;
  90         e.xerc.ov32 := ov32;
             -- SO is sticky: an overflow sets it, nothing here ever clears it.
  91         if ov = '1' then
  92             e.xerc.so := '1';
  93         end if;
  94     end procedure set_ov;
  95
  96     function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic;
  97                      ca: std_ulogic; msb_r: std_ulogic) return std_ulogic is
         -- Signed-overflow flag of an addition, derived from the operand sign
         -- bits (msb_a, msb_b), the carry out of the sign position (ca) and the
         -- sign bit of the sum (msb_r). Returns '1' only when both operands
         -- have the same sign and the carry out differs from the result sign.
             variable same_sign : std_ulogic;
             variable carry_differs : std_ulogic;
  98     begin
             same_sign := msb_a xnor msb_b;
             carry_differs := ca xor msb_r;
  99         return carry_differs and same_sign;
 100     end function calc_ov;
 101
 102     function decode_input_carry(ic : carry_in_t;
 103                                 xerc : xer_common_t) return std_ulogic is
         -- Select the carry-in bit for the adder.
         --   ic   : carry source requested by decode (ZERO, CA or ONE)
         --   xerc : current XER bits, supplying CA when ic = CA
 104     begin
 105         if ic = ZERO then
 106             return '0';
 107         elsif ic = CA then
 108             return xerc.ca;
 109         else
                 -- Only ONE remains.
 110             return '1';
 111         end if;
 113     end function decode_input_carry;
 114
 115 begin
 116
 117     rotator_0: entity work.rotator
             -- Rotate/shift unit: rotates c_in by the low 7 bits of b_in, with
             -- a_in as the second data input; result and carry are combinational.
 118         port map (
 119             rs => c_in,
 120             ra => a_in,
 121             shift => b_in(6 downto 0),
 122             insn => e_in.insn,
 123             is_32bit => e_in.is_32bit,
 124             right_shift => right_shift,
 125             arith => e_in.is_signed,
 126             clear_left => rot_clear_left,
 127             clear_right => rot_clear_right,
 128             result => rotator_result,
 129             carry_out => rotator_carry
 130             );
 131
 132     logical_0: entity work.logical
             -- Logical unit: also produces the popcnt and parity results.
 133         port map (
 134             rs => c_in,
 135             rb => b_in,
 136             op => e_in.insn_type,
 137             invert_in => e_in.invert_a,
 138             invert_out => e_in.invert_out,
 139             result => logical_result,
 140             datalen => e_in.data_len,
 141             popcnt => popcnt_result,
 142             parity => parity_result
 143             );
 144
 145     countzero_0: entity work.zero_counter
             -- Clocked count-zeros unit; its result is consumed on the cycle
             -- after OP_CNTZ is issued.
 146         port map (
 147             clk => clk,
 148             rs => c_in,
 149             count_right => e_in.insn(10),
 150             is_32bit => e_in.is_32bit,
 151             result => countzero_result
 152             );
 153
 154     multiply_0: entity work.multiply
             -- Multi-cycle multiplier; completion is signalled by
             -- multiply_to_x.valid.
 155         port map (
 156             clk => clk,
 157             m_in => x_to_multiply,
 158             m_out => multiply_to_x
 159             );
 160
 161     divider_0: entity work.divider
             -- Multi-cycle divider; completion is signalled by
             -- divider_to_x.valid.
 162         port map (
 163             clk => clk,
 164             rst => rst,
 165             d_in => x_to_divider,
 166             d_out => divider_to_x
 167             );
 168
         -- Operand bypass: when enabled and requested by decode2, use the result
         -- latched by the previous instruction instead of the register-file data.
 169     a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
 170     b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
 171     c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;
 172
 173     execute1_0: process(clk)
         -- Clocked half of the stage: latches the next state computed by
         -- execute1_1 (rin, ctrl_tmp) and checks the LR-update invariant.
 174     begin
 175         if rising_edge(clk) then
 176             r <= rin;
 177             ctrl <= ctrl_tmp;
                 if rst = '1' then
                     -- Synchronous reset of the control state. These later
                     -- assignments override the matching fields of "r <= rin".
                     -- Without this, an LR update or a multi-cycle operation
                     -- in flight when rst is asserted would survive the
                     -- reset. The divider itself receives rst, so a surviving
                     -- div_in_progress could leave stall_out asserted while
                     -- waiting for a completion that never arrives.
                     -- Execute1ToWritebackInit is the idle bundle execute1_1
                     -- starts from every cycle.
                     r.e <= Execute1ToWritebackInit;
                     r.lr_update <= '0';
                     r.mul_in_progress <= '0';
                     r.div_in_progress <= '0';
                     r.cntz_in_progress <= '0';
                 end if;
                 -- The cycle after a linking branch is reserved for the LR
                 -- write, so no new instruction may be presented then.
 178             assert not (r.lr_update = '1' and e_in.valid = '1')
 179                 report "LR update collision with valid in EX1"
 180                 severity failure;
 181             if r.lr_update = '1' then
 182                 report "LR update to " & to_hstring(r.next_lr);
 183             end if;
 184         end if;
 185     end process;
 186
 187     execute1_1: process(all)
         -- Combinational half of the stage. From the latched state r and the
         -- incoming decode bundle e_in it computes the next register state
         -- (rin), the inputs of the multiply and divide units, the rotator
         -- controls, and the unregistered outputs (f_out, l_out, stall_out,
         -- flush_out, icache_inval, terminate_out). Every output is given a
         -- default first and then overridden by the instruction being executed.
 188         variable v : reg_type;
 189         variable a_inv : std_ulogic_vector(63 downto 0);
 190         variable result : std_ulogic_vector(63 downto 0);
 191         variable newcrf : std_ulogic_vector(3 downto 0);
 192         variable result_with_carry : std_ulogic_vector(64 downto 0);
 193         variable result_en : std_ulogic;
 194         variable crnum : crnum_t;
 195         variable crbit : integer range 0 to 31;
 196         variable scrnum : crnum_t;
 197         variable lo, hi : integer;
 198         variable sh, mb, me : std_ulogic_vector(5 downto 0);
 199         variable sh32, mb32, me32 : std_ulogic_vector(4 downto 0);
 200         variable bo, bi : std_ulogic_vector(4 downto 0);
 201         variable bf, bfa : std_ulogic_vector(2 downto 0);
 202         variable cr_op : std_ulogic_vector(9 downto 0);
 203         variable cr_operands : std_ulogic_vector(1 downto 0);
 204         variable bt, ba, bb : std_ulogic_vector(4 downto 0);
 205         variable btnum, banum, bbnum : integer range 0 to 31;
 206         variable crresult : std_ulogic;
 207         variable l : std_ulogic;
 208         variable next_nia : std_ulogic_vector(63 downto 0);
 209         variable carry_32, carry_64 : std_ulogic;
 210         variable sign1, sign2 : std_ulogic;
 211         variable abs1, abs2 : signed(63 downto 0);
 212         variable overflow : std_ulogic;
 213         variable negative : std_ulogic;
 214         variable zerohi, zerolo : std_ulogic;
 215         variable msb_a, msb_b : std_ulogic;
 216         variable a_lt : std_ulogic;
 217         variable lv : Execute1ToLoadstore1Type;
 218     begin
 219         result := (others => '0');
 220         result_with_carry := (others => '0');
 221         result_en := '0';
 222         newcrf := (others => '0');
 223
             -- Start from the latched state, but with an idle writeback bundle.
 224         v := r;
 225         v.e := Execute1ToWritebackInit;
 226
 227         -- XER forwarding. To avoid having to track XER hazards, we
 228         -- use the previously latched value.
 229         --
 230         -- If the XER was modified by a multiply or a divide, those are
 231         -- single issue, we'll get the up to date value from decode2 from
 232         -- the register file.
 233         --
 234         -- If it was modified by an instruction older than the previous
 235         -- one in EX1, it will have also hit writeback and will be up
 236         -- to date in decode2.
 237         --
 238         -- That leaves us with the case where it was updated by the previous
 239         -- instruction in EX1. In that case, we can forward it back here.
 240         --
 241         -- This will break if we allow pipelining of multiply and divide,
 242         -- but ideally, those should go via EX1 anyway and run as a state
 243         -- machine from here.
 244         --
 245         -- One additional hazard to beware of is an XER:SO modifying instruction
 246         -- in EX1 followed immediately by a store conditional. Due to our
 247         -- writeback latency, the store will go down the LSU with the previous
 248         -- XER value, thus the stcx. will set CR0:SO using an obsolete SO value.
 249         --
 250         -- We will need to handle that if we ever make stcx. not single issue
 251         --
 252         -- We always pass a valid XER value down to writeback even when
 253         -- we aren't updating it, in order for XER:SO -> CR0:SO transfer
 254         -- to work for RC instructions.
 255         --
 256         if r.e.write_xerc_enable = '1' then
 257             v.e.xerc := r.e.xerc;
 258         else
 259             v.e.xerc := e_in.xerc;
 260         end if;
 261
             -- One-cycle flags: cleared by default, set again below when needed.
 262         v.lr_update := '0';
 263         v.mul_in_progress := '0';
 264         v.div_in_progress := '0';
 265         v.cntz_in_progress := '0';
 266
 267         -- signals to multiply unit
 268         x_to_multiply <= Execute1ToMultiplyInit;
 269         x_to_multiply.insn_type <= e_in.insn_type;
 270         x_to_multiply.is_32bit <= e_in.is_32bit;
 271
             -- Operands are widened by one bit: sign-extended for signed
             -- multiplies, zero-extended otherwise.
 272         if e_in.is_32bit = '1' then
 273             if e_in.is_signed = '1' then
 274                 x_to_multiply.data1 <= (others => a_in(31));
 275                 x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
 276                 x_to_multiply.data2 <= (others => b_in(31));
 277                 x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
 278             else
 279                 x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
 280                 x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
 281             end if;
 282         else
 283             if e_in.is_signed = '1' then
 284                 x_to_multiply.data1 <= a_in(63) & a_in;
 285                 x_to_multiply.data2 <= b_in(63) & b_in;
 286             else
 287                 x_to_multiply.data1 <= '0' & a_in;
 288                 x_to_multiply.data2 <= '0' & b_in;
 289             end if;
 290         end if;
 291
 292         -- signals to divide unit
 293         sign1 := '0';
 294         sign2 := '0';
 295         if e_in.is_signed = '1' then
 296             if e_in.is_32bit = '1' then
 297                 sign1 := a_in(31);
 298                 sign2 := b_in(31);
 299             else
 300                 sign1 := a_in(63);
 301                 sign2 := b_in(63);
 302             end if;
 303         end if;
 304         -- take absolute values
 305         if sign1 = '0' then
 306             abs1 := signed(a_in);
 307         else
 308             abs1 := - signed(a_in);
 309         end if;
 310         if sign2 = '0' then
 311             abs2 := signed(b_in);
 312         else
 313             abs2 := - signed(b_in);
 314         end if;
 315
 316         x_to_divider <= Execute1ToDividerInit;
 317         x_to_divider.is_signed <= e_in.is_signed;
 318         x_to_divider.is_32bit <= e_in.is_32bit;
 319         if e_in.insn_type = OP_MOD then
 320             x_to_divider.is_modulus <= '1';
 321         end if;
             -- Note: this reads back the is_modulus signal assigned just above,
             -- so the final value is only reached when the process re-evaluates
             -- after x_to_divider has updated (process(all) is sensitive to it).
             -- The result is negated when the dividend is negative for a
             -- modulus, or when the operand signs differ for a divide.
 322         x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
 323         if e_in.is_32bit = '0' then
 324             -- 64-bit forms
 325             if e_in.insn_type = OP_DIVE then
 326                 x_to_divider.is_extended <= '1';
 327             end if;
 328             x_to_divider.dividend <= std_ulogic_vector(abs1);
 329             x_to_divider.divisor <= std_ulogic_vector(abs2);
 330         else
 331             -- 32-bit forms
 332             x_to_divider.is_extended <= '0';
 333             if e_in.insn_type = OP_DIVE then   -- extended forms
 334                 x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
 335             else
 336                 x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
 337             end if;
 338             x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
 339         end if;
 340
 341         ctrl_tmp <= ctrl;
 342         -- FIXME: run at 512MHz not core freq
 343         ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
 344
             -- Default values of the unregistered outputs.
 345         terminate_out <= '0';
 346         icache_inval <= '0';
 347         stall_out <= '0';
 348         f_out <= Execute1ToFetch1TypeInit;
 349
 350         -- Next insn adder used in a couple of places
 351         next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4);
 352
 353         -- rotator control signals
 354         right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
 355         rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0';
 356         rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
 357
 358         if e_in.valid = '1' then
 359
                 -- A new instruction is presented this cycle. Capture what a
                 -- multi-cycle operation will need on completion, then dispatch
                 -- on the instruction type.
 360             v.e.valid := '1';
 361             v.e.write_reg := e_in.write_reg;
 362             v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
 363             v.slow_op_rc := e_in.rc;
 364             v.slow_op_oe := e_in.oe;
 365             v.slow_op_xerc := v.e.xerc;
 366
 367             case_0: case e_in.insn_type is
 368
 369             when OP_ILLEGAL =>
 370                 terminate_out <= '1';
 371                 report "illegal";
 372             when OP_ATTN =>
 373                 terminate_out <= '1';
 374                 report "ATTN";
 375             when OP_NOP =>
 376                 -- Do nothing
 377             when OP_ADD | OP_CMP =>
 378                 if e_in.invert_a = '0' then
 379                     a_inv := a_in;
 380                 else
 381                     a_inv := not a_in;
 382                 end if;
 383                 result_with_carry := ppc_adde(a_inv, b_in,
 384                                               decode_input_carry(e_in.input_carry, v.e.xerc));
 385                 result := result_with_carry(63 downto 0);
                     -- Carry out of bit 31 is recovered as the carry into bit 32.
 386                 carry_32 := result(32) xor a_inv(32) xor b_in(32);
 387                 carry_64 := result_with_carry(64);
 388                 if e_in.insn_type = OP_ADD then
 389                     if e_in.output_carry = '1' then
 390                         set_carry(v.e, carry_32, carry_64);
 391                     end if;
 392                     if e_in.oe = '1' then
 393                         set_ov(v.e,
 394                                calc_ov(a_inv(63), b_in(63), carry_64, result_with_carry(63)),
 395                                calc_ov(a_inv(31), b_in(31), carry_32, result_with_carry(31)));
 396                     end if;
 397                     result_en := '1';
 398                 else
 399                     -- CMP and CMPL instructions
 400                     -- Note, we have done RB - RA, not RA - RB
 401                     bf := insn_bf(e_in.insn);
 402                     l := insn_l(e_in.insn);
 403                     v.e.write_cr_enable := '1';
 404                     crnum := to_integer(unsigned(bf));
 405                     v.e.write_cr_mask := num_to_fxm(crnum);
 406                     zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0)));
 407                     zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32)));
 408                     if zerolo = '1' and (l = '0' or zerohi = '1') then
 409                         -- values are equal
 410                         newcrf := "001" & v.e.xerc.so;
 411                     else
 412                         if l = '1' then
 413                             -- 64-bit comparison
 414                             msb_a := a_in(63);
 415                             msb_b := b_in(63);
 416                         else
 417                             -- 32-bit comparison
 418                             msb_a := a_in(31);
 419                             msb_b := b_in(31);
 420                         end if;
 421                         if msb_a /= msb_b then
 422                             -- Subtraction might overflow, but
 423                             -- comparison is clear from MSB difference.
 424                             -- for signed, 0 is greater; for unsigned, 1 is greater
 425                             a_lt := msb_a xnor e_in.is_signed;
 426                         else
 427                             -- Subtraction cannot overflow since MSBs are equal.
 428                             -- carry = 1 indicates RA is smaller (signed or unsigned)
 429                             a_lt := (not l and carry_32) or (l and carry_64);
 430                         end if;
 431                         newcrf := a_lt & not a_lt & '0' & v.e.xerc.so;
 432                     end if;
                         -- Replicate the field into every CR position; the mask
                         -- set above selects the one that is actually written.
 433                     for i in 0 to 7 loop
 434                         lo := i*4;
 435                         hi := lo + 3;
 436                         v.e.write_cr_data(hi downto lo) := newcrf;
 437                     end loop;
 438                 end if;
 439             when OP_AND | OP_OR | OP_XOR =>
 440                 result := logical_result;
 441                 result_en := '1';
 442             when OP_B =>
 443                 f_out.redirect <= '1';
 444                 if (insn_aa(e_in.insn)) then
 445                     f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
 446                 else
 447                     f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
 448                 end if;
 449             when OP_BC =>
 450                 -- read_data1 is CTR
 451                 bo := insn_bo(e_in.insn);
 452                 bi := insn_bi(e_in.insn);
                     -- BO bit 2 clear means the branch decrements CTR.
 453                 if bo(4-2) = '0' then
 454                     result := std_ulogic_vector(unsigned(a_in) - 1);
 455                     result_en := '1';
 456                     v.e.write_reg := fast_spr_num(SPR_CTR);
 457                 end if;
 458                 if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
 459                     f_out.redirect <= '1';
 460                     if (insn_aa(e_in.insn)) then
 461                         f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
 462                     else
 463                         f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
 464                     end if;
 465                 end if;
 466             when OP_BCREG =>
 467                 -- read_data1 is CTR
 468                 -- read_data2 is target register (CTR, LR or TAR)
 469                 bo := insn_bo(e_in.insn);
 470                 bi := insn_bi(e_in.insn);
 471                 if bo(4-2) = '0' and e_in.insn(10) = '0' then
 472                     result := std_ulogic_vector(unsigned(a_in) - 1);
 473                     result_en := '1';
 474                     v.e.write_reg := fast_spr_num(SPR_CTR);
 475                 end if;
 476                 if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
 477                     f_out.redirect <= '1';
                         -- The two low bits of the target register are ignored.
 478                     f_out.redirect_nia <= b_in(63 downto 2) & "00";
 479                 end if;
 480             when OP_CMPB =>
 481                 result := ppc_cmpb(c_in, b_in);
 482                 result_en := '1';
 483             when OP_CNTZ =>
                     -- Result is picked up next cycle from the clocked zero counter.
 484                 v.e.valid := '0';
 485                 v.cntz_in_progress := '1';
 486                 stall_out <= '1';
 487             when OP_EXTS =>
 488                 -- note data_len is a 1-hot encoding
 489                 negative := (e_in.data_len(0) and c_in(7)) or
 490                             (e_in.data_len(1) and c_in(15)) or
 491                             (e_in.data_len(2) and c_in(31));
 492                 result := (others => negative);
 493                 if e_in.data_len(2) = '1' then
 494                     result(31 downto 16) := c_in(31 downto 16);
 495                 end if;
 496                 if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
 497                     result(15 downto 8) := c_in(15 downto 8);
 498                 end if;
 499                 result(7 downto 0) := c_in(7 downto 0);
 500                 result_en := '1';
 501             when OP_ISEL =>
 502                 crbit := to_integer(unsigned(insn_bc(e_in.insn)));
 503                 if e_in.cr(31-crbit) = '1' then
 504                     result := a_in;
 505                 else
 506                     result := b_in;
 507                 end if;
 508                 result_en := '1';
 509             when OP_MCRF =>
 510                 cr_op := insn_cr(e_in.insn);
 511                 report "CR OP " & to_hstring(cr_op);
 512                 if cr_op(0) = '0' then -- MCRF
 513                     bf := insn_bf(e_in.insn);
 514                     bfa := insn_bfa(e_in.insn);
 515                     v.e.write_cr_enable := '1';
 516                     crnum := to_integer(unsigned(bf));
 517                     scrnum := to_integer(unsigned(bfa));
 518                     v.e.write_cr_mask := num_to_fxm(crnum);
 519                     for i in 0 to 7 loop
 520                         lo := (7-i)*4;
 521                         hi := lo + 3;
 522                         if i = scrnum then
 523                             newcrf := e_in.cr(hi downto lo);
 524                         end if;
 525                     end loop;
 526                     for i in 0 to 7 loop
 527                         lo := i*4;
 528                         hi := lo + 3;
 529                         v.e.write_cr_data(hi downto lo) := newcrf;
 530                     end loop;
 531                 else
                         -- CR logical operation on single bits (cr_op(0) = '1').
 532                     v.e.write_cr_enable := '1';
 533                     bt := insn_bt(e_in.insn);
 534                     ba := insn_ba(e_in.insn);
 535                     bb := insn_bb(e_in.insn);
 536                     btnum := 31 - to_integer(unsigned(bt));
 537                     banum := 31 - to_integer(unsigned(ba));
 538                     bbnum := 31 - to_integer(unsigned(bb));
 539                     -- Bits 5-8 of cr_op give the truth table of the requested
 540                     -- logical operation
 541                     cr_operands := e_in.cr(banum) & e_in.cr(bbnum);
 542                     crresult := cr_op(5 + to_integer(unsigned(cr_operands)));
 543                     v.e.write_cr_mask := num_to_fxm((31-btnum) / 4);
 544                     for i in 0 to 31 loop
 545                         if i = btnum then
 546                             v.e.write_cr_data(i) := crresult;
 547                         else
 548                             v.e.write_cr_data(i) := e_in.cr(i);
 549                         end if;
 550                     end loop;
 551                 end if;
 552             when OP_MFSPR =>
 553                 if is_fast_spr(e_in.read_reg1) then
 554                     result := a_in;
 555                     if decode_spr_num(e_in.insn) = SPR_XER then
 556                         -- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
 557                         result(63 downto 32) := (others => '0');
 558                         result(63-32) := v.e.xerc.so;
 559                         result(63-33) := v.e.xerc.ov;
 560                         result(63-34) := v.e.xerc.ca;
 561                         result(63-35 downto 63-43) := "000000000";
 562                         result(63-44) := v.e.xerc.ov32;
 563                         result(63-45) := v.e.xerc.ca32;
 564                     end if;
 565                 else
                         -- Slow SPRs: only the timebase is readable; all others
                         -- return zero.
 566                     case decode_spr_num(e_in.insn) is
 567                     when SPR_TB =>
 568                         result := ctrl.tb;
 569                     when others =>
 570                         result := (others => '0');
 571                     end case;
 572                 end if;
 573                 result_en := '1';
 574             when OP_MFCR =>
 575                 if e_in.insn(20) = '0' then
 576                     -- mfcr
 577                     result := x"00000000" & e_in.cr;
 578                 else
 579                     -- mfocrf
 580                     crnum := fxm_to_num(insn_fxm(e_in.insn));
 581                     result := (others => '0');
 582                     for i in 0 to 7 loop
 583                         lo := (7-i)*4;
 584                         hi := lo + 3;
 585                         if crnum = i then
 586                             result(hi downto lo) := e_in.cr(hi downto lo);
 587                         end if;
 588                     end loop;
 589                 end if;
 590                 result_en := '1';
 591             when OP_MTCRF =>
 592                 v.e.write_cr_enable := '1';
 593                 if e_in.insn(20) = '0' then
 594                     -- mtcrf
 595                     v.e.write_cr_mask := insn_fxm(e_in.insn);
 596                 else
 597                     -- mtocrf: We require one hot priority encoding here
 598                     crnum := fxm_to_num(insn_fxm(e_in.insn));
 599                     v.e.write_cr_mask := num_to_fxm(crnum);
 600                 end if;
 601                 v.e.write_cr_data := c_in(31 downto 0);
 602             when OP_MTSPR =>
 603                 report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
 604                     "=" & to_hstring(c_in);
 605                 if is_fast_spr(e_in.write_reg) then
 606                     result := c_in;
 607                     result_en := '1';
 608                     if decode_spr_num(e_in.insn) = SPR_XER then
 609                         v.e.xerc.so := c_in(63-32);
 610                         v.e.xerc.ov := c_in(63-33);
 611                         v.e.xerc.ca := c_in(63-34);
 612                         v.e.xerc.ov32 := c_in(63-44);
 613                         v.e.xerc.ca32 := c_in(63-45);
 614                         v.e.write_xerc_enable := '1';
 615                     end if;
 616                 else
 617 -- TODO: Implement slow SPRs
 618 --                  case decode_spr_num(e_in.insn) is
 619 --                  when others =>
 620 --                  end case;
 621                 end if;
 622             when OP_POPCNT =>
 623                 result := popcnt_result;
 624                 result_en := '1';
 625             when OP_PRTY =>
 626                 result := parity_result;
 627                 result_en := '1';
 628             when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
 629                 result := rotator_result;
 630                 if e_in.output_carry = '1' then
 631                     set_carry(v.e, rotator_carry, rotator_carry);
 632                 end if;
 633                 result_en := '1';
 634             when OP_SIM_CONFIG =>
 635                 -- bit 0 was used to select the microwatt console, which
 636                 -- we no longer support.
 637                 result := x"0000000000000000";
 638                 result_en := '1';
 639
 640             when OP_TDI =>
 641                 -- Keep our test cases happy for now, ignore trap instructions
 642                 report "OP_TDI FIXME";
 643
 644             when OP_ISYNC =>
                     -- Redirect fetch to the following instruction.
 645                 f_out.redirect <= '1';
 646                 f_out.redirect_nia <= next_nia;
 647
 648             when OP_ICBI =>
 649                 icache_inval <= '1';
 650
 651             when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
                     -- Start the multiplier and stall until it reports valid.
 652                 v.e.valid := '0';
 653                 v.mul_in_progress := '1';
 654                 stall_out <= '1';
 655                 x_to_multiply.valid <= '1';
 656
 657             when OP_DIV | OP_DIVE | OP_MOD =>
                     -- Start the divider and stall until it reports valid.
 658                 v.e.valid := '0';
 659                 v.div_in_progress := '1';
 660                 stall_out <= '1';
 661                 x_to_divider.valid <= '1';
 662
 663             when OP_LOAD | OP_STORE =>
 664                 -- loadstore/dcache has its own port to writeback
 665                 v.e.valid := '0';
 666
 667             when others =>
 668                 terminate_out <= '1';
 669                 report "illegal";
 670             end case;
 671
 672             v.e.rc := e_in.rc and e_in.valid;
 673
 674             -- Update LR on the next cycle after a branch link
 675             --
 676             -- WARNING: The LR update isn't tracked by our hazard tracker. This
 677             --          will work (well I hope) because it only happens on branches
 678             --          which will flush all decoded instructions. By the time
 679             --          fetch catches up, we'll have the new LR. This will
 680             --          *not* work properly however if we have a branch predictor,
 681             --          in which case the solution would probably be to keep a
 682             --          local cache of the updated LR in execute1 (flushed on
 683             --          exceptions) that is used instead of the value from
 684             --          decode when its content is valid.
                 --
                 -- NOTE(review): v.e.valid is cleared here even when the branch
                 -- also produced a CTR decrement (result_en = '1' above), so
                 -- write_enable may be set with valid = '0'. Confirm that
                 -- writeback treats this as intended.
 685             if e_in.lr = '1' then
 686                 v.lr_update := '1';
 687                 v.next_lr := next_nia;
 688                 v.e.valid := '0';
 689                 report "Delayed LR update to " & to_hstring(next_nia);
 690                 stall_out <= '1';
 691             end if;
 692         elsif r.lr_update = '1' then
                 -- Cycle after a linking branch: write the saved return address
                 -- to LR.
 693             result_en := '1';
 694             result := r.next_lr;
 695             v.e.write_reg := fast_spr_num(SPR_LR);
 696             v.e.valid := '1';
 697         elsif r.cntz_in_progress = '1' then
 698             -- cnt[lt]z always takes two cycles
 699             result := countzero_result;
 700             result_en := '1';
 701             v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
 702             v.e.rc := v.slow_op_rc;
 703             v.e.xerc := v.slow_op_xerc;
 704             v.e.valid := '1';
 705         elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
                 -- A multiply or divide is pending: complete it when the unit
                 -- reports valid, otherwise keep stalling and hold the flags.
 706             if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
 707                (r.div_in_progress = '1' and divider_to_x.valid = '1') then
 708                 if r.mul_in_progress = '1' then
 709                     result := multiply_to_x.write_reg_data;
 710                     overflow := multiply_to_x.overflow;
 711                 else
 712                     result := divider_to_x.write_reg_data;
 713                     overflow := divider_to_x.overflow;
 714                 end if;
 715                 result_en := '1';
 716                 v.e.write_reg := gpr_to_gspr(v.slow_op_dest);
 717                 v.e.rc := v.slow_op_rc;
 718                 v.e.xerc := v.slow_op_xerc;
 719                 v.e.write_xerc_enable := v.slow_op_oe;
 720                 -- We must test oe because the RC update code in writeback
 721                 -- will use the xerc value to set CR0:SO so we must not clobber
 722                 -- xerc if OE wasn't set.
 723                 if v.slow_op_oe = '1' then
 724                     v.e.xerc.ov := overflow;
 725                     v.e.xerc.ov32 := overflow;
 726                     v.e.xerc.so := v.slow_op_xerc.so or overflow;
 727                 end if;
 728                 v.e.valid := '1';
 729             else
 730                 stall_out <= '1';
 731                 v.mul_in_progress := r.mul_in_progress;
 732                 v.div_in_progress := r.div_in_progress;
 733             end if;
 734         end if;
 735
 736         v.e.write_data := result;
 737         v.e.write_enable := result_en;
 738
 739         -- Outputs to loadstore1 (async)
 740         lv := Execute1ToLoadstore1Init;
 741         if e_in.valid = '1' and (e_in.insn_type = OP_LOAD or e_in.insn_type = OP_STORE) then
 742             lv.valid := '1';
 743         end if;
 744         if e_in.insn_type = OP_LOAD then
 745             lv.load := '1';
 746         end if;
 747         lv.addr1 := a_in;
 748         lv.addr2 := b_in;
 749         lv.data := c_in;
 750         lv.write_reg := gspr_to_gpr(e_in.write_reg);
 751         lv.length := e_in.data_len;
 752         lv.byte_reverse := e_in.byte_reverse;
 753         lv.sign_extend := e_in.sign_extend;
 754         lv.update := e_in.update;
 755         lv.update_reg := gspr_to_gpr(e_in.read_reg1);
 756         lv.xerc := v.e.xerc;
 757         lv.reserve := e_in.reserve;
 758         lv.rc := e_in.rc;
 759         -- decode l*cix and st*cix instructions here
 760         if e_in.insn(31 downto 26) = "011111" and e_in.insn(10 downto 9) = "11" and
 761             e_in.insn(5 downto 1) = "10101" then
 762             lv.ci := '1';
 763         end if;
 764
 765         -- Update registers
 766         rin <= v;
 767
 768         -- update outputs
 769         --f_out <= r.f;
 770         l_out <= lv;
             -- Writeback sees the value latched on the previous clock edge.
 771         e_out <= r.e;
             -- flush_out follows the redirect request computed above; it is
             -- read back from the f_out output port.
 772         flush_out <= f_out.redirect;
 773     end process;
 774 end architecture behaviour;