EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
+ HAS_SHORT_MULT : boolean := false;
ALT_RESET_ADDRESS : std_ulogic_vector(63 downto 0) := (others => '0');
LOG_LENGTH : natural := 512;
ICACHE_NUM_LINES : natural := 64;
generic map (
EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU,
+ HAS_SHORT_MULT => HAS_SHORT_MULT,
LOG_LENGTH => LOG_LENGTH
)
port map (
generic (
EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true;
+ HAS_SHORT_MULT : boolean := false;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
signal cr_in : std_ulogic_vector(31 downto 0);
signal xerc_in : xer_common_t;
+ signal mshort_p : std_ulogic_vector(31 downto 0) := (others => '0');
signal valid_in : std_ulogic;
signal ctrl: ctrl_t := (others => (others => '0'));
return msr_out;
end;
+ -- Report whether val, read as a two's-complement number, can be held
+ -- in n bits, i.e. whether it lies in -2^(n-1) .. 2^(n-1) - 1.
+ -- val is expected to have a descending range that includes index n - 1.
+ function fits_in_n_bits(val: std_ulogic_vector; n: integer) return boolean is
+     variable lead, bumped: std_ulogic_vector(val'left downto val'right);
+ begin
+     -- Normalise the input so that the redundant copies of the sign bit
+     -- at the top of val always appear as a run of leading ones.
+     if val(val'left) = '0' then
+         lead := not val;
+     else
+         lead := val;
+     end if;
+     -- Increment the bit-reversed value and reverse it back (this relies
+     -- on bit_reverse mirroring the bit order), then mask with the
+     -- original so that only the run of leading ones survives.
+     bumped := bit_reverse(std_ulogic_vector(unsigned(bit_reverse(lead)) + 1));
+     lead := lead and not bumped;
+     -- lead now has ones exactly at the positions to the left of the
+     -- leftmost 1 bit of a non-negative val, or to the left of the
+     -- leftmost 0 bit of a negative val.  The value fits in n bits
+     -- when bit n - 1 is one of those positions.
+     return lead(n - 1) = '1';
+ end;
+
-- Tell vivado to keep the hierarchy for the random module so that the
-- net names in the xdc file match.
attribute keep_hierarchy : string;
p_out => pmu_to_x
);
+ -- Instantiate the 16 x 16 multiplier only when HAS_SHORT_MULT is set.
+ -- It is fed the low 16 bits of each operand and drives mshort_p;
+ -- without it, nothing in this generate block drives mshort_p.
+ short_mult_0: if HAS_SHORT_MULT generate
+     short_mult: entity work.short_multiply
+         port map (
+             clk   => clk,
+             a_in  => a_in(15 downto 0),
+             b_in  => b_in(15 downto 0),
+             m_out => mshort_p
+         );
+ end generate short_mult_0;
+
dbg_msr_out <= ctrl.msr;
log_rd_addr <= r.log_addr_spr;
case current.sub_select(1 downto 0) is
when "00" =>
- muldiv_result <= multiply_to_x.result(63 downto 0);
+ if HAS_SHORT_MULT and r.mul_in_progress = '0' then
+ muldiv_result <= std_ulogic_vector(resize(signed(mshort_p), 64));
+ else
+ muldiv_result <= multiply_to_x.result(63 downto 0);
+ end if;
when "01" =>
muldiv_result <= multiply_to_x.result(127 downto 64);
when "10" =>
icache_inval <= '1';
when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
- v.e.valid := '0';
- v.mul_in_progress := '1';
- v.busy := '1';
- x_to_multiply.valid <= '1';
+ if HAS_SHORT_MULT and e_in.insn_type = OP_MUL_L64 and e_in.insn(26) = '1' and
+ fits_in_n_bits(a_in, 16) and fits_in_n_bits(b_in, 16) then
+ -- Operands fit into 16 bits, so use short multiplier
+ if e_in.oe = '1' then
+ -- Note 16x16 multiply can't overflow, even for mullwo
+ set_ov(v.e, '0', '0');
+ end if;
+ else
+ -- Use standard multiplier
+ v.e.valid := '0';
+ v.mul_in_progress := '1';
+ v.busy := '1';
+ x_to_multiply.valid <= '1';
+ end if;
when OP_DIV | OP_DIVE | OP_MOD =>
v.e.valid := '0';
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
+ HAS_SHORT_MULT : boolean := false;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
+ HAS_SHORT_MULT => HAS_SHORT_MULT,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 256 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := false;
+ HAS_SHORT_MULT: boolean := false;
ICACHE_NUM_LINES : natural := 64;
LOG_LENGTH : natural := 512;
DISABLE_FLATTEN_CORE : boolean := false;
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
+ HAS_SHORT_MULT => HAS_SHORT_MULT,
ICACHE_NUM_LINES => ICACHE_NUM_LINES,
LOG_LENGTH => LOG_LENGTH,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE,
CLK_FREQUENCY : positive := 100000000;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
+ HAS_SHORT_MULT: boolean := false;
USE_LITEDRAM : boolean := false;
NO_BRAM : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
CLK_FREQ => CLK_FREQUENCY,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
+ HAS_SHORT_MULT=> HAS_SHORT_MULT,
HAS_DRAM => USE_LITEDRAM,
DRAM_SIZE => 512 * 1024 * 1024,
DRAM_INIT_SIZE => PAYLOAD_SIZE,
- uart_is_16550
- has_fpu
- has_btc
+ - has_short_mult
tools:
vivado: {part : xc7a100tcsg324-1}
toplevel : toplevel
- uart_is_16550
- has_fpu
- has_btc
+ - has_short_mult
generate: [litedram_nexys_video, liteeth_nexys_video, litesdcard_nexys_video]
tools:
vivado: {part : xc7a200tsbg484-1}
- has_uart1
- has_fpu=false
- has_btc=false
+ - has_short_mult
- use_litesdcard
tools:
vivado: {part : xc7a35ticsg324-1L}
- has_uart1
- has_fpu=false
- has_btc=false
+ - has_short_mult
generate: [litedram_arty, liteeth_arty, litesdcard_arty]
tools:
vivado: {part : xc7a35ticsg324-1L}
- has_uart1
- has_fpu
- has_btc
+ - has_short_mult
- use_litesdcard
tools:
vivado: {part : xc7a100ticsg324-1L}
- has_uart1
- has_fpu
- has_btc
+ - has_short_mult
generate: [litedram_arty, liteeth_arty, litesdcard_arty]
tools:
vivado: {part : xc7a100ticsg324-1L}
paramtype : generic
default : true
+ has_short_mult:
+ datatype : bool
+ description : Include a 16 bit x 16 bit single-cycle multiplier in the core
+ paramtype : generic
+ default : false
+
disable_flatten_core:
datatype : bool
description : Prevent Vivado from flattening the main core components
rin <= v;
end process;
end architecture behaviour;
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+-- Behavioural short multiplier: a combinational signed
+-- 16 bit x 16 bit -> 32 bit multiply.  The clk port is part of the
+-- interface but is not referenced by this architecture.
+entity short_multiply is
+    port (
+        clk   : in std_ulogic;
+
+        a_in  : in std_ulogic_vector(15 downto 0);
+        b_in  : in std_ulogic_vector(15 downto 0);
+        m_out : out std_ulogic_vector(31 downto 0)
+    );
+end entity short_multiply;
+
+architecture behaviour of short_multiply is
+begin
+    -- Re-evaluated whenever either operand changes.
+    multiply: process(a_in, b_in)
+        -- A 16 x 16 signed product is exactly 32 bits wide.
+        variable product : signed(31 downto 0);
+    begin
+        product := signed(a_in) * signed(b_in);
+        m_out <= std_ulogic_vector(product);
+    end process multiply;
+end architecture behaviour;
SIM : boolean;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
+ HAS_SHORT_MULT : boolean := false;
DISABLE_FLATTEN_CORE : boolean := false;
HAS_DRAM : boolean := false;
DRAM_SIZE : integer := 0;
SIM => SIM,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
+ HAS_SHORT_MULT => HAS_SHORT_MULT,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1'),
LOG_LENGTH => LOG_LENGTH,
end process;
end architecture behaviour;
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+library unisim;
+use unisim.vcomponents.all;
+
+entity short_multiply is  -- Signed 16 bit x 16 bit -> 32 bit multiplier built from a single DSP48E1 primitive
+ port (
+ clk : in std_logic;  -- wired to the DSP48E1 CLK pin; every *REG generic set below is 0
+
+ a_in : in std_ulogic_vector(15 downto 0);  -- first operand, treated as signed
+ b_in : in std_ulogic_vector(15 downto 0);  -- second operand, treated as signed
+ m_out : out std_ulogic_vector(31 downto 0)  -- low 32 bits of the DSP48E1 P output
+ );
+end entity short_multiply;
+
+architecture behaviour of short_multiply is
+ signal mshort_p : std_ulogic_vector(47 downto 0);  -- full 48-bit P output of the DSP48E1
+begin
+ mshort: DSP48E1
+ generic map (  -- NOTE(review): all register-stage generics below are 0, which per UG479 should bypass those registers and leave A*B -> P combinational; confirm
+ ACASCREG => 0,
+ ALUMODEREG => 0,
+ AREG => 0,
+ BCASCREG => 0,
+ BREG => 0,
+ CARRYINREG => 0,
+ CARRYINSELREG => 0,
+ CREG => 0,
+ INMODEREG => 0,
+ MREG => 0,
+ OPMODEREG => 0,
+ PREG => 0
+ )
+ port map (
+ A => std_ulogic_vector(resize(signed(a_in(15 downto 0)), 30)),  -- a_in sign-extended to the 30-bit A port
+ ACIN => (others => '0'),
+ ALUMODE => "0000",
+ B => std_ulogic_vector(resize(signed(b_in(15 downto 0)), 18)),  -- b_in sign-extended to the 18-bit B port
+ BCIN => (others => '0'),
+ C => 48x"0",  -- C input held at zero
+ CARRYCASCIN => '0',
+ CARRYIN => '0',
+ CARRYINSEL => "000",
+ CEA1 => '0',  -- every clock enable (CE*) is tied low; NOTE(review): presumably harmless because no register stage is enabled; confirm
+ CEA2 => '0',
+ CEAD => '0',
+ CEALUMODE => '0',
+ CEB1 => '0',
+ CEB2 => '0',
+ CEC => '0',
+ CECARRYIN => '0',
+ CECTRL => '0',
+ CED => '0',
+ CEINMODE => '0',
+ CEM => '0',
+ CEP => '0',
+ CLK => clk,
+ D => (others => '0'),
+ INMODE => "00000",  -- NOTE(review): per UG479 an all-zero INMODE should select the plain A/B inputs with no pre-adder; confirm
+ MULTSIGNIN => '0',
+ OPMODE => "0110101",  -- NOTE(review): per UG479 this should select X = M, Y = M, Z = C, giving P = C + A*B with ALUMODE "0000"; confirm
+ P => mshort_p,
+ PCIN => (others => '0'),
+ RSTA => '0',  -- every reset (RST*) is tied low
+ RSTALLCARRYIN => '0',
+ RSTALUMODE => '0',
+ RSTB => '0',
+ RSTC => '0',
+ RSTCTRL => '0',
+ RSTD => '0',
+ RSTINMODE => '0',
+ RSTM => '0',
+ RSTP => '0'
+ );
+
+ m_out <= mshort_p(31 downto 0);  -- keep the low 32 bits; a signed 16 x 16 product fits in 32 bits, so the dropped upper bits of P should only be sign extension
+
+end architecture behaviour;