decode1.vhdl helpers.vhdl insn_helpers.vhdl \
control.vhdl decode2.vhdl register_file.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
- logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
+ logical.vhdl countbits.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \
core.vhdl fpu.vhdl pmu.vhdl
--- /dev/null
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+library work;
+use work.helpers.all;
+
+-- bit_counter: combined count-zeroes / population-count unit.
+--
+-- Both functions are split around one register stage: the first half of
+-- each computation is combinational on the input ports, the intermediate
+-- values are captured on rising_edge(clk), and the second half is
+-- combinational on those registers. result therefore corresponds to the
+-- inputs that were present at the most recent rising clock edge.
+--
+-- Ports:
+--   clk         - clock for the intermediate registers
+--   rs          - 64-bit operand
+--   count_right - '1': zero count starts at bit 0 and works upwards;
+--                 '0': the operand is bit-reversed first, so the count
+--                 starts at the top bit instead
+--   do_popcnt   - sampled on the clock edge; '1' selects the popcnt
+--                 result, '0' selects the zero-count result
+--   is_32bit    - '1': the zero count only looks at rs(31 downto 0);
+--                 not used by the popcnt path
+--   datalen     - sampled on the clock edge; selects the popcnt width
+--                 (see the popcnt_a process)
+--   result      - 64-bit result, zero-extended count(s)
+entity bit_counter is
+ port (
+ clk : in std_logic;
+ rs : in std_ulogic_vector(63 downto 0);
+ count_right : in std_ulogic;
+ do_popcnt : in std_ulogic;
+ is_32bit : in std_ulogic;
+ datalen : in std_ulogic_vector(3 downto 0);
+ result : out std_ulogic_vector(63 downto 0)
+ );
+end entity bit_counter;
+
+architecture behaviour of bit_counter is
+ -- signals for count-leading/trailing-zeroes
+ -- inp: operand after optional bit reversal / 32-bit masking
+ -- sum: 65-bit value of (not inp) + 1; bit 64 is the carry out
+ -- onehot: sum and inp, i.e. only the lowest set bit of inp survives
+ -- msb_r / onehot_r: registered copies of sum(64) and onehot
+ -- bitnum: 6-bit value returned by bit_number for onehot_r
+ -- cntz: final zero-count result
+ signal inp : std_ulogic_vector(63 downto 0);
+ signal sum : std_ulogic_vector(64 downto 0);
+ signal msb_r : std_ulogic;
+ signal onehot : std_ulogic_vector(63 downto 0);
+ signal onehot_r : std_ulogic_vector(63 downto 0);
+ signal bitnum : std_ulogic_vector(5 downto 0);
+ signal cntz : std_ulogic_vector(63 downto 0);
+
+ -- signals for popcnt
+ -- dlen_r / pcnt_r: registered copies of datalen and do_popcnt
+ -- pc2, pc4, pc8: bit counts of each 2-, 4- and 8-bit group of rs
+ -- pc8_r: registered per-byte counts
+ -- pc32: bit counts of the low (index 0) and high (index 1) 32-bit word
+ -- popcnt: final popcnt result
+ signal dlen_r : std_ulogic_vector(3 downto 0);
+ signal pcnt_r : std_ulogic;
+ subtype twobit is unsigned(1 downto 0);
+ type twobit32 is array(0 to 31) of twobit;
+ signal pc2 : twobit32;
+ subtype threebit is unsigned(2 downto 0);
+ type threebit16 is array(0 to 15) of threebit;
+ signal pc4 : threebit16;
+ subtype fourbit is unsigned(3 downto 0);
+ type fourbit8 is array(0 to 7) of fourbit;
+ signal pc8 : fourbit8;
+ signal pc8_r : fourbit8;
+ subtype sixbit is unsigned(5 downto 0);
+ type sixbit2 is array(0 to 1) of sixbit;
+ signal pc32 : sixbit2;
+ signal popcnt : std_ulogic_vector(63 downto 0);
+
+begin
+ -- Register stage of the zero counter: captures the carry out and the
+ -- isolated lowest set bit computed by the countzero process.
+ countzero_r: process(clk)
+ begin
+ if rising_edge(clk) then
+ msb_r <= sum(64);
+ onehot_r <= onehot;
+ end if;
+ end process;
+
+ -- Combinational part of the zero counter (both sides of the register).
+ countzero: process(all)
+ begin
+ -- Normalise the operand so that the count always runs from bit 0
+ -- upwards: the operand is bit-reversed when count_right = '0'.
+ if is_32bit = '0' then
+ if count_right = '0' then
+ inp <= bit_reverse(rs);
+ else
+ inp <= rs;
+ end if;
+ else
+ -- 32-bit mode: force the upper half to ones so that an all-zero
+ -- low word finds its first set bit at position 32.
+ inp(63 downto 32) <= x"FFFFFFFF";
+ if count_right = '0' then
+ inp(31 downto 0) <= bit_reverse(rs(31 downto 0));
+ else
+ inp(31 downto 0) <= rs(31 downto 0);
+ end if;
+ end if;
+
+ -- (not inp) + 1 over 65 bits: bit 64 (the carry out) is set only when
+ -- inp is all zeroes. ANDing the low 64 bits with inp leaves just the
+ -- lowest set bit of inp.
+ sum <= std_ulogic_vector(unsigned('0' & not inp) + 1);
+ onehot <= sum(63 downto 0) and inp;
+
+ -- The following occurs after a clock edge
+ -- NOTE(review): bit_number comes from work.helpers and is not visible
+ -- here; presumably it returns the index of the single set bit and 0
+ -- for an all-zero input - confirm against helpers.vhdl.
+ bitnum <= bit_number(onehot_r);
+
+ -- msb_r sits in bit 6, directly above the 6-bit index
+ cntz <= 57x"0" & msb_r & bitnum;
+ end process;
+
+ -- Register stage of the popcnt path: captures the eight per-byte counts
+ -- together with the datalen and do_popcnt controls that belong to them.
+ popcnt_r: process(clk)
+ begin
+ if rising_edge(clk) then
+ for i in 0 to 7 loop
+ pc8_r(i) <= pc8(i);
+ end loop;
+ dlen_r <= datalen;
+ pcnt_r <= do_popcnt;
+ end if;
+ end process;
+
+ -- Combinational part of the popcnt path (both sides of the register).
+ popcnt_a: process(all)
+ begin
+ -- Adder tree before the register: pairs -> nibbles -> bytes, widening
+ -- each partial count by one bit per level.
+ for i in 0 to 31 loop
+ pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
+ end loop;
+ for i in 0 to 15 loop
+ pc4(i) <= ('0' & pc2(i * 2)) + ('0' & pc2(i * 2 + 1));
+ end loop;
+ for i in 0 to 7 loop
+ pc8(i) <= ('0' & pc4(i * 2)) + ('0' & pc4(i * 2 + 1));
+ end loop;
+
+ -- after a clock edge
+ -- Sum four registered byte counts into each 32-bit word count.
+ for i in 0 to 1 loop
+ pc32(i) <= ("00" & pc8_r(i * 4)) + ("00" & pc8_r(i * 4 + 1)) +
+ ("00" & pc8_r(i * 4 + 2)) + ("00" & pc8_r(i * 4 + 3));
+ end loop;
+
+ -- Place the counts according to the registered datalen; all result
+ -- bits not written below stay zero.
+ popcnt <= (others => '0');
+ if dlen_r(3 downto 2) = "00" then
+ -- popcntb
+ -- one 4-bit count in the low nibble of every result byte
+ for i in 0 to 7 loop
+ popcnt(i * 8 + 3 downto i * 8) <= std_ulogic_vector(pc8_r(i));
+ end loop;
+ elsif dlen_r(3) = '0' then
+ -- popcntw
+ -- one 6-bit count at the bottom of each 32-bit result word
+ for i in 0 to 1 loop
+ popcnt(i * 32 + 5 downto i * 32) <= std_ulogic_vector(pc32(i));
+ end loop;
+ else
+ -- remaining case: a single 7-bit count over all 64 bits
+ popcnt(6 downto 0) <= std_ulogic_vector(('0' & pc32(0)) + ('0' & pc32(1)));
+ end if;
+ end process;
+
+ -- Output mux, steered by the registered do_popcnt so that it switches
+ -- together with the registered data.
+ result <= cntz when pcnt_r = '0' else popcnt;
+
+end behaviour;
--- /dev/null
+library vunit_lib;
+context vunit_lib.vunit_context;
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+library work;
+use work.common.all;
+
+library osvvm;
+use osvvm.RandomPkg.all;
+
+-- Testbench for bit_counter.
+--
+-- Covers both functions of the unit:
+--   * count leading/trailing zeroes, 64-bit and 32-bit
+--   * population count at byte, word and doubleword width
+--
+-- Every check follows the same pattern: drive the inputs, wait one full
+-- clock period (which contains exactly one rising edge of clk), then
+-- compare res. This matches the single register stage inside bit_counter.
+entity countbits_tb is
+    generic (runner_cfg : string := runner_cfg_default);
+end countbits_tb;
+
+architecture behave of countbits_tb is
+    constant clk_period: time := 10 ns;
+    signal rs: std_ulogic_vector(63 downto 0);
+    signal is_32bit, count_right: std_ulogic := '0';
+    -- popcnt controls; driven by the tests instead of being tied off so
+    -- that the popcnt path of bit_counter is exercised as well
+    signal do_popcnt: std_ulogic := '0';
+    signal datalen: std_ulogic_vector(3 downto 0) := "0000";
+    signal res: std_ulogic_vector(63 downto 0);
+    signal clk: std_ulogic;
+
+    -- Reference model: number of '1' bits in v.
+    function count_ones(v: std_ulogic_vector) return natural is
+        variable n: natural := 0;
+    begin
+        for i in v'range loop
+            if v(i) = '1' then
+                n := n + 1;
+            end if;
+        end loop;
+        return n;
+    end function;
+
+begin
+    bitcounter_0: entity work.bit_counter
+        port map (
+            clk => clk,
+            rs => rs,
+            result => res,
+            count_right => count_right,
+            is_32bit => is_32bit,
+            do_popcnt => do_popcnt,
+            datalen => datalen
+        );
+
+    -- Free-running clock, low for the first half period.
+    clk_process: process
+    begin
+        clk <= '0';
+        wait for clk_period/2;
+        clk <= '1';
+        wait for clk_period/2;
+    end process;
+
+    stim_process: process
+        variable r: std_ulogic_vector(63 downto 0);
+        variable expected: std_ulogic_vector(63 downto 0);
+        variable rnd : RandomPType;
+    begin
+        rnd.InitSeed(stim_process'path_name);
+
+        test_runner_setup(runner, runner_cfg);
+
+        while test_suite loop
+            if run("Test with input = 0") then
+                -- select the zero-count result explicitly in case several
+                -- tests share one simulation run
+                do_popcnt <= '0';
+                rs <= (others => '0');
+                is_32bit <= '0';
+                count_right <= '0';
+                wait for clk_period;
+                check_equal(res, 16#40#, result("for cntlzd"));
+                count_right <= '1';
+                wait for clk_period;
+                check_equal(res, 16#40#, result("for cnttzd"));
+                is_32bit <= '1';
+                count_right <= '0';
+                wait for clk_period;
+                check_equal(res, 16#20#, result("for cntlzw"));
+                count_right <= '1';
+                wait for clk_period;
+                check_equal(res, 16#20#, result("for cnttzw"));
+
+            elsif run("Test cntlzd/w") then
+                do_popcnt <= '0';
+                count_right <= '0';
+                for j in 0 to 100 loop
+                    -- random pattern with the top bit set, then shifted
+                    -- right one position per iteration so that exactly i
+                    -- leading zeroes are present
+                    r := rnd.RandSlv(64);
+                    r(63) := '1';
+                    for i in 0 to 63 loop
+                        rs <= r;
+                        is_32bit <= '0';
+                        wait for clk_period;
+                        check_equal(res, i, result("for cntlzd " & to_hstring(rs)));
+                        -- swap the halves so the word under test is the
+                        -- former upper word
+                        rs <= r(31 downto 0) & r(63 downto 32);
+                        is_32bit <= '1';
+                        wait for clk_period;
+                        if i < 32 then
+                            check_equal(res, i, result("for cntlzw " & to_hstring(rs)));
+                        else
+                            check_equal(res, 32, result("for cntlzw " & to_hstring(rs)));
+                        end if;
+                        r := '0' & r(63 downto 1);
+                    end loop;
+                end loop;
+
+            elsif run("Test cnttzd/w") then
+                do_popcnt <= '0';
+                count_right <= '1';
+                for j in 0 to 100 loop
+                    -- random pattern with bit 0 set, then shifted left one
+                    -- position per iteration so that exactly i trailing
+                    -- zeroes are present
+                    r := rnd.RandSlv(64);
+                    r(0) := '1';
+                    for i in 0 to 63 loop
+                        rs <= r;
+                        is_32bit <= '0';
+                        wait for clk_period;
+                        check_equal(res, i, result("for cnttzd " & to_hstring(rs)));
+                        is_32bit <= '1';
+                        wait for clk_period;
+                        if i < 32 then
+                            check_equal(res, i, result("for cnttzw " & to_hstring(rs)));
+                        else
+                            check_equal(res, 32, result("for cnttzw " & to_hstring(rs)));
+                        end if;
+                        r := r(62 downto 0) & '0';
+                    end loop;
+                end loop;
+
+            -- The datalen values below are chosen to hit the three decode
+            -- branches in bit_counter (bits 3:2 = "00", bit 3 = '0',
+            -- otherwise). NOTE(review): they look like operand sizes in
+            -- bytes (1/4/8) - confirm against the instruction decoder.
+            elsif run("Test popcntb") then
+                do_popcnt <= '1';
+                datalen <= "0001";
+                is_32bit <= '0';
+                count_right <= '0';
+                for j in 0 to 101 loop
+                    -- two directed corner cases, then random patterns
+                    case j is
+                        when 0 => r := (others => '0');
+                        when 1 => r := (others => '1');
+                        when others => r := rnd.RandSlv(64);
+                    end case;
+                    rs <= r;
+                    wait for clk_period;
+                    -- expected: each result byte holds the bit count of
+                    -- the corresponding operand byte
+                    expected := (others => '0');
+                    for k in 0 to 7 loop
+                        expected(k * 8 + 7 downto k * 8) :=
+                            std_ulogic_vector(to_unsigned(count_ones(r(k * 8 + 7 downto k * 8)), 8));
+                    end loop;
+                    check_equal(res, expected, result("for popcntb " & to_hstring(r)));
+                end loop;
+                do_popcnt <= '0';
+
+            elsif run("Test popcntw") then
+                do_popcnt <= '1';
+                datalen <= "0100";
+                is_32bit <= '0';
+                count_right <= '0';
+                for j in 0 to 101 loop
+                    case j is
+                        when 0 => r := (others => '0');
+                        when 1 => r := (others => '1');
+                        when others => r := rnd.RandSlv(64);
+                    end case;
+                    rs <= r;
+                    wait for clk_period;
+                    -- expected: each result word holds the bit count of
+                    -- the corresponding operand word
+                    expected(31 downto 0) :=
+                        std_ulogic_vector(to_unsigned(count_ones(r(31 downto 0)), 32));
+                    expected(63 downto 32) :=
+                        std_ulogic_vector(to_unsigned(count_ones(r(63 downto 32)), 32));
+                    check_equal(res, expected, result("for popcntw " & to_hstring(r)));
+                end loop;
+                do_popcnt <= '0';
+
+            elsif run("Test popcntd") then
+                do_popcnt <= '1';
+                datalen <= "1000";
+                is_32bit <= '0';
+                count_right <= '0';
+                for j in 0 to 101 loop
+                    -- j = 1 (all ones) checks that the full count of 64,
+                    -- which needs 7 result bits, is produced
+                    case j is
+                        when 0 => r := (others => '0');
+                        when 1 => r := (others => '1');
+                        when others => r := rnd.RandSlv(64);
+                    end case;
+                    rs <= r;
+                    wait for clk_period;
+                    check_equal(res, count_ones(r), result("for popcntd " & to_hstring(r)));
+                end loop;
+                do_popcnt <= '0';
+            end if;
+        end loop;
+
+        test_runner_cleanup(runner);
+    end process;
+end behave;
+++ /dev/null
-library ieee;
-use ieee.std_logic_1164.all;
-use ieee.numeric_std.all;
-
-library work;
-use work.helpers.all;
-
-entity zero_counter is
- port (
- clk : in std_logic;
- rs : in std_ulogic_vector(63 downto 0);
- count_right : in std_ulogic;
- is_32bit : in std_ulogic;
- result : out std_ulogic_vector(63 downto 0)
- );
-end entity zero_counter;
-
-architecture behaviour of zero_counter is
- signal inp : std_ulogic_vector(63 downto 0);
- signal sum : std_ulogic_vector(64 downto 0);
- signal msb_r : std_ulogic;
- signal onehot : std_ulogic_vector(63 downto 0);
- signal onehot_r : std_ulogic_vector(63 downto 0);
- signal bitnum : std_ulogic_vector(5 downto 0);
-
-begin
- countzero_r: process(clk)
- begin
- if rising_edge(clk) then
- msb_r <= sum(64);
- onehot_r <= onehot;
- end if;
- end process;
-
- countzero: process(all)
- begin
- if is_32bit = '0' then
- if count_right = '0' then
- inp <= bit_reverse(rs);
- else
- inp <= rs;
- end if;
- else
- inp(63 downto 32) <= x"FFFFFFFF";
- if count_right = '0' then
- inp(31 downto 0) <= bit_reverse(rs(31 downto 0));
- else
- inp(31 downto 0) <= rs(31 downto 0);
- end if;
- end if;
-
- sum <= std_ulogic_vector(unsigned('0' & not inp) + 1);
- onehot <= sum(63 downto 0) and inp;
-
- -- The following occurs after a clock edge
- bitnum <= bit_number(onehot_r);
-
- result <= x"00000000000000" & "0" & msb_r & bitnum;
- end process;
-end behaviour;
+++ /dev/null
-library vunit_lib;
-context vunit_lib.vunit_context;
-
-library ieee;
-use ieee.std_logic_1164.all;
-use ieee.numeric_std.all;
-
-library work;
-use work.common.all;
-
-library osvvm;
-use osvvm.RandomPkg.all;
-
-entity countzero_tb is
- generic (runner_cfg : string := runner_cfg_default);
-end countzero_tb;
-
-architecture behave of countzero_tb is
- constant clk_period: time := 10 ns;
- signal rs: std_ulogic_vector(63 downto 0);
- signal is_32bit, count_right: std_ulogic := '0';
- signal res: std_ulogic_vector(63 downto 0);
- signal clk: std_ulogic;
-
-begin
- zerocounter_0: entity work.zero_counter
- port map (
- clk => clk,
- rs => rs,
- result => res,
- count_right => count_right,
- is_32bit => is_32bit
- );
-
- clk_process: process
- begin
- clk <= '0';
- wait for clk_period/2;
- clk <= '1';
- wait for clk_period/2;
- end process;
-
- stim_process: process
- variable r: std_ulogic_vector(63 downto 0);
- variable rnd : RandomPType;
- begin
- rnd.InitSeed(stim_process'path_name);
-
- test_runner_setup(runner, runner_cfg);
-
- while test_suite loop
- if run("Test with input = 0") then
- rs <= (others => '0');
- is_32bit <= '0';
- count_right <= '0';
- wait for clk_period;
- check_equal(res, 16#40#, result("for cntlzd"));
- count_right <= '1';
- wait for clk_period;
- check_equal(res, 16#40#, result("for cnttzd"));
- is_32bit <= '1';
- count_right <= '0';
- wait for clk_period;
- check_equal(res, 16#20#, result("for cntlzw"));
- count_right <= '1';
- wait for clk_period;
- check_equal(res, 16#20#, result("for cnttzw"));
-
- elsif run("Test cntlzd/w") then
- count_right <= '0';
- for j in 0 to 100 loop
- r := rnd.RandSlv(64);
- r(63) := '1';
- for i in 0 to 63 loop
- rs <= r;
- is_32bit <= '0';
- wait for clk_period;
- check_equal(res, i, result("for cntlzd " & to_hstring(rs)));
- rs <= r(31 downto 0) & r(63 downto 32);
- is_32bit <= '1';
- wait for clk_period;
- if i < 32 then
- check_equal(res, i, result("for cntlzw " & to_hstring(rs)));
- else
- check_equal(res, 32, result("for cntlzw " & to_hstring(rs)));
- end if;
- r := '0' & r(63 downto 1);
- end loop;
- end loop;
-
- elsif run("Test cnttzd/w") then
- count_right <= '1';
- for j in 0 to 100 loop
- r := rnd.RandSlv(64);
- r(0) := '1';
- for i in 0 to 63 loop
- rs <= r;
- is_32bit <= '0';
- wait for clk_period;
- check_equal(res, i, result("for cnttzd " & to_hstring(rs)));
- is_32bit <= '1';
- wait for clk_period;
- if i < 32 then
- check_equal(res, i, result("for cnttzw " & to_hstring(rs)));
- else
- check_equal(res, 32, result("for cnttzw " & to_hstring(rs)));
- end if;
- r := r(62 downto 0) & '0';
- end loop;
- end loop;
- end if;
- end loop;
-
- test_runner_cleanup(runner);
- end process;
-end behave;
OP_AND => "001", -- logical_result
OP_OR => "001",
OP_XOR => "001",
- OP_POPCNT => "001",
OP_PRTY => "001",
OP_CMPB => "001",
OP_EXTS => "001",
OP_DIV => "011",
OP_DIVE => "011",
OP_MOD => "011",
- OP_CNTZ => "100", -- countzero_result
+ OP_CNTZ => "100", -- countbits_result
+ OP_POPCNT => "100",
OP_MFSPR => "101", -- spr_result
OP_B => "110", -- next_nia
OP_BC => "110",
signal rotator_result: std_ulogic_vector(63 downto 0);
signal rotator_carry: std_ulogic;
signal logical_result: std_ulogic_vector(63 downto 0);
- signal countzero_result: std_ulogic_vector(63 downto 0);
+ signal do_popcnt: std_ulogic;
+ signal countbits_result: std_ulogic_vector(63 downto 0);
signal alu_result: std_ulogic_vector(63 downto 0);
signal adder_result: std_ulogic_vector(63 downto 0);
signal misc_result: std_ulogic_vector(63 downto 0);
datalen => e_in.data_len
);
- countzero_0: entity work.zero_counter
+ countbits_0: entity work.bit_counter
port map (
clk => clk,
rs => c_in,
count_right => e_in.insn(10),
is_32bit => e_in.is_32bit,
- result => countzero_result
+ do_popcnt => do_popcnt,
+ datalen => e_in.data_len,
+ result => countbits_result
);
multiply_0: entity work.multiply
logical_result when "001",
rotator_result when "010",
muldiv_result when "011",
- countzero_result when "100",
+ countbits_result when "100",
spr_result when "101",
next_nia when "110",
misc_result when others;
rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0';
+ do_popcnt <= '1' when e_in.insn_type = OP_POPCNT else '0';
+
illegal := '0';
if r.intr_pending = '1' then
v.e.srr1 := r.e.srr1;
when OP_ADDG6S =>
when OP_CMPRB =>
when OP_CMPEQB =>
- when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS |
+ when OP_AND | OP_OR | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS |
OP_BPERM | OP_BCD =>
when OP_B =>
end if;
do_trace := '0';
- when OP_CNTZ =>
+ when OP_CNTZ | OP_POPCNT =>
v.e.valid := '0';
v.cntz_in_progress := '1';
v.busy := '1';
-- valid_in = 0. Hence they don't happen in the same cycle as any of
-- the cases above which depend on valid_in = 1.
if r.cntz_in_progress = '1' then
- -- cnt[lt]z always takes two cycles
+ -- cnt[lt]z and popcnt* always take two cycles
v.e.valid := '1';
elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
architecture behaviour of logical is
- subtype twobit is unsigned(1 downto 0);
- type twobit32 is array(0 to 31) of twobit;
- signal pc2 : twobit32;
- subtype threebit is unsigned(2 downto 0);
- type threebit16 is array(0 to 15) of threebit;
- signal pc4 : threebit16;
- subtype fourbit is unsigned(3 downto 0);
- type fourbit8 is array(0 to 7) of fourbit;
- signal pc8 : fourbit8;
- subtype sixbit is unsigned(5 downto 0);
- type sixbit2 is array(0 to 1) of sixbit;
- signal pc32 : sixbit2;
signal par0, par1 : std_ulogic;
- signal popcnt : std_ulogic_vector(63 downto 0);
signal parity : std_ulogic_vector(63 downto 0);
signal permute : std_ulogic_vector(7 downto 0);
variable negative : std_ulogic;
variable j : integer;
begin
- -- population counts
- for i in 0 to 31 loop
- pc2(i) <= unsigned("0" & rs(i * 2 downto i * 2)) + unsigned("0" & rs(i * 2 + 1 downto i * 2 + 1));
- end loop;
- for i in 0 to 15 loop
- pc4(i) <= ('0' & pc2(i * 2)) + ('0' & pc2(i * 2 + 1));
- end loop;
- for i in 0 to 7 loop
- pc8(i) <= ('0' & pc4(i * 2)) + ('0' & pc4(i * 2 + 1));
- end loop;
- for i in 0 to 1 loop
- pc32(i) <= ("00" & pc8(i * 4)) + ("00" & pc8(i * 4 + 1)) +
- ("00" & pc8(i * 4 + 2)) + ("00" & pc8(i * 4 + 3));
- end loop;
- popcnt <= (others => '0');
- if datalen(3 downto 2) = "00" then
- -- popcntb
- for i in 0 to 7 loop
- popcnt(i * 8 + 3 downto i * 8) <= std_ulogic_vector(pc8(i));
- end loop;
- elsif datalen(3) = '0' then
- -- popcntw
- for i in 0 to 1 loop
- popcnt(i * 32 + 5 downto i * 32) <= std_ulogic_vector(pc32(i));
- end loop;
- else
- popcnt(6 downto 0) <= std_ulogic_vector(('0' & pc32(0)) + ('0' & pc32(1)));
- end if;
-
-- parity calculations
par0 <= rs(0) xor rs(8) xor rs(16) xor rs(24);
par1 <= rs(32) xor rs(40) xor rs(48) xor rs(56);
tmp := not tmp;
end if;
- when OP_POPCNT =>
- tmp := popcnt;
when OP_PRTY =>
tmp := parity;
when OP_CMPB =>
- ppc_fx_insns.vhdl
- sim_console.vhdl
- logical.vhdl
- - countzero.vhdl
+ - countbits.vhdl
- control.vhdl
- execute1.vhdl
- fpu.vhdl