README: hello world needs 16KB of RAM
[microwatt.git] / rotator.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.common.all;
7
-- Rotate / shift / mask unit operating on 64-bit values.
-- The interface has no clock or reset port: every output is a function of
-- the current input values only.
entity rotator is
    port (
        -- Data operands.  rs is the value that is rotated; ra supplies the
        -- result bits that fall outside the generated mask in the
        -- insert-style output modes.
        rs, ra      : in  std_ulogic_vector(63 downto 0);

        -- Shift/rotate count.  The low six bits drive the rotation; bit 6
        -- only takes part in the mask calculation (and is ignored there
        -- when is_32bit is set).
        shift       : in  std_ulogic_vector(6 downto 0);

        -- Instruction word; the mask begin / mask end fields are read from
        -- bits 10 downto 1.
        insn        : in  std_ulogic_vector(31 downto 0);

        -- Control inputs:
        --   is_32bit     rotate the low word of rs replicated into both
        --                halves and use word-sized mask bounds
        --   right_shift  rotate by the negated count and mask from the left
        --   arith        with a negative rotated operand, fill with ones
        --                and enable carry_out
        --   clear_left   take the mask begin index from insn
        --   clear_right  take the mask end index from insn
        is_32bit, right_shift, arith, clear_left, clear_right : in std_ulogic;

        -- Rotated and masked result.
        result      : out std_ulogic_vector(63 downto 0);

        -- Carry flag, only ever set in the arithmetic-shift output mode.
        carry_out   : out std_ulogic
    );
end rotator;
22
23 architecture behaviour of rotator is
24 signal repl32: std_ulogic_vector(63 downto 0);
25 signal rot_count: std_ulogic_vector(5 downto 0);
26 signal rot1, rot2, rot: std_ulogic_vector(63 downto 0);
27 signal sh, mb, me: std_ulogic_vector(6 downto 0);
28 signal mr, ml: std_ulogic_vector(63 downto 0);
29 signal output_mode: std_ulogic_vector(1 downto 0);
30
31 -- note BE bit numbering
-- Build a 64-bit mask that has ones from big-endian bit position mask_begin
-- through big-endian bit 63 (the least significant bit) and zeros above.
-- Big-endian bit n corresponds to vector index 63 - n.
--   mask_begin = 0        -> all ones
--   mask_begin = 63       -> only index 0 set
--   mask_begin >= 64      -> all zeros
function right_mask(mask_begin: std_ulogic_vector(6 downto 0)) return std_ulogic_vector is
    -- First big-endian position that belongs to the mask.
    constant first_be: natural := to_integer(unsigned(mask_begin));
    variable mask: std_ulogic_vector(63 downto 0) := (others => '0');
begin
    -- Walk the vector by its own (little-endian) index and convert to the
    -- big-endian position for the comparison.
    for le_bit in mask'range loop
        if 63 - le_bit >= first_be then
            mask(le_bit) := '1';
        end if;
    end loop;
    return mask;
end function right_mask;
43
-- Build a 64-bit mask that has ones from big-endian bit 0 (the most
-- significant bit) through big-endian bit position mask_end, zeros below.
-- Big-endian bit n corresponds to vector index 63 - n.
--   mask_end = 0          -> only index 63 set
--   mask_end = 63         -> all ones
--   mask_end(6) not '0'   -> all zeros
function left_mask(mask_end: std_ulogic_vector(6 downto 0)) return std_ulogic_vector is
    variable mask: std_ulogic_vector(63 downto 0) := (others => '0');
    -- Last big-endian position that belongs to the mask.
    variable last_be: natural;
begin
    -- Anything other than a clean '0' in bit 6 yields the empty mask.
    if mask_end(6) /= '0' then
        return mask;
    end if;

    last_be := to_integer(unsigned(mask_end));
    -- Walk the vector by its own (little-endian) index and convert to the
    -- big-endian position for the comparison.
    for le_bit in mask'range loop
        if 63 - le_bit <= last_be then
            mask(le_bit) := '1';
        end if;
    end loop;
    return mask;
end function left_mask;
57
58 begin
-- Combinational datapath: replicate/rotate rs, derive the mask bounds from
-- the shift count and instruction fields, then merge the rotated value
-- with ra (or with a zero / one fill) according to the output mode.
rotator_0: process(all)
    -- Result bits that are taken from the rotated value in the two
    -- insert-style output modes; ra supplies the remaining bits.
    variable from_rot: std_ulogic_vector(63 downto 0);
begin
    -- Operand: 32-bit operations rotate the low word of rs copied into
    -- both halves, 64-bit operations rotate rs unchanged.
    repl32 <= rs;
    if is_32bit = '1' then
        repl32 <= rs(31 downto 0) & rs(31 downto 0);
    end if;

    -- Rotation is always to the left; a right shift uses the negated count.
    rot_count <= shift(5 downto 0);
    if right_shift = '1' then
        rot_count <= std_ulogic_vector(-signed(shift(5 downto 0)));
    end if;

    -- The rotation is split into three stages that each consume two bits
    -- of the count, i.e. three layers of 4:1 multiplexors (a good fit for
    -- the 6-input LUTs of the Xilinx Artix 7).  The low count bits are
    -- used first because they have less delay through the negation above.

    -- Stage 1: rotate left by 0, 1, 2 or 3.
    case rot_count(1 downto 0) is
        when "00" =>
            rot1 <= repl32;
        when "01" =>
            rot1 <= std_ulogic_vector(rotate_left(unsigned(repl32), 1));
        when "10" =>
            rot1 <= std_ulogic_vector(rotate_left(unsigned(repl32), 2));
        when others =>
            rot1 <= std_ulogic_vector(rotate_left(unsigned(repl32), 3));
    end case;

    -- Stage 2: rotate left by 0, 4, 8 or 12.
    case rot_count(3 downto 2) is
        when "00" =>
            rot2 <= rot1;
        when "01" =>
            rot2 <= std_ulogic_vector(rotate_left(unsigned(rot1), 4));
        when "10" =>
            rot2 <= std_ulogic_vector(rotate_left(unsigned(rot1), 8));
        when others =>
            rot2 <= std_ulogic_vector(rotate_left(unsigned(rot1), 12));
    end case;

    -- Stage 3: rotate left by 0, 16, 32 or 48.
    case rot_count(5 downto 4) is
        when "00" =>
            rot <= rot2;
        when "01" =>
            rot <= std_ulogic_vector(rotate_left(unsigned(rot2), 16));
        when "10" =>
            rot <= std_ulogic_vector(rotate_left(unsigned(rot2), 32));
        when others =>
            rot <= std_ulogic_vector(rotate_left(unsigned(rot2), 48));
    end case;

    -- Shift count as used for the masks: bit 6 is dropped for 32-bit
    -- operations, leaving a 6-bit count.
    sh <= (shift(6) and not is_32bit) & shift(5 downto 0);

    -- Mask begin index (big-endian bit numbering).
    if clear_left = '1' and is_32bit = '1' then
        -- 5-bit field from the instruction, offset by 32
        mb <= "01" & insn(10 downto 6);
    elsif clear_left = '1' then
        -- 6-bit field from the instruction, high bit stored in insn(5)
        mb <= "0" & insn(5) & insn(10 downto 6);
    elsif right_shift = '1' and is_32bit = '1' then
        -- sh + 32
        mb <= sh(5) & not sh(5) & sh(4 downto 0);
    elsif right_shift = '1' then
        mb <= sh;
    else
        -- 0 for 64-bit operations, 32 for 32-bit operations
        mb <= ('0' & is_32bit & "00000");
    end if;

    -- Mask end index (big-endian bit numbering).  Unless an instruction
    -- field is selected below it is 63 - sh.
    me <= sh(6) & not sh(5 downto 0);
    if clear_right = '1' then
        if is_32bit = '1' then
            -- 5-bit field from the instruction, offset by 32
            me <= "01" & insn(5 downto 1);
        elsif clear_left = '0' then
            -- 6-bit field from the instruction, high bit stored in insn(5)
            me <= "0" & insn(5) & insn(10 downto 6);
        end if;
    end if;

    -- Expand the two indexes into 64-bit masks.
    mr <= right_mask(mb);
    ml <= left_mask(me);

    -- Output mode:
    --   00  sl[wd]
    --   0w  rlw*, rldic, rldicr, rldimi, where w = 1 iff mb > me
    --   10  rldicl, sr[wd]
    --   1z  sra[wd][i], where z = 1 if the rotated operand is negative
    if right_shift = '1' or (clear_left = '1' and clear_right = '0') then
        output_mode(1) <= '1';
        output_mode(0) <= arith and repl32(63);
    elsif clear_right = '1' and unsigned(mb(5 downto 0)) > unsigned(me(5 downto 0)) then
        output_mode(1) <= '0';
        output_mode(0) <= '1';
    else
        output_mode(1) <= '0';
        output_mode(0) <= '0';
    end if;

    -- Combine the rotated value with the masks.
    case output_mode is
        when "00" =>
            -- contiguous mask: bits inside both masks come from rot
            from_rot := mr and ml;
            result <= (rot and from_rot) or (ra and not from_rot);
        when "01" =>
            -- wrapped mask (mb > me): bits inside either mask come from rot
            from_rot := mr or ml;
            result <= (rot and from_rot) or (ra and not from_rot);
        when "10" =>
            -- zero fill above the mask begin position
            result <= rot and mr;
        when others =>
            -- one fill above the mask begin position
            result <= rot or not mr;
    end case;

    -- Carry is only produced in the arithmetic-shift mode ("11"), where it
    -- is set when rs has a one in any bit position outside the left mask.
    carry_out <= '0';
    if output_mode = "11" then
        carry_out <= or (rs and not ml);
    end if;
end process rotator_0;
183 end behaviour;