type state_t is (IDLE,
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
- DO_FMR, DO_FMRG,
+ DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
DO_FCFID, DO_FCTI,
DO_FRSP, DO_FRI,
- DO_FADD, DO_FMUL, DO_FDIV,
+ DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
+ DO_FRE, DO_FRSQRTE,
+ DO_FSEL,
FRI_1,
- ADD_SHIFT, ADD_2, ADD_3,
+ ADD_1, ADD_SHIFT, ADD_2, ADD_3,
+ CMP_1, CMP_2,
MULT_1,
+ FMADD_1, FMADD_2, FMADD_3,
+ FMADD_4, FMADD_5, FMADD_6,
LOOKUP,
DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
+ FRE_1,
+ RSQRT_1,
+ FTDIV_1,
+ SQRT_1, SQRT_2, SQRT_3, SQRT_4,
+ SQRT_5, SQRT_6, SQRT_7, SQRT_8,
+ SQRT_9, SQRT_10, SQRT_11, SQRT_12,
INT_SHIFT, INT_ROUND, INT_ISHIFT,
INT_FINAL, INT_CHECK, INT_OFLOW,
FINISH, NORMALIZE,
DENORM,
RENORM_A, RENORM_A2,
RENORM_B, RENORM_B2,
- RENORM_C, RENORM_C2);
+ RENORM_C, RENORM_C2,
+ NAN_RESULT, EXC_RESULT);
type reg_type is record
state : state_t;
b : fpu_reg_type;
c : fpu_reg_type;
r : std_ulogic_vector(63 downto 0); -- 10.54 format
+ s : std_ulogic_vector(55 downto 0); -- extended fraction
x : std_ulogic;
p : std_ulogic_vector(63 downto 0); -- 8.56 format
y : std_ulogic_vector(63 downto 0); -- 8.56 format
round_mode : std_ulogic_vector(2 downto 0);
is_subtract : std_ulogic;
exp_cmp : std_ulogic;
+ madd_cmp : std_ulogic;
add_bsmall : std_ulogic;
is_multiply : std_ulogic;
+ is_sqrt : std_ulogic;
first : std_ulogic;
count : unsigned(1 downto 0);
+ doing_ftdiv : std_ulogic_vector(1 downto 0);
+ opsel_a : std_ulogic_vector(1 downto 0);
+ use_a : std_ulogic;
+ use_b : std_ulogic;
+ use_c : std_ulogic;
+ invalid : std_ulogic;
+ negate : std_ulogic;
+ longmask : std_ulogic;
end record;
- type lookup_table is array(0 to 255) of std_ulogic_vector(17 downto 0);
+ type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
signal r, rin : reg_type;
signal fp_result : std_ulogic_vector(63 downto 0);
- signal opsel_a : std_ulogic_vector(1 downto 0);
signal opsel_b : std_ulogic_vector(1 downto 0);
signal opsel_r : std_ulogic_vector(1 downto 0);
+ signal opsel_s : std_ulogic_vector(1 downto 0);
signal opsel_ainv : std_ulogic;
- signal opsel_amask : std_ulogic;
+ signal opsel_mask : std_ulogic;
signal opsel_binv : std_ulogic;
signal in_a : std_ulogic_vector(63 downto 0);
signal in_b : std_ulogic_vector(63 downto 0);
signal lost_bits : std_ulogic;
signal r_hi_nz : std_ulogic;
signal r_lo_nz : std_ulogic;
+ signal s_nz : std_ulogic;
signal misc_sel : std_ulogic_vector(3 downto 0);
signal f_to_multiply : MultiplyInputType;
signal multiply_to_f : MultiplyOutputType;
constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
- constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
+ constant BIN_RND : std_ulogic_vector(1 downto 0) := "10";
+ constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
+ constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
+ constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
+ constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
+ constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
+
-- msel values
constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
- constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
+ constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
+ constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
-- Inverse lookup table, indexed by the top 8 fraction bits
+ -- The first 256 entries are the reciprocal (1/x) lookup table,
+ -- and the remaining 768 entries are the reciprocal square root table.
-- Output range is [0.5, 1) in 0.19 format, though the top
-- bit isn't stored since it is always 1.
-- Each output value is the inverse of the center of the input
18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
- 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100"
+ 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
+ -- 1/sqrt(x) lookup table
+ -- Input is in the range [1, 4), i.e. two bits to the left of the
+ -- binary point. Those 2 bits index the following 3 blocks of 256 values.
+ -- 1.0 ... 1.9999
+ 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
+ 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
+ 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
+ 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
+ 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
+ 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
+ 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
+ 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
+ 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
+ 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
+ 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
+ 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
+ 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
+ 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
+ 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
+ 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
+ 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
+ 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
+ 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
+ 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
+ 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
+ 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
+ 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
+ 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
+ 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
+ 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
+ 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
+ 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
+ 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
+ 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
+ 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
+ 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
+ -- 2.0 ... 2.9999
+ 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
+ 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
+ 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
+ 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
+ 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
+ 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
+ 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
+ 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
+ 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
+ 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
+ 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
+ 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
+ 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
+ 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
+ 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
+ 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
+ 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
+ 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
+ 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
+ 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
+ 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
+ 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
+ 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
+ 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
+ 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
+ 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
+ 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
+ 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
+ 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
+ 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
+ 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
+ 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
+ -- 3.0 ... 3.9999
+ 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
+ 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
+ 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
+ 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
+ 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
+ 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
+ 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
+ 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
+ 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
+ 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
+ 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
+ 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
+ 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
+ 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
+ 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
+ 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
+ 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
+ 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
+ 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
+ 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
+ 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
+ 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
+ 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
+ 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
+ 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
+ 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
+ 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
+ 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
+ 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
+ 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
+ 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
+ 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
);
-- Left and right shifter with 120 bit input and 64 bit output.
-- synchronous reads from lookup table
lut_access: process(clk)
+ -- addrhi supplies the top two bits of the 10-bit table address
+ variable addrhi : std_ulogic_vector(1 downto 0);
+ variable addr : std_ulogic_vector(9 downto 0);
begin
if rising_edge(clk) then
- inverse_est <= '1' & inverse_table(to_integer(unsigned(r.b.mantissa(53 downto 46))));
+ -- When r.is_sqrt is set, mantissa bits 55..54 of B form the top two
+ -- address bits; otherwise they are forced to "00" so that only
+ -- entries 0..255 of the table can be addressed.
+ if r.is_sqrt = '1' then
+ addrhi := r.b.mantissa(55 downto 54);
+ else
+ addrhi := "00";
+ end if;
+ -- Low 8 address bits are mantissa bits 53..46 of B in both cases
+ addr := addrhi & r.b.mantissa(53 downto 46);
+ -- Registered read; a '1' is prepended to the 18-bit table entry
+ inverse_est <= '1' & inverse_table(to_integer(unsigned(addr)));
end if;
end process;
variable need_check : std_ulogic;
variable msb : std_ulogic;
variable is_add : std_ulogic;
- variable qnan_result : std_ulogic;
- variable longmask : std_ulogic;
variable set_a : std_ulogic;
variable set_b : std_ulogic;
variable set_c : std_ulogic;
- variable px_nz : std_ulogic;
- variable maddend : std_ulogic_vector(127 downto 0);
variable set_y : std_ulogic;
+ variable set_s : std_ulogic;
+ variable qnan_result : std_ulogic;
+ variable px_nz : std_ulogic;
variable pcmpb_eq : std_ulogic;
variable pcmpb_lt : std_ulogic;
variable pshift : std_ulogic;
+ variable renorm_sqrt : std_ulogic;
+ variable sqrt_exp : signed(EXP_BITS-1 downto 0);
+ variable shiftin : std_ulogic;
+ variable mulexp : signed(EXP_BITS-1 downto 0);
+ variable maddend : std_ulogic_vector(127 downto 0);
+ variable sum : std_ulogic_vector(63 downto 0);
+ variable round_inc : std_ulogic_vector(63 downto 0);
begin
v := r;
illegal := '0';
v.fe_mode := or (e_in.fe_mode);
v.dest_fpr := e_in.frt;
v.single_prec := e_in.single;
+ v.longmask := e_in.single;
v.int_result := '0';
v.rc := e_in.rc;
v.is_cmp := e_in.out_cr;
v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
v.is_subtract := '0';
v.is_multiply := '0';
+ v.is_sqrt := '0';
v.add_bsmall := '0';
+ v.doing_ftdiv := "00";
+
adec := decode_dp(e_in.fra, int_input);
bdec := decode_dp(e_in.frb, int_input);
cdec := decode_dp(e_in.frc, int_input);
if adec.exponent > bdec.exponent then
v.exp_cmp := '1';
end if;
+ v.madd_cmp := '0';
+ if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
+ v.madd_cmp := '1';
+ end if;
end if;
r_hi_nz <= or (r.r(55 downto 31));
r_lo_nz <= or (r.r(30 downto 2));
+ s_nz <= or (r.s);
if r.single_prec = '0' then
- max_exp := to_signed(1023, EXP_BITS);
- min_exp := to_signed(-1022, EXP_BITS);
+ if r.doing_ftdiv(1) = '0' then
+ max_exp := to_signed(1023, EXP_BITS);
+ else
+ max_exp := to_signed(1020, EXP_BITS);
+ end if;
+ if r.doing_ftdiv(0) = '0' then
+ min_exp := to_signed(-1022, EXP_BITS);
+ else
+ min_exp := to_signed(-1021, EXP_BITS);
+ end if;
bias_exp := to_signed(1536, EXP_BITS);
else
max_exp := to_signed(127, EXP_BITS);
v.update_fprf := '0';
v.shift := to_signed(0, EXP_BITS);
v.first := '0';
- opsel_a <= AIN_R;
+ v.opsel_a := AIN_R;
opsel_ainv <= '0';
- opsel_amask <= '0';
+ opsel_mask <= '0';
opsel_b <= BIN_ZERO;
opsel_binv <= '0';
opsel_r <= RES_SUM;
+ opsel_s <= S_ZERO;
carry_in <= '0';
misc_sel <= "0000";
fpscr_mask := (others => '1');
renormalize := '0';
set_x := '0';
qnan_result := '0';
- longmask := r.single_prec;
set_a := '0';
set_b := '0';
set_c := '0';
+ set_s := '0';
f_to_multiply.is_32bit <= '0';
f_to_multiply.valid <= '0';
msel_1 <= MUL1_A;
msel_inv <= '0';
set_y := '0';
pshift := '0';
+ renorm_sqrt := '0';
+ shiftin := '0';
case r.state is
when IDLE =>
+ v.use_a := '0';
+ v.use_b := '0';
+ v.use_c := '0';
+ v.invalid := '0';
+ v.negate := '0';
if e_in.valid = '1' then
case e_in.insn(5 downto 1) is
when "00000" =>
- v.state := DO_MCRFS;
+ if e_in.insn(8) = '1' then
+ if e_in.insn(6) = '0' then
+ v.state := DO_FTDIV;
+ else
+ v.state := DO_FTSQRT;
+ end if;
+ elsif e_in.insn(7) = '1' then
+ v.state := DO_MCRFS;
+ else
+ v.opsel_a := AIN_B;
+ v.state := DO_FCMP;
+ end if;
when "00110" =>
if e_in.insn(10) = '0' then
if e_in.insn(8) = '0' then
v.state := DO_MTFSF;
end if;
when "01000" =>
+ v.opsel_a := AIN_B;
if e_in.insn(9 downto 8) /= "11" then
v.state := DO_FMR;
else
v.state := DO_FRI;
end if;
when "01100" =>
+ v.opsel_a := AIN_B;
v.state := DO_FRSP;
when "01110" =>
+ v.opsel_a := AIN_B;
if int_input = '1' then
-- fcfid[u][s]
v.state := DO_FCFID;
end if;
when "01111" =>
v.round_mode := "001";
+ v.opsel_a := AIN_B;
v.state := DO_FCTI;
when "10010" =>
+ v.opsel_a := AIN_A;
+ if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
+ v.opsel_a := AIN_B;
+ end if;
v.state := DO_FDIV;
when "10100" | "10101" =>
+ v.opsel_a := AIN_A;
v.state := DO_FADD;
+ when "10110" =>
+ v.is_sqrt := '1';
+ v.opsel_a := AIN_B;
+ v.state := DO_FSQRT;
+ when "10111" =>
+ v.state := DO_FSEL;
+ when "11000" =>
+ v.opsel_a := AIN_B;
+ v.state := DO_FRE;
when "11001" =>
v.is_multiply := '1';
+ v.opsel_a := AIN_A;
+ if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then
+ v.opsel_a := AIN_C;
+ end if;
v.state := DO_FMUL;
+ when "11010" =>
+ v.is_sqrt := '1';
+ v.opsel_a := AIN_B;
+ v.state := DO_FRSQRTE;
+ when "11100" | "11101" | "11110" | "11111" =>
+ if v.a.mantissa(54) = '0' then
+ v.opsel_a := AIN_A;
+ elsif v.c.mantissa(54) = '0' then
+ v.opsel_a := AIN_C;
+ else
+ v.opsel_a := AIN_B;
+ end if;
+ v.state := DO_FMADD;
when others =>
illegal := '1';
end case;
end if;
v.x := '0';
v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
+ set_s := '1';
when DO_MCRFS =>
j := to_integer(unsigned(insn_bfa(r.insn)));
v.instr_done := '1';
v.state := IDLE;
+ when DO_FTDIV =>
+ -- ftdiv: derive the CR result from the operand classes. By default
+ -- the instruction completes here and the state machine returns to
+ -- IDLE; the final else-branch below overrides that.
+ v.instr_done := '1';
+ v.state := IDLE;
+ v.cr_result := "0000";
+ -- cr_result(2): A is infinite, or B is zero, infinite or denormal.
+ -- A denormal is a FINITE value whose unit bit, mantissa(54), is 0;
+ -- this is the same test the arithmetic states use before
+ -- renormalizing. Bit 53 is the top fraction bit and is 0 for many
+ -- normal values (e.g. 1.0), so it must not be used here.
+ if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
+ (r.b.class = FINITE and r.b.mantissa(54) = '0') then
+ v.cr_result(2) := '1';
+ end if;
+ -- cr_result(1): A is NaN or infinite, B is NaN, zero or infinite,
+ -- or A is finite with an exponent of -970 or less.
+ if r.a.class = NAN or r.a.class = INFINITY or
+ r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
+ (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
+ v.cr_result(1) := '1';
+ else
+ -- Not decided yet: continue in FTDIV_1 instead of finishing.
+ -- doing_ftdiv = "11" selects the alternate max_exp/min_exp limits
+ -- (1020 / -1021) used when single_prec = '0'.
+ v.doing_ftdiv := "11";
+ v.first := '1';
+ v.state := FTDIV_1;
+ v.instr_done := '0';
+ end if;
+
+ when DO_FTSQRT =>
+ -- ftsqrt: derive the CR result from the class, sign and exponent
+ -- of B; the instruction always completes in this state.
+ v.instr_done := '1';
+ v.state := IDLE;
+ v.cr_result := "0000";
+ -- cr_result(2): B is zero, infinite or denormal. A denormal is a
+ -- FINITE value whose unit bit, mantissa(54), is 0; bit 53 is only
+ -- the top fraction bit and is 0 for many normal values.
+ if r.b.class = ZERO or r.b.class = INFINITY or
+ (r.b.class = FINITE and r.b.mantissa(54) = '0') then
+ v.cr_result(2) := '1';
+ end if;
+ -- cr_result(1): B is NaN, infinite, zero or negative, or has an
+ -- exponent of -970 or less. The flag has to be SET here; writing
+ -- '0' would be a no-op because cr_result was cleared above.
+ if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
+ or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
+ v.cr_result(1) := '1';
+ end if;
+
+ when DO_FCMP =>
+ -- fcmp[uo]
+ -- r.opsel_a = AIN_B
+ -- Most operand combinations are decided right here from class,
+ -- sign and exponent; only finite operands with equal sign and
+ -- equal exponent go on to CMP_1 for a mantissa comparison.
+ -- cr_result encodings used below: "0001" = unordered,
+ -- "0010" = equal, bit 3 set = A orders below B,
+ -- bit 2 set = A orders above B.
+ v.instr_done := '1';
+ v.state := IDLE;
+ update_fx := '1';
+ v.result_exp := r.b.exponent;
+ if (r.a.class = NAN and r.a.mantissa(53) = '0') or
+ (r.b.class = NAN and r.b.mantissa(53) = '0') then
+ -- Signalling NAN
+ v.fpscr(FPSCR_VXSNAN) := '1';
+ -- insn(6) = '1' selects fcmpo; VXVC is added only when the
+ -- FPSCR VE bit is clear
+ if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
+ v.fpscr(FPSCR_VXVC) := '1';
+ end if;
+ invalid := '1';
+ v.cr_result := "0001"; -- unordered
+ elsif r.a.class = NAN or r.b.class = NAN then
+ -- Only NaNs with mantissa(53) = '1' reach this branch
+ if r.insn(6) = '1' then
+ -- fcmpo
+ v.fpscr(FPSCR_VXVC) := '1';
+ invalid := '1';
+ end if;
+ v.cr_result := "0001"; -- unordered
+ elsif r.a.class = ZERO and r.b.class = ZERO then
+ -- Zeroes compare equal regardless of sign
+ v.cr_result := "0010"; -- equal
+ elsif r.a.negative /= r.b.negative then
+ -- Opposite signs: the negative operand is the smaller one
+ v.cr_result := r.a.negative & r.b.negative & "00";
+ elsif r.a.class = ZERO then
+ -- A and B are the same sign from here down
+ v.cr_result := not r.b.negative & r.b.negative & "00";
+ elsif r.a.class = INFINITY then
+ if r.b.class = INFINITY then
+ v.cr_result := "0010";
+ else
+ v.cr_result := r.a.negative & not r.a.negative & "00";
+ end if;
+ elsif r.b.class = ZERO then
+ -- A is finite from here down
+ v.cr_result := r.a.negative & not r.a.negative & "00";
+ elsif r.b.class = INFINITY then
+ v.cr_result := not r.b.negative & r.b.negative & "00";
+ elsif r.exp_cmp = '1' then
+ -- A and B are both finite from here down
+ v.cr_result := r.a.negative & not r.a.negative & "00";
+ elsif r.a.exponent /= r.b.exponent then
+ -- A exponent is smaller than B
+ v.cr_result := not r.a.negative & r.a.negative & "00";
+ else
+ -- Prepare to subtract mantissas, put B in R
+ v.cr_result := "0000";
+ v.instr_done := '0';
+ v.opsel_a := AIN_A;
+ v.state := CMP_1;
+ end if;
+ -- Mirror the CR result into the FPSCR FL..FU field
+ v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
+
when DO_MTFSB =>
-- mtfsb{0,1}
j := to_integer(unsigned(insn_bt(r.insn)));
v.state := IDLE;
when DO_FMR =>
- opsel_a <= AIN_B;
+ -- r.opsel_a = AIN_B
v.result_class := r.b.class;
v.result_exp := r.b.exponent;
v.quieten_nan := '0';
v.state := IDLE;
when DO_FRI => -- fri[nzpm]
- opsel_a <= AIN_B;
+ -- r.opsel_a = AIN_B
v.result_class := r.b.class;
v.result_sign := r.b.negative;
v.result_exp := r.b.exponent;
end if;
when DO_FRSP =>
- opsel_a <= AIN_B;
+ -- r.opsel_a = AIN_B, r.shift = 0
v.result_class := r.b.class;
v.result_sign := r.b.negative;
v.result_exp := r.b.exponent;
elsif r.b.exponent > to_signed(127, EXP_BITS) then
v.state := ROUND_OFLOW;
else
- v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
end if;
else
-- instr bit 9: 1=dword 0=word
-- instr bit 8: 1=unsigned 0=signed
-- instr bit 1: 1=round to zero 0=use fpscr[RN]
- opsel_a <= AIN_B;
+ -- r.opsel_a = AIN_B
v.result_class := r.b.class;
v.result_sign := r.b.negative;
v.result_exp := r.b.exponent;
end case;
when DO_FCFID =>
+ -- r.opsel_a = AIN_B
v.result_sign := '0';
- opsel_a <= AIN_B;
if r.insn(8) = '0' and r.b.negative = '1' then
-- fcfid[s] with negative operand, set R = -B
opsel_ainv <= '1';
when DO_FADD =>
-- fadd[s] and fsub[s]
- opsel_a <= AIN_A;
+ -- r.opsel_a = AIN_A
v.result_sign := r.a.negative;
v.result_class := r.a.class;
v.result_exp := r.a.exponent;
v.fpscr(FPSCR_FR) := '0';
v.fpscr(FPSCR_FI) := '0';
+ v.use_a := '1';
+ v.use_b := '1';
is_add := r.a.negative xor r.b.negative xor r.insn(1);
if r.a.class = FINITE and r.b.class = FINITE then
v.is_subtract := not is_add;
v.add_bsmall := r.exp_cmp;
+ v.opsel_a := AIN_B;
if r.exp_cmp = '0' then
v.shift := r.a.exponent - r.b.exponent;
v.result_sign := r.b.negative xnor r.insn(1);
if r.a.exponent = r.b.exponent then
v.state := ADD_2;
else
+ v.longmask := '0';
v.state := ADD_SHIFT;
end if;
else
- opsel_a <= AIN_B;
- v.shift := r.b.exponent - r.a.exponent;
- v.result_exp := r.b.exponent;
- v.state := ADD_SHIFT;
+ v.state := ADD_1;
end if;
else
- if (r.a.class = NAN and r.a.mantissa(53) = '0') or
- (r.b.class = NAN and r.b.mantissa(53) = '0') then
- -- Signalling NAN
- v.fpscr(FPSCR_VXSNAN) := '1';
- invalid := '1';
- end if;
- if r.a.class = NAN then
- -- nothing to do, result is A
- elsif r.b.class = NAN then
- v.result_class := NAN;
- v.result_sign := r.b.negative;
- opsel_a <= AIN_B;
+ if r.a.class = NAN or r.b.class = NAN then
+ v.state := NAN_RESULT;
elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
-- invalid operation, construct QNaN
v.fpscr(FPSCR_VXISI) := '1';
qnan_result := '1';
+ arith_done := '1';
elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
-- return -0 for rounding to -infinity
v.result_sign := r.round_mode(1) and r.round_mode(0);
+ arith_done := '1';
elsif r.a.class = INFINITY or r.b.class = ZERO then
- -- nothing to do, result is A
+ -- result is A
+ v.opsel_a := AIN_A;
+ v.state := EXC_RESULT;
else
-- result is +/- B
- v.result_sign := r.b.negative xnor r.insn(1);
- v.result_class := r.b.class;
- v.result_exp := r.b.exponent;
- opsel_a <= AIN_B;
+ v.opsel_a := AIN_B;
+ v.negate := not r.insn(1);
+ v.state := EXC_RESULT;
end if;
- arith_done := '1';
end if;
when DO_FMUL =>
-- fmul[s]
- opsel_a <= AIN_A;
- v.result_sign := r.a.negative;
+ -- r.opsel_a = AIN_A unless C is denorm and A isn't
+ v.result_sign := r.a.negative xor r.c.negative;
v.result_class := r.a.class;
- v.result_exp := r.a.exponent;
v.fpscr(FPSCR_FR) := '0';
v.fpscr(FPSCR_FI) := '0';
+ v.use_a := '1';
+ v.use_c := '1';
if r.a.class = FINITE and r.c.class = FINITE then
- v.result_sign := r.a.negative xor r.c.negative;
v.result_exp := r.a.exponent + r.c.exponent;
-- Renormalize denorm operands
if r.a.mantissa(54) = '0' then
v.state := RENORM_A;
elsif r.c.mantissa(54) = '0' then
- opsel_a <= AIN_C;
v.state := RENORM_C;
else
f_to_multiply.valid <= '1';
v.state := MULT_1;
end if;
else
- if (r.a.class = NAN and r.a.mantissa(53) = '0') or
- (r.c.class = NAN and r.c.mantissa(53) = '0') then
- -- Signalling NAN
- v.fpscr(FPSCR_VXSNAN) := '1';
- invalid := '1';
- end if;
- if r.a.class = NAN then
- -- result is A
- elsif r.c.class = NAN then
- v.result_class := NAN;
- v.result_sign := r.c.negative;
- opsel_a <= AIN_C;
+ if r.a.class = NAN or r.c.class = NAN then
+ v.state := NAN_RESULT;
elsif (r.a.class = INFINITY and r.c.class = ZERO) or
(r.a.class = ZERO and r.c.class = INFINITY) then
-- invalid operation, construct QNaN
qnan_result := '1';
elsif r.a.class = ZERO or r.a.class = INFINITY then
-- result is +/- A
- v.result_sign := r.a.negative xor r.c.negative;
+ arith_done := '1';
else
-- r.c.class is ZERO or INFINITY
- v.result_class := r.c.class;
- v.result_sign := r.a.negative xor r.c.negative;
+ v.opsel_a := AIN_C;
+ v.negate := r.a.negative;
+ v.state := EXC_RESULT;
end if;
- arith_done := '1';
end if;
when DO_FDIV =>
- opsel_a <= AIN_A;
- v.result_sign := r.a.negative;
+ -- r.opsel_a = AIN_A unless B is denorm and A isn't
v.result_class := r.a.class;
- v.result_exp := r.a.exponent;
v.fpscr(FPSCR_FR) := '0';
v.fpscr(FPSCR_FI) := '0';
+ v.use_a := '1';
+ v.use_b := '1';
v.result_sign := r.a.negative xor r.b.negative;
v.result_exp := r.a.exponent - r.b.exponent;
v.count := "00";
if r.a.mantissa(54) = '0' then
v.state := RENORM_A;
elsif r.b.mantissa(54) = '0' then
- opsel_a <= AIN_B;
v.state := RENORM_B;
else
v.first := '1';
v.state := DIV_2;
end if;
else
- if (r.a.class = NAN and r.a.mantissa(53) = '0') or
- (r.b.class = NAN and r.b.mantissa(53) = '0') then
- -- Signalling NAN
- v.fpscr(FPSCR_VXSNAN) := '1';
- invalid := '1';
- end if;
- if r.a.class = NAN then
- -- result is A
- v.result_sign := r.a.negative;
- elsif r.b.class = NAN then
- v.result_class := NAN;
- v.result_sign := r.b.negative;
- opsel_a <= AIN_B;
+ if r.a.class = NAN or r.b.class = NAN then
+ v.state := NAN_RESULT;
elsif r.b.class = INFINITY then
if r.a.class = INFINITY then
v.fpscr(FPSCR_VXIDI) := '1';
else
v.result_class := ZERO;
end if;
+ arith_done := '1';
elsif r.b.class = ZERO then
if r.a.class = ZERO then
v.fpscr(FPSCR_VXZDZ) := '1';
end if;
v.result_class := INFINITY;
end if;
- -- else r.b.class = FINITE, result_class = r.a.class
+ arith_done := '1';
+ else -- r.b.class = FINITE, result_class = r.a.class
+ arith_done := '1';
+ end if;
+ end if;
+
+ when DO_FSEL =>
+ -- fsel: pick operand C when A is a zero (of either sign) or is
+ -- non-negative and not a NaN; otherwise pick operand B. The choice
+ -- is recorded in opsel_a and the state machine moves to EXC_RESULT.
+ v.fpscr(FPSCR_FR) := '0';
+ v.fpscr(FPSCR_FI) := '0';
+ if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
+ v.opsel_a := AIN_C;
+ else
+ v.opsel_a := AIN_B;
+ end if;
+ -- NOTE(review): quieten_nan is cleared, presumably so a NaN operand
+ -- is passed through unchanged -- confirm in the result path.
+ v.quieten_nan := '0';
+ v.state := EXC_RESULT;
+
+ when DO_FSQRT =>
+ -- r.opsel_a = AIN_B
+ -- Square root entry state: dispatch on the class of B. Result
+ -- class and sign default to those of B.
+ v.result_class := r.b.class;
+ v.result_sign := r.b.negative;
+ v.fpscr(FPSCR_FR) := '0';
+ v.fpscr(FPSCR_FI) := '0';
+ v.use_b := '1';
+ case r.b.class is
+ when FINITE =>
+ v.result_exp := r.b.exponent;
+ if r.b.negative = '1' then
+ -- negative finite operand: flag VXSQRT and request a QNaN
+ v.fpscr(FPSCR_VXSQRT) := '1';
+ qnan_result := '1';
+ elsif r.b.mantissa(54) = '0' then
+ -- unit bit clear: B has to be renormalized first
+ v.state := RENORM_B;
+ elsif r.b.exponent(0) = '0' then
+ -- even exponent: start the square-root sequence directly
+ v.state := SQRT_1;
+ else
+ -- odd exponent: go through RENORM_B2 with a shift count of 1
+ v.shift := to_signed(1, EXP_BITS);
+ v.state := RENORM_B2;
+ end if;
+ when NAN =>
+ v.state := NAN_RESULT;
+ when ZERO =>
+ -- result is B
+ arith_done := '1';
+ when INFINITY =>
+ if r.b.negative = '1' then
+ v.fpscr(FPSCR_VXSQRT) := '1';
+ qnan_result := '1';
+ -- else result is B
+ end if;
+ arith_done := '1';
+ end case;
+
+ when DO_FRE =>
+ -- r.opsel_a = AIN_B
+ -- Reciprocal estimate entry state: dispatch on the class of B.
+ -- Result class and sign default to those of B.
+ v.result_class := r.b.class;
+ v.result_sign := r.b.negative;
+ v.fpscr(FPSCR_FR) := '0';
+ v.fpscr(FPSCR_FI) := '0';
+ v.use_b := '1';
+ case r.b.class is
+ when FINITE =>
+ -- the result exponent starts out as the negated exponent of B
+ v.result_exp := - r.b.exponent;
+ if r.b.mantissa(54) = '0' then
+ -- unit bit clear: B has to be renormalized first
+ v.state := RENORM_B;
+ else
+ v.state := FRE_1;
+ end if;
+ when NAN =>
+ v.state := NAN_RESULT;
+ when INFINITY =>
+ -- an infinite operand gives a zero carrying B's sign
+ v.result_class := ZERO;
+ arith_done := '1';
+ when ZERO =>
+ -- a zero operand gives an infinity carrying B's sign and
+ -- raises zero_divide
+ v.result_class := INFINITY;
+ zero_divide := '1';
+ arith_done := '1';
+ end case;
+
+ when DO_FRSQRTE =>
+ -- r.opsel_a = AIN_B
+ -- Reciprocal square-root estimate entry state: dispatch on the
+ -- class of B. Result class and sign default to those of B.
+ v.result_class := r.b.class;
+ v.result_sign := r.b.negative;
+ v.fpscr(FPSCR_FR) := '0';
+ v.fpscr(FPSCR_FI) := '0';
+ v.use_b := '1';
+ -- the shift count is set to 1 up front for every path
+ v.shift := to_signed(1, EXP_BITS);
+ case r.b.class is
+ when FINITE =>
+ v.result_exp := r.b.exponent;
+ if r.b.negative = '1' then
+ -- negative finite operand: flag VXSQRT and request a QNaN
+ v.fpscr(FPSCR_VXSQRT) := '1';
+ qnan_result := '1';
+ elsif r.b.mantissa(54) = '0' then
+ -- unit bit clear: B has to be renormalized first
+ v.state := RENORM_B;
+ elsif r.b.exponent(0) = '0' then
+ -- even exponent: go straight to RSQRT_1
+ v.state := RSQRT_1;
+ else
+ -- odd exponent: go through RENORM_B2 first
+ v.state := RENORM_B2;
+ end if;
+ when NAN =>
+ v.state := NAN_RESULT;
+ when INFINITY =>
+ if r.b.negative = '1' then
+ v.fpscr(FPSCR_VXSQRT) := '1';
+ qnan_result := '1';
+ else
+ -- a positive infinity gives a zero result
+ v.result_class := ZERO;
+ end if;
+ arith_done := '1';
+ when ZERO =>
+ -- a zero operand gives an infinity carrying B's sign and
+ -- raises zero_divide
+ v.result_class := INFINITY;
+ zero_divide := '1';
+ arith_done := '1';
+ end case;
+
+ when DO_FMADD =>
+ -- fmadd, fmsub, fnmadd, fnmsub
+ -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
+ -- else AIN_B
+ -- Entry state for the fused multiply-add family. Operands A and C
+ -- are multiplied and B is the addend. Result sign, class and
+ -- exponent start out as those of A and are overridden below.
+ v.result_sign := r.a.negative;
+ v.result_class := r.a.class;
+ v.result_exp := r.a.exponent;
+ v.fpscr(FPSCR_FR) := '0';
+ v.fpscr(FPSCR_FI) := '0';
+ v.use_a := '1';
+ v.use_b := '1';
+ v.use_c := '1';
+ -- is_add folds the three operand signs together with insn(1);
+ -- is_subtract below is its complement.
+ -- NOTE(review): insn(1) appears to distinguish the add/subtract
+ -- forms and insn(2) the negated forms -- confirm against the
+ -- instruction encoding.
+ is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
+ if r.a.class = FINITE and r.c.class = FINITE and
+ (r.b.class = FINITE or r.b.class = ZERO) then
+ v.is_subtract := not is_add;
+ mulexp := r.a.exponent + r.c.exponent;
+ v.result_exp := mulexp;
+ -- Make sure A and C are normalized
+ if r.a.mantissa(54) = '0' then
+ v.state := RENORM_A;
+ elsif r.c.mantissa(54) = '0' then
+ v.state := RENORM_C;
+ elsif r.b.class = ZERO then
+ -- no addend, degenerates to multiply
+ v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
+ f_to_multiply.valid <= '1';
+ v.is_multiply := '1';
+ v.state := MULT_1;
+ elsif r.madd_cmp = '0' then
+ -- addend is bigger, do multiply first
+ v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
+ f_to_multiply.valid <= '1';
+ v.state := FMADD_1;
+ else
+ -- product is bigger, shift B right and use it as the
+ -- addend to the multiplier
+ v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
+ -- for subtract, multiplier does B - A * C
+ v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
+ v.result_exp := r.b.exponent;
+ v.state := FMADD_2;
+ end if;
+ else
+ -- At least one of A, C is not FINITE, or B is NaN or infinite
+ if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
+ v.state := NAN_RESULT;
+ elsif (r.a.class = ZERO and r.c.class = INFINITY) or
+ (r.a.class = INFINITY and r.c.class = ZERO) then
+ -- invalid operation, construct QNaN
+ v.fpscr(FPSCR_VXIMZ) := '1';
+ qnan_result := '1';
+ elsif r.a.class = INFINITY or r.c.class = INFINITY then
+ if r.b.class = INFINITY and is_add = '0' then
+ -- invalid operation, construct QNaN
+ v.fpscr(FPSCR_VXISI) := '1';
+ qnan_result := '1';
+ else
+ -- result is infinity
+ v.result_class := INFINITY;
+ v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
+ arith_done := '1';
+ end if;
+ else
+ -- Here A is zero, C is zero, or B is infinity
+ -- Result is +/-B in all of those cases
+ v.opsel_a := AIN_B;
+ if r.b.class /= ZERO or is_add = '1' then
+ v.negate := not (r.insn(1) xor r.insn(2));
+ else
+ -- have to be careful about rule for 0 - 0 result sign
+ v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
+ end if;
+ v.state := EXC_RESULT;
end if;
- arith_done := '1';
end if;
 when RENORM_A =>
 renormalize := '1';
 v.state := RENORM_A2;
+ if r.insn(4) = '1' then  -- pre-select the A-input for RENORM_A2
+ v.opsel_a := AIN_C;  -- fmul/fmadd: C is looked at next
+ else
+ v.opsel_a := AIN_B;  -- fdiv: B is looked at next
+ end if;
 when RENORM_A2 =>
+ -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv (selected in RENORM_A)
 set_a := '1';
 v.result_exp := new_exp;
 if r.insn(4) = '1' then
- opsel_a <= AIN_C;
 if r.c.mantissa(54) = '1' then
- v.first := '1';
- v.state := MULT_1;
+ if r.insn(3) = '0' or r.b.class = ZERO then  -- no addend to deal with: plain multiply
+ v.first := '1';
+ v.state := MULT_1;
+ else  -- multiply-add: re-enter DO_FMADD with A normalized
+ v.madd_cmp := '0';
+ if new_exp + 1 >= r.b.exponent then  -- DO_FMADD treats madd_cmp = '1' as "product is bigger"
+ v.madd_cmp := '1';
+ end if;
+ v.opsel_a := AIN_B;
+ v.state := DO_FMADD;
+ end if;
 else
 v.state := RENORM_C;
 end if;
 else
- opsel_a <= AIN_B;
- if r.b.mantissa(54) = '1' then
- v.first := '1';
- v.state := DIV_2;
- else
- v.state := RENORM_B;
+ if r.b.mantissa(54) = '1' then  -- B already normalized: start the divide
+ v.first := '1';
+ v.state := DIV_2;
+ else  -- B still needs renormalizing
+ v.state := RENORM_B;
 end if;
 end if;
 when RENORM_B =>
 renormalize := '1';
+ renorm_sqrt := r.is_sqrt;  -- for sqrt, bit 0 of the leading-zero count is forced to '1' so the exponent ends up even
 v.state := RENORM_B2;
 when RENORM_B2 =>
 set_b := '1';
- v.result_exp := r.result_exp + r.shift;
+ if r.is_sqrt = '0' then  -- non-sqrt operations: adjust the running result exponent by r.shift
+ v.result_exp := r.result_exp + r.shift;
+ else  -- sqrt: take new_exp as the result exponent
+ v.result_exp := new_exp;
+ end if;
+ v.opsel_a := AIN_B;  -- LOOKUP expects r.opsel_a = AIN_B
 v.state := LOOKUP;
when RENORM_C =>
 when RENORM_C2 =>
 set_c := '1';
 v.result_exp := new_exp;
- v.first := '1';
- v.state := MULT_1;
+ if r.insn(3) = '0' or r.b.class = ZERO then  -- no addend to deal with: plain multiply
+ v.first := '1';
+ v.state := MULT_1;
+ else  -- multiply-add: re-enter DO_FMADD with C normalized
+ v.madd_cmp := '0';
+ if new_exp + 1 >= r.b.exponent then  -- DO_FMADD treats madd_cmp = '1' as "product is bigger"
+ v.madd_cmp := '1';
+ end if;
+ v.opsel_a := AIN_B;
+ v.state := DO_FMADD;
+ end if;
+
+ when ADD_1 =>
+ -- B is on its way into R during this cycle
+ v.longmask := '0';
+ v.result_exp := r.b.exponent;
+ -- shift count for ADD_SHIFT
+ v.shift := r.b.exponent - r.a.exponent;
+ v.state := ADD_SHIFT;
 when ADD_SHIFT =>
+ -- r.shift = - exponent difference, r.longmask = 0 (FMADD_1 also enters here with r.longmask = 0)
 opsel_r <= RES_SHIFT;
+ v.x := s_nz;  -- start X from s_nz
 set_x := '1';
- longmask := '0';
- v.state := ADD_2;
-
- when ADD_2 =>
+ v.longmask := r.single_prec;  -- registered mask selector from the next cycle on
 if r.add_bsmall = '1' then
- opsel_a <= AIN_A;
+ v.opsel_a := AIN_A;  -- registered A-input selection consumed by ADD_2
 else
- opsel_a <= AIN_B;
+ v.opsel_a := AIN_B;
 end if;
+ v.state := ADD_2;
+
+ when ADD_2 =>
+ -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
opsel_b <= BIN_R;
opsel_binv <= r.is_subtract;
carry_in <= r.is_subtract and not r.x;
when ADD_3 =>
-- check for overflow or negative result (can't get both)
+ -- r.shift = -1
if r.r(63) = '1' then
-- result is opposite sign to expected
v.result_sign := not r.result_sign;
-- sum overflowed, shift right
opsel_r <= RES_SHIFT;
set_x := '1';
- v.shift := to_signed(-2, EXP_BITS);
if exp_huge = '1' then
v.state := ROUND_OFLOW;
else
end if;
elsif r.r(54) = '1' then
set_x := '1';
- v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
-- r.x must be zero at this point
v.state := NORMALIZE;
end if;
+ when CMP_1 =>
+ -- r.opsel_a = AIN_A
+ -- adder operands: A, inverted R, carry-in of 1 (two's complement A - R)
+ carry_in <= '1';
+ opsel_binv <= '1';
+ opsel_b <= BIN_R;
+ v.state := CMP_2;
+
+ when CMP_2 =>
+ -- default: neither of the two cases below applies
+ v.cr_result := r.a.negative & not r.a.negative & "00";
+ if r.r(63) = '1' then
+ -- A is smaller in magnitude
+ v.cr_result := not r.a.negative & r.a.negative & "00";
+ elsif (r_hi_nz or r_lo_nz) = '0' then
+ -- difference is zero
+ v.cr_result := "0010";
+ end if;
+ -- mirror the CR field into the FPSCR condition bits and finish
+ v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
+ v.state := IDLE;
+ v.instr_done := '1';
+
when MULT_1 =>
f_to_multiply.valid <= r.first;
opsel_r <= RES_MULT;
v.state := FINISH;
end if;
+ when FMADD_1 =>
+ -- Addend is bigger here (entered from DO_FMADD when r.madd_cmp = '0')
+ v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
+ -- note v.shift is at most -2 here
+ v.shift := r.result_exp - r.b.exponent;  -- consumed by ADD_SHIFT as minus the exponent difference
+ opsel_r <= RES_MULT;
+ opsel_s <= S_MULT;  -- S takes bits 57 downto 2 of the multiplier result
+ set_s := '1';
+ f_to_multiply.valid <= r.first;
+ if multiply_to_f.valid = '1' then  -- product available: continue on the common add path
+ v.longmask := '0';
+ v.state := ADD_SHIFT;
+ end if;
+
+ when FMADD_2 =>
+ -- Entered when the product may be the bigger term.
+ -- On entry r.r = r.b.mantissa and
+ -- r.shift = addend exp - product exp + 64.
+ opsel_s <= S_SHIFT;
+ set_s := '1';
+ v.state := FMADD_3;
+ -- take the +64 bias back out for FMADD_3
+ v.shift := r.shift - to_signed(64, EXP_BITS);
+
+ when FMADD_3 =>
+ -- on entry r.shift = addend exp - product exp
+ v.first := '1';
+ v.state := FMADD_4;
+ opsel_r <= RES_SHIFT;
+
+ when FMADD_4 =>
+ msel_add <= MULADD_RS;  -- multiplier addend is the concatenation of R and S
+ f_to_multiply.valid <= r.first;
+ msel_inv <= r.is_subtract;
+ opsel_r <= RES_MULT;
+ opsel_s <= S_MULT;  -- S takes bits 57 downto 2 of the multiplier result
+ set_s := '1';
+ if multiply_to_f.valid = '1' then  -- stay here until the multiplier reports a result
+ v.state := FMADD_5;
+ end if;
+
+ when FMADD_5 =>
+ -- negate R:S:X if negative
+ if r.r(63) = '1' then
+ v.result_sign := not r.result_sign;
+ opsel_ainv <= '1';  -- invert the adder's A input
+ carry_in <= not (s_nz or r.x);  -- carry into R only when S and X are both zero
+ opsel_s <= S_NEG;  -- S becomes (not S) + (not X)
+ set_s := '1';
+ end if;
+ v.shift := to_signed(56, EXP_BITS);  -- shift count expected by FMADD_6
+ v.state := FMADD_6;
+
+ when FMADD_6 =>
+ -- r.shift = 56 (or 0, but only if r is now nonzero)
+ if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
+ if s_nz = '0' then
+ -- must be a subtraction, and r.x must be zero
+ v.result_class := ZERO;
+ v.result_sign := r.round_mode(1) and r.round_mode(0);  -- sign of the zero result depends on the rounding mode bits
+ arith_done := '1';
+ else
+ -- R is all zeroes but there are non-zero bits in S
+ -- so shift them into R and set S to 0
+ opsel_r <= RES_SHIFT;
+ set_s := '1';
+ -- stay in state FMADD_6
+ end if;
+ elsif r.r(56 downto 54) = "001" then  -- leading one already at bit 54: no normalization needed
+ v.state := FINISH;
+ else
+ renormalize := '1';  -- have the normalizing shift count computed for NORMALIZE
+ v.state := NORMALIZE;
+ end if;
+
 when LOOKUP =>
- opsel_a <= AIN_B;
+ -- r.opsel_a = AIN_B (RENORM_B2 sets it before coming here)
 -- wait one cycle for inverse_table[B] lookup
 v.first := '1';
- v.state := DIV_2;
+ if r.insn(4) = '0' then  -- instruction bits 4, 3 and 2 pick the consumer of the table estimate
+ if r.insn(3) = '0' then
+ v.state := DIV_2;
+ else
+ v.state := SQRT_1;
+ end if;
+ elsif r.insn(2) = '0' then
+ v.state := FRE_1;
+ else
+ v.state := RSQRT_1;
+ end if;
when DIV_2 =>
-- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
end if;
v.state := FINISH;
+ when FRE_1 =>
+ -- load R with the table estimate (misc result "0111"), then normalize
+ misc_sel <= "0111";
+ opsel_r <= RES_MISC;
+ v.state := NORMALIZE;
+ v.shift := to_signed(1, EXP_BITS);
+
+ when FTDIV_1 =>
+ v.cr_result(1) := exp_tiny or exp_huge;
+ if not (exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0') then
+ -- no reason to stop yet: set up a further check
+ v.doing_ftdiv := "10";
+ v.shift := r.a.exponent;
+ else
+ -- finished with this instruction
+ v.state := IDLE;
+ v.instr_done := '1';
+ end if;
+
+ when RSQRT_1 =>
+ -- load R with the table estimate (misc result "0111")
+ misc_sel <= "0111";
+ opsel_r <= RES_MISC;
+ -- result exponent is minus half of B's exponent (sign-extended halving)
+ sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
+ v.result_exp := - sqrt_exp;
+ v.state := NORMALIZE;
+ v.shift := to_signed(1, EXP_BITS);
+
+ when SQRT_1 =>
+ -- put invsqr[B] in R and compute P = invsqr[B] * B
+ -- also transfer B (in R) to A
+ set_a := '1';
+ opsel_r <= RES_MISC;
+ misc_sel <= "0111";  -- misc result "0111" carries the table estimate (inverse_est)
+ msel_1 <= MUL1_B;
+ msel_2 <= MUL2_LUT;
+ f_to_multiply.valid <= '1';
+ v.shift := to_signed(-1, EXP_BITS);  -- SQRT_2 shifts R right one place with this
+ v.count := "00";  -- iteration counter, advanced in SQRT_7
+ v.state := SQRT_2;
+
+ when SQRT_2 =>
+ -- r.shift = -1 on entry: move R one place to the right.
+ -- The multiplier result is not expected in this cycle.
+ v.first := '1';
+ v.state := SQRT_3;
+ opsel_r <= RES_SHIFT;
+
+ when SQRT_3 =>
+ -- put R into Y, wait for product from multiplier
+ msel_2 <= MUL2_R;
+ set_y := r.first;  -- Y is written only in the first cycle spent in this state
+ pshift := '1';
+ if multiply_to_f.valid = '1' then
+ -- put result into R
+ opsel_r <= RES_MULT;
+ v.first := '1';
+ v.state := SQRT_4;
+ end if;
+
+ when SQRT_4 =>
+ -- compute 1.5 - Y * P
+ msel_1 <= MUL1_Y;
+ msel_2 <= MUL2_P;
+ msel_add <= MULADD_CONST;  -- the constant addend is 1.5 when r.is_sqrt = '1' (2.0 otherwise)
+ msel_inv <= '1';
+ f_to_multiply.valid <= r.first;  -- start the multiply only in the first cycle of this state
+ pshift := '1';
+ if multiply_to_f.valid = '1' then
+ v.state := SQRT_5;
+ end if;
+
+ when SQRT_5 =>
+ -- start the multiply Y * P, whose product becomes the new Y
+ f_to_multiply.valid <= '1';
+ msel_2 <= MUL2_P;
+ msel_1 <= MUL1_Y;
+ v.state := SQRT_6;
+ v.first := '1';
+
+ when SQRT_6 =>
+ -- pipeline in R = R * P (second multiply, issued behind the one from SQRT_5)
+ msel_1 <= MUL1_R;
+ msel_2 <= MUL2_P;
+ f_to_multiply.valid <= r.first;  -- issue only in the first cycle of this state
+ pshift := '1';
+ if multiply_to_f.valid = '1' then
+ v.first := '1';
+ v.state := SQRT_7;
+ end if;
+
+ when SQRT_7 =>
+ -- first multiply is done, put result in Y
+ msel_2 <= MUL2_P;
+ set_y := r.first;
+ -- wait for second multiply (should be here already)
+ pshift := '1';
+ if multiply_to_f.valid = '1' then
+ -- put result into R
+ opsel_r <= RES_MULT;
+ -- v.first is wanted on both exits (loop back or move on),
+ -- so it is assigned once here
+ v.first := '1';
+ v.count := r.count + 1;
+ if r.count < 2 then
+ -- count 0 and 1 loop back, giving three passes in total
+ v.state := SQRT_4;
+ else
+ v.state := SQRT_8;
+ end if;
+ end if;
+
+ when SQRT_8 =>
+ -- compute P = A - R * R, which can be +ve or -ve
+ -- we arranged for B to be put into A earlier (set_a in SQRT_1)
+ msel_1 <= MUL1_R;
+ msel_2 <= MUL2_R;
+ msel_add <= MULADD_A;  -- addend is A's mantissa
+ msel_inv <= '1';
+ pshift := '1';
+ f_to_multiply.valid <= r.first;  -- issue only in the first cycle of this state
+ if multiply_to_f.valid = '1' then
+ v.first := '1';
+ v.state := SQRT_9;
+ end if;
+
+ when SQRT_9 =>
+ -- compute P = P * Y
+ -- since Y is an estimate of 1/sqrt(B), this makes P an
+ -- estimate of the adjustment needed to R. Since the error
+ -- could be negative and we have an unsigned multiplier, the
+ -- upper bits can be wrong, but it turns out the lowest 8 bits
+ -- are correct and are all we need (given 3 iterations through
+ -- SQRT_4 to SQRT_7).
+ msel_1 <= MUL1_Y;
+ msel_2 <= MUL2_P;
+ pshift := '1';
+ f_to_multiply.valid <= r.first;  -- issue only in the first cycle of this state
+ if multiply_to_f.valid = '1' then  -- SQRT_10 then adds the low bits of P onto R
+ v.state := SQRT_10;
+ end if;
+
+ when SQRT_10 =>
+ -- R := R + (low 8 bits of P, sign-extended, divided by 4).
+ -- P is in 8.56 format and R in 10.54 format, which is
+ -- where the factor of 4 comes from.
+ opsel_b <= BIN_PS6;
+ v.first := '1';
+ v.shift := to_signed(1, EXP_BITS);
+ -- result exponent is half of B's exponent (sign-extended halving)
+ sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
+ v.result_exp := sqrt_exp;
+ v.state := SQRT_11;
+
+ when SQRT_11 =>
+ -- compute P = A - R * R (remainder)
+ -- also put 2 * R + 1 into B for comparison with P
+ msel_1 <= MUL1_R;
+ msel_2 <= MUL2_R;
+ msel_add <= MULADD_A;
+ msel_inv <= '1';
+ f_to_multiply.valid <= r.first;  -- issue only in the first cycle of this state
+ shiftin := '1';  -- ORed into the top S bit at the shifter input
+ set_b := r.first;  -- B is written only in the first cycle of this state
+ if multiply_to_f.valid = '1' then
+ v.state := SQRT_12;
+ end if;
+
+ when SQRT_12 =>
+ -- compare the remainder P against B = 2*R + 1
+ if pcmpb_lt /= '1' then
+ -- remainder is not below B: bump the root by one;
+ -- X is clear only when P equals B
+ v.x := not pcmpb_eq;
+ carry_in <= '1';
+ else
+ -- root is already right; X records a non-zero remainder
+ v.x := r.p(58) or px_nz;
+ end if;
+ v.state := FINISH;
+
 when INT_SHIFT =>
+ -- r.shift = b.exponent - 52 on entry; it is reloaded with -2 below for INT_ROUND
 opsel_r <= RES_SHIFT;
 set_x := '1';
 v.state := INT_ROUND;
 v.shift := to_signed(-2, EXP_BITS);
when INT_ROUND =>
+ -- r.shift = -2
opsel_r <= RES_SHIFT;
round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
end if;
when INT_ISHIFT =>
+ -- r.shift = b.exponent - 54;
opsel_r <= RES_SHIFT;
v.state := INT_FINAL;
arith_done := '1';
 when FRI_1 =>
+ -- r.shift = b.exponent - 52 on entry; R is shifted by that amount before ROUNDING
 opsel_r <= RES_SHIFT;
 set_x := '1';
- v.shift := to_signed(-2, EXP_BITS);
 v.state := ROUNDING;
when FINISH =>
elsif exp_huge = '1' then
v.state := ROUND_OFLOW;
else
- v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
end if;
end if;
when NORMALIZE =>
-- Shift so we have 9 leading zeroes (we know R is non-zero)
+ -- r.shift = clz(r.r) - 9
opsel_r <= RES_SHIFT;
set_x := '1';
if exp_tiny = '1' then
elsif exp_huge = '1' then
v.state := ROUND_OFLOW;
else
- v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
end if;
when ROUND_UFLOW =>
+ -- r.shift = - amount by which exponent underflows
v.tiny := '1';
if r.fpscr(FPSCR_UE) = '0' then
-- disabled underflow exception case
-- have to denormalize before rounding
opsel_r <= RES_SHIFT;
set_x := '1';
- v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
else
-- enabled underflow exception case
renormalize := '1';
v.state := NORMALIZE;
else
- v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
end if;
end if;
else
-- enabled overflow exception
v.result_exp := r.result_exp - bias_exp;
- v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
end if;
when ROUNDING =>
- opsel_amask <= '1';
+ opsel_mask <= '1';
round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
if round(1) = '1' then
- -- set mask to increment the LSB for the precision
- opsel_b <= BIN_MASK;
- carry_in <= '1';
+ -- increment the LSB for the precision
+ opsel_b <= BIN_RND;
v.shift := to_signed(-1, EXP_BITS);
v.state := ROUNDING_2;
else
when ROUNDING_2 =>
-- Check for overflow during rounding
+ -- r.shift = -1
v.x := '0';
if r.r(55) = '1' then
opsel_r <= RES_SHIFT;
end if;
when ROUNDING_3 =>
+ -- r.shift = clz(r.r) - 9
mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
if mant_nz = '0' then
v.result_class := ZERO;
end if;
when DENORM =>
+ -- r.shift = result_exp - -1022
opsel_r <= RES_SHIFT;
arith_done := '1';
+ when NAN_RESULT =>
+ -- Choose which NaN operand supplies the result: A has priority,
+ -- then B, then C; only operands marked as in use are considered.
+ if r.use_a = '1' and r.a.class = NAN then
+ v.opsel_a := AIN_A;
+ elsif r.use_b = '1' and r.b.class = NAN then
+ v.opsel_a := AIN_B;
+ elsif r.use_c = '1' and r.c.class = NAN then
+ v.opsel_a := AIN_C;
+ end if;
+ -- Any in-use NaN operand with mantissa bit 53 clear is a
+ -- signalling NaN: record VXSNAN and raise invalid.
+ if (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') or
+ (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
+ (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') then
+ invalid := '1';
+ v.fpscr(FPSCR_VXSNAN) := '1';
+ end if;
+ v.state := EXC_RESULT;
+
+ when EXC_RESULT =>
+ -- The result is a copy of one input operand, named by r.opsel_a
+ -- (AIN_B or AIN_C; any other value means A). r.negate flips its sign.
+ if r.opsel_a = AIN_B then
+ v.result_class := r.b.class;
+ v.result_exp := r.b.exponent;
+ v.result_sign := r.b.negative xor r.negate;
+ elsif r.opsel_a = AIN_C then
+ v.result_class := r.c.class;
+ v.result_exp := r.c.exponent;
+ v.result_sign := r.c.negative xor r.negate;
+ else
+ v.result_class := r.a.class;
+ v.result_exp := r.a.exponent;
+ v.result_sign := r.a.negative xor r.negate;
+ end if;
+ arith_done := '1';
+
end case;
if zero_divide = '1' then
v.result_sign := '0';
misc_sel <= "0001";
opsel_r <= RES_MISC;
+ arith_done := '1';
+ end if;
+ if invalid = '1' then
+ v.invalid := '1';
end if;
if arith_done = '1' then
-- Enabled invalid exception doesn't write result or FPRF
-- Neither does enabled zero-divide exception
- if (invalid and r.fpscr(FPSCR_VE)) = '0' and
+ if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
(zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
v.writing_back := '1';
v.update_fprf := '1';
maddend := (others => '0');
case msel_add is
when MULADD_CONST =>
- -- addend is 2.0 in 16.112 format
- maddend(113) := '1'; -- 2.0
+ -- addend is 2.0 or 1.5 in 16.112 format
+ if r.is_sqrt = '0' then
+ maddend(113) := '1'; -- 2.0
+ else
+ maddend(112 downto 111) := "11"; -- 1.5
+ end if;
when MULADD_A =>
-- addend is A in 16.112 format
maddend(121 downto 58) := r.a.mantissa;
+ when MULADD_RS =>
+ -- addend is concatenation of R and S in 16.112 format
+ maddend := "000000" & r.r & r.s & "00";
when others =>
end case;
if msel_inv = '1' then
-- Data path.
-- This has A and B input multiplexers, an adder, a shifter,
-- count-leading-zeroes logic, and a result mux.
- if longmask = '1' then
+ if r.longmask = '1' then
mshift := r.shift + to_signed(-29, EXP_BITS);
else
mshift := r.shift;
else
mask := right_mask(unsigned(mshift(5 downto 0)));
end if;
- case opsel_a is
+ case r.opsel_a is
when AIN_R =>
in_a0 := r.r;
when AIN_A =>
if opsel_ainv = '1' then
in_a0 := not in_a0;
end if;
- if opsel_amask = '1' then
- in_a0 := in_a0 and not mask;
- end if;
in_a <= in_a0;
case opsel_b is
when BIN_ZERO =>
in_b0 := (others => '0');
when BIN_R =>
in_b0 := r.r;
- when BIN_MASK =>
- in_b0 := mask;
+ when BIN_RND =>
+ round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
+ in_b0 := round_inc;
when others =>
- in_b0 := (others => '0');
+ -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
+ in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
end case;
if opsel_binv = '1' then
in_b0 := not in_b0;
end if;
in_b <= in_b0;
if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
- shift_res := shifter_64(r.r & x"00000000000000",
+ shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
std_ulogic_vector(r.shift(6 downto 0)));
else
shift_res := (others => '0');
end if;
+ sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
+ if opsel_mask = '1' then
+ sum(1 downto 0) := "00";
+ if r.single_prec = '1' then
+ sum(30 downto 2) := (others => '0');
+ end if;
+ end if;
case opsel_r is
when RES_SUM =>
- result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
+ result <= sum;
when RES_SHIFT =>
result <= shift_res;
when RES_MULT =>
when "0110" =>
-- fmrgew result
misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
+ when "0111" =>
+ misc := 10x"000" & inverse_est & 35x"000000000";
when "1000" =>
-- max positive result for fctiw[z]
misc := x"000000007fffffff";
result <= misc;
end case;
v.r := result;
+ if set_s = '1' then
+ case opsel_s is
+ when S_NEG =>
+ v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
+ when S_MULT =>
+ v.s := multiply_to_f.result(57 downto 2);
+ when S_SHIFT =>
+ v.s := shift_res(63 downto 8);
+ if shift_res(7 downto 0) /= x"00" then
+ v.x := '1';
+ end if;
+ when others =>
+ v.s := (others => '0');
+ end case;
+ end if;
if set_a = '1' then
v.a.exponent := new_exp;
if renormalize = '1' then
clz := count_left_zeroes(r.r);
+ if renorm_sqrt = '1' then
+ -- make denormalized value end up with even exponent
+ clz(0) := '1';
+ end if;
v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
end if;