2 Authors : Vinod.G, Rishi Naidu, Aditya Govardhan
3 Email : g.vinod1993@gmail.com
4 Last Update : 27th November 2017
5 See LICENSE for more details
7 Implementation is based on an IEEE paper titled:
8 "Implementation of Single Precision Floating Point Square Root on FPGAs"
14 `include "defined_parameters.bsv"
15 import defined_types::*;
18 import SpecialFIFOs::*;
// State carried between square-root iterations. mkfpu_sqrt keeps one value of this type in
// rg_inter_stage: _start writes it first and each iteration rule reads and rewrites it.
21 Bit#(TMul#(TAdd#(fpman,3),2)) mantissa; //Extended radicand, 2*(fpman+3) bits; each iteration consumes its two MSBs and then shifts it left by 2
22 Bit#(TAdd#(fpman,3)) result_mantissa; //Output mantissa, built up one bit per iteration (fpman+3 bits)
23 Bit#(TAdd#(fpexp,1)) exponent; //Result exponent computed in _start; one bit wider than the exponent field
24 bit sign; //Final sign bit
25 Bit#(TAdd#(fpman,6)) remainder; //Partial remainder after each iteration; its MSB is tested as the "remainder < 0" flag
26 Bit#(TAdd#(fpman,3)) root; //Root after each iteration
27 Bit#(3) rounding_mode; //Rounding mode passed to _start, consumed by the final stage
28 }Stage_data#(numeric type fpman, numeric type fpexp) deriving(Bits,Eq); //Data structure of the inter-stage register
34 interface Ifc_fpu_sqrt#(numeric type fpinp, numeric type fpman, numeric type fpexp);
// Interface of the parameterised square-root unit.
//   fpinp : width of the packed result word
//   fpman : width of the mantissa field
//   fpexp : width of the exponent field
// _start submits one operand, already split into sign / mantissa / exponent, together with a
// 3-bit rounding mode and 5 condition flags. As used by mkfpu_sqrt: condFlags[0] = signalling NaN,
// condFlags[2] | condFlags[0] = NaN, condFlags[1] = infinity, condFlags[4] = subnormal;
// condFlags[3] is presumably the zero flag -- TODO confirm against the producer of these flags.
36 method Action _start(Bit#(1) sign, Bit#(fpman) lv_mantissa, Bit#(fpexp) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags);
39 // method Action deque_buffer();
// get_result carries the result as a Maybe value (Invalid when nothing is available).
40 method Maybe#(Floating_output#(fpinp)) get_result();
45 `ifdef fpu_hierarchical
// Fixed-width interfaces, compiled only when fpu_hierarchical is defined.
// They mirror Ifc_fpu_sqrt with the numeric parameters filled in.
46 interface Ifc_fpu_sqrt32;
// 23-bit mantissa, 8-bit exponent, 32-bit result.
48 method Action _start(Bit#(1) sign, Bit#(23) lv_mantissa, Bit#(8) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags);
51 // method Action deque_buffer();
52 method Maybe#(Floating_output#(32)) get_result();
56 interface Ifc_fpu_sqrt64;
// 52-bit mantissa, 11-bit exponent, 64-bit result.
58 method Action _start(Bit#(1) sign, Bit#(52) lv_mantissa, Bit#(11) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags);
60 // method Action deque_buffer();
61 method Maybe#(Floating_output#(64)) get_result();
69 module mkfpu_sqrt(Ifc_fpu_sqrt#(fpinp,fpman,fpexp))
// Iterative square-root unit. _start performs the first iteration when rg_state == 0; the rules
// rl_stage2, rl_inter_stage and rl_final_stage then each perform one iteration per firing,
// selected by rg_state, producing one result bit per iteration.
// Size relations required between the numeric types:
//   fpinp     = fpman + fpexp + 1   (packed result width)
//   fpman5    = fpman3 + 2
//   fpman6    = fpman5 + 1
//   ext_fpman = 2 * fpman3          (width of the extended radicand)
//   ext_fplog = log2(1 + ext_fpman) (width of the leading-zero count)
//   fpexp1   >= ext_fplog
// fpman3 and fpexp1 are presumably fpman+3 and fpexp+1 (consistent with Stage_data) -- their
// defining provisos are not visible here, TODO confirm.
71 Add#(TAdd#(fpman,fpexp),1,fpinp),
73 Add#(fpman3,2,fpman5),
75 Add#(fpman5,1,fpman6),
76 Mul#(fpman3,2,ext_fpman),
78 Log#(TAdd#(1,ext_fpman),ext_fplog),
83 Add#(d__, ext_fplog, fpexp1),
86 //Add#(a__, TAdd#(1, TAdd#(fpexp, TAdd#(1, TSub#(fpman, 1)))), 64),
87 //Add#(b__, ext_fplog, fpexp1),
89 //Add#(d__,TAdd#(fpman3,1),e__),
91 //Add#(1,f__,ext_fpman),
// Integer views of the numeric types, used for bit indexing and state comparisons below.
99 let fPMAN = valueOf(fpman);
100 let fPMAN3 = valueOf(fpman3);
101 let fPMAN5 = valueOf(fpman5);
102 let fPMAN6 = valueOf(fpman6);
103 let fPEXP = valueOf(fpexp);
104 let fPINP = valueOf(fpinp);
105 let eXT = valueOf(ext_fpman);
107 Reg#(Maybe#(Floating_output#(fpinp))) ff_final_out <- mkDReg(tagged Invalid); //Final output register (a DReg, not a FIFO); its default value is Invalid
109 ConfigReg#(Stage_data#(fpman,fpexp)) rg_inter_stage <- mkConfigReg(?); //Inter Stage register
110 ConfigReg#(Bit#(6)) rg_state <-mkConfigReg(0); //State counter of the module: 0 = idle/accepting, otherwise selects the iteration rule
111 Wire#(Bool) wr_flush <- mkDWire(False); //Flush request; defaults to False. No writer is visible in this excerpt.
// The four rules are guarded by disjoint conditions on wr_flush / rg_state; the attribute states that to the scheduler.
112 (*mutually_exclusive = "rl_flush,rl_stage2,rl_inter_stage,rl_final_stage"*)
// Fires when wr_flush is asserted (rule body not visible in this excerpt).
113 rule rl_flush(wr_flush);
116 //***********ITERATION :2********************//
// Second iteration. Fires when rg_state == 1 (the value _start leaves behind for a regular
// operand) and no flush is requested. Performs one add/subtract step on the partial remainder
// without restoring it, derives one root bit from the sign of the new remainder, and advances rg_state.
117 rule rl_stage2 (rg_state==1 && !wr_flush);
118 let lv_remainder = rg_inter_stage.remainder; //Get remainder data from stage1
119 Bit#(fpman3) lv_root = rg_inter_stage.root; //Get root value from stage1
120 let mantissa = rg_inter_stage.mantissa; //Updated mantissa
121 let rounding_mode = rg_inter_stage.rounding_mode;
122 Bit#(fpman3) result_mantissa = rg_inter_stage.result_mantissa;//Get result value
// Remainder shifted left by 2 with the next two radicand bits (the two MSBs of mantissa) appended.
123 Bit#(fpman6) lv_remainder_temp = {lv_remainder[fPMAN3:0],mantissa[eXT-1],mantissa[eXT-2]};
// Addend {root,2'b11}, used when the previous remainder was negative.
124 Bit#(fpman6) lv_root_temp_1 = {1'b0,lv_root[fPMAN3-1:0],1'b1,1'b1};
// Subtrahend {root,2'b01}, used when the previous remainder was non-negative.
125 Bit#(fpman6) lv_root_temp_2 = {1'b0,lv_root[fPMAN3-1:0],1'b0,1'b1};
126 //Determining remainder
// Bit fPMAN5 is the MSB of the fpman6-bit remainder and acts as its sign.
127 if (lv_remainder[fPMAN5]==1) begin //When r <0
128 lv_remainder = lv_remainder_temp + lv_root_temp_1;
131 lv_remainder = lv_remainder_temp - lv_root_temp_2;
134 //Determining quotient
// Shift the root left by one and append 0 if the new remainder is negative, 1 otherwise.
135 if(lv_remainder[fPMAN5]==1'b1) begin //When r <0
136 lv_root = {lv_root[fPMAN3-2:0],1'b0};
139 lv_root = {lv_root[fPMAN3-2:0],1'b1};
142 result_mantissa[0]= lv_root[0]; //Storing the next bit in result_mantissa
143 mantissa = mantissa <<2; //Shifting mantissa to get next 2 bits
144 result_mantissa = result_mantissa <<1; //Shifting result_mantissa to make space to store the next bit
145 rg_state <= rg_state +1; //Incrementing state counter
147 `ifdef verbose $display("****************************************State = %d", rg_state); `endif
148 `ifdef verbose $display("Remainder =%h", lv_remainder);`endif
149 `ifdef verbose $display("Mantissa = %h",result_mantissa);`endif
151 //Storing the required values in register
// sign, exponent and rounding_mode are passed through unchanged.
153 rg_inter_stage <= Stage_data{mantissa : mantissa,
154 result_mantissa : result_mantissa,
156 remainder:lv_remainder,
157 sign : rg_inter_stage.sign,
158 exponent : rg_inter_stage.exponent,
159 rounding_mode : rounding_mode
163 //********************ITERATION : 3 TO 25**************
// (The range 3..25 holds for fpman = 23; in general the rule fires for rg_state = 2 .. fpman3-2.)
164 //RECURSIVE STAGE (saves hardware)
// The same rule fires repeatedly, one iteration per firing, reusing rg_inter_stage each time.
// The step is the same as in rl_stage2: add/subtract on the shifted remainder, then one root bit.
165 rule rl_inter_stage (rg_state>1 && rg_state < fromInteger(fPMAN3-1) && !wr_flush );
166 //Here register is used instead of FIFO as we have to read and write in the same cycle
168 let lv_remainder = rg_inter_stage.remainder; //Getting remainder
169 Bit#(fpman3) lv_root = rg_inter_stage.root; //Getting root value
170 let mantissa = rg_inter_stage.mantissa; //Getting updated mantissa value
171 let rounding_mode = rg_inter_stage.rounding_mode;
172 Bit#(fpman3) result_mantissa = rg_inter_stage.result_mantissa; //Getting the result bit of the square root
// Remainder shifted left by 2 with the two MSBs of the radicand appended.
173 Bit#(fpman6) lv_remainder_temp = {lv_remainder[fPMAN3:0],mantissa[eXT-1],mantissa[eXT-2]};
// {root,2'b11}: added when the previous remainder was negative.
174 Bit#(fpman6) lv_root_temp_1 = {1'b0,lv_root[fPMAN3-1:0],1'b1,1'b1};
// {root,2'b01}: subtracted when the previous remainder was non-negative.
175 Bit#(fpman6) lv_root_temp_2 = {1'b0,lv_root[fPMAN3-1:0],1'b0,1'b1};
177 //Determining the remainder
// Bit fPMAN5 is the MSB (sign) of the remainder.
178 if (lv_remainder[fPMAN5]==1'b1) begin //When r <0
179 lv_remainder = lv_remainder_temp + lv_root_temp_1;
182 lv_remainder = lv_remainder_temp - lv_root_temp_2;
185 //Determining quotient
// New root bit is 0 if the updated remainder is negative, 1 otherwise.
186 if (lv_remainder[fPMAN5]==1'b1) begin //When r <0
187 lv_root = {lv_root[fPMAN3-2:0],1'b0};
190 lv_root = {lv_root[fPMAN3-2:0],1'b1};
192 result_mantissa[0] = lv_root[0]; //Storing the result bit from root
193 mantissa = mantissa <<2; //Shifting mantissa to get the next 2 bits
194 result_mantissa = result_mantissa <<1; //Making space for the next bit
195 rg_state <= rg_state +1; //Incrementing state counter
197 `ifdef verbose $display("****************************************State = %d", rg_state);`endif
198 `ifdef verbose $display("Remainder =%h", lv_remainder);`endif
199 `ifdef verbose $display("Mantissa = %h",result_mantissa);`endif
201 //Storing required values in register for next iteration
// sign, exponent and rounding_mode are passed through unchanged.
202 rg_inter_stage <= Stage_data { mantissa:mantissa ,
203 result_mantissa : result_mantissa,
205 remainder:lv_remainder,
206 sign : rg_inter_stage.sign,
207 exponent: rg_inter_stage.exponent,
208 rounding_mode : rounding_mode};
211 //*****************ITERATION :26 ***********************//
// Last iteration (number 26 for fpman = 23; in general rg_state == fpman3-1). Produces the final
// root bit, restores a negative remainder so it can serve as the sticky source, rounds the
// mantissa, and writes the packed result and flags into ff_final_out.
212 rule rl_final_stage (rg_state==fromInteger(fPMAN3-1) && !wr_flush);
213 let lv_remainder = rg_inter_stage.remainder; //Getting remainder value for iteration
214 Bit#(fpman3) lv_root = rg_inter_stage.root; //Getting root value for iteration
215 let mantissa = rg_inter_stage.mantissa; //Getting shifted mantissa value
216 Bit#(fpman3) result_mantissa = rg_inter_stage.result_mantissa; //Getting the result bits
217 let result_exponent = rg_inter_stage.exponent; //Getting the final result exponent value
// Same add/subtract step as in the earlier iterations.
218 Bit#(fpman6) lv_remainder_temp = {lv_remainder[fPMAN3:0],mantissa[eXT-1],mantissa[eXT-2]};
219 Bit#(fpman6) lv_root_temp_1 = {1'b0,lv_root[fPMAN3-1:0],1'b1,1'b1};
220 Bit#(fpman6) lv_root_temp_2 = {1'b0,lv_root[fPMAN3-1:0],1'b0,1'b1};
221 //Determining the remainder
222 if (lv_remainder[fPMAN5]==1'b1) begin //When r <0
223 lv_remainder = lv_remainder_temp + lv_root_temp_1;
226 lv_remainder = lv_remainder_temp - lv_root_temp_2;
228 //Determining quotient
229 if(lv_remainder[fPMAN5]==1) begin //When r <0
230 lv_root = {lv_root[fPMAN3-2:0],1'b0};
233 lv_root = {lv_root[fPMAN3-2:0],1'b1};
// Unlike the earlier iterations, result_mantissa is not shifted after this last bit is stored.
236 result_mantissa[0]= lv_root[0];
// Correction term {root,1'b1}, added back below when the final remainder is negative.
237 Bit#(fpman6) lv_root_rem = {2'b0,lv_root[fPMAN3-1:0],1'b1};
238 //**********Restoring the remainder if the remainder<0***********//
239 //mantissa = mantissa <<2;
240 if (lv_remainder[fPMAN5] == 1'b1) begin
241 //lv_remainder = lv_remainder + {3'b0,lv_root[24:0],1'b1};
242 lv_remainder = lv_remainder + lv_root_rem;
245 //********Carrying out the rounding operation**************//
246 Bit#(3) rounding_mode = rg_inter_stage.rounding_mode;
// Bit 2 of result_mantissa is the LSB of the delivered mantissa; bits 1 and 0 are guard and round.
248 bit lv_roundup =0; //Declaring roundup bit
249 bit lv_guard = result_mantissa[1]; //Setting the guard bit
250 bit lv_round = result_mantissa[0]; //Setting the round bit
251 bit lv_sticky = |(lv_remainder); //Setting the sticky bit: set when any remainder bit is non-zero
252 bit lv_sign = rg_inter_stage.sign; //Getting sign bit
253 bit lv_inexact = lv_guard | lv_round | lv_sticky; //Result is inexact if any discarded information is non-zero
// NOTE(review): no branch is visible for rounding_mode 'b001, so lv_roundup stays 0
// (truncation) for it and for any other unlisted mode -- confirm this is intended.
254 if(rounding_mode== 'b000) // round to nearest, ties to even
255 lv_roundup = lv_guard & (result_mantissa[2] | lv_round | lv_sticky);
256 else if(rounding_mode == 'b100) // round to nearest, ties to max magnitude
257 lv_roundup = lv_guard; //& (lv_round | lv_sticky | ~lv_sign);
258 else if(rounding_mode == 'b011 ) // round up
259 lv_roundup = lv_inexact & (~lv_sign);
260 else if(rounding_mode == 'b010) // round down
261 lv_roundup = lv_inexact & (lv_sign);
// One extra MSB so that a carry out of the mantissa during rounding can be detected.
263 Bit#(TAdd#(fpman3,1)) lv_extended_mantissa = {1'b0,result_mantissa};
264 if (lv_roundup==1) begin
265 lv_extended_mantissa = lv_extended_mantissa + 'd4; //If roundup then add 4 as the LSB for final mantissa is 3rd bit
266 if (lv_extended_mantissa[fPMAN3]==1) //When mantissa overflows
267 result_exponent = result_exponent +1; //Increment exponent by 1
270 //Here most exceptions are taken care of in first stage, so module doesn't perform all iterations
272 `ifdef verbose $display("****************************************State = %d", rg_state);`endif
273 `ifdef verbose $display("Remainder =%h", lv_remainder);`endif
274 `ifdef verbose $display("Mantissa = %h",lv_extended_mantissa);`endif
// Drop the extra exponent bit, and drop the guard/round bits and the leading bits of the mantissa.
275 Bit#(fpexp) exp_out = result_exponent[fPEXP-1:0];
276 Bit#(fpman) man_out = lv_extended_mantissa[fPMAN+1:2];
277 Bit#(fpinp) final_result = {lv_sign, exp_out, man_out}; //Setting the final result
// Only the lowest flag bit (inexact) can be set on this path.
279 ff_final_out <= tagged Valid Floating_output{
280 final_result:final_result,
281 fflags : {4'b0,lv_inexact}
287 //*******************ITERATION :1 *********************************//
// Accepts a new operand; can only fire while rg_state == 0. Normalises the mantissa, computes
// the result exponent and performs the first iteration. Special operands (NaN, infinity, zero,
// negative) are answered directly through ff_final_out; otherwise rg_state is advanced and the
// working state is stored in rg_inter_stage for the iteration rules.
288 method Action _start(Bit#(1) sign, Bit#(fpman) lv_mantissa, Bit#(fpexp) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags) if(rg_state==0);
290 bit lv_is_invalid =0; //Invalid Flag
291 bit signalling_nan = condFlags[0];
292 Bit#(fpexp1) exponent = {1'b0, lv_exponent}; //Input exponent
293 Bit#(ext_fpman) mantissa = '0;
294 Bit#(TAdd#(fpman3,1)) man4_zeros = '0; //Zero padding placed below the input mantissa
296 if(condFlags[4]==1) begin //Subnormal input
297 exponent = exponent + 1;// a tweak to make exponent -126 since 8'b0000000 represents -127 which is not the real exponent of subnormal numbers
// Subnormal: no implicit leading one.
298 mantissa = {1'b0,1'b0,lv_mantissa,man4_zeros};
301 mantissa = {1'b0,1'b1,lv_mantissa,man4_zeros}; //Extend mantissa to 2*(fpman+3) bits with the implicit one (each iteration uses 2 bits of the operand)
303 // `ifdef verbose $display("sign = %b exponent = %b mantissa = %b.%b", sign, exponent, mantissa[eXT-1], _operand1[fPMAN-1:0]);`endif
304 // Int#(9) actual_exponent = unpack(exponent - 'b001111111);
305 // `ifdef verbose $display("actual_exponent = %0d", actual_exponent);`endif
307 /******************subnormal support*********************/
// Normalise so that exactly one zero remains above the leading one, adjusting the exponent
// by the same amount. For a normal input the count is 1 and nothing moves.
308 Bit#(ext_fplog) lv_leading_zeros = pack(countZerosMSB(mantissa));
309 mantissa = mantissa << (lv_leading_zeros - 1);
310 exponent = exponent - (zeroExtend(lv_leading_zeros) - 1); //possibility for a proviso problem
312 if (exponent[0]==0) //If the exponent is even
313 mantissa = mantissa <<1; //Mantissa is left shifted so that Exponent-127 is even
315 Bit#(fpman6) lv_remainder = '0; //Declaring local remainder variable
316 Bit#(fpexp) bias = {1'b0,'1}; //Exponent bias: all ones except the MSB
317 Bit#(fpman3) lv_root = '0; //Declaring local root/quotient variable
318 Bit#(fpman3) result_mantissa = 0; //Will store the square root answer
320 // Bit#(8) result_exponent = (exponent >>1) +'d63 + zeroExtend(exponent[0]); //Calculating the result exponent
321 Bit#(fpexp1) result_exponent = (exponent >> 1) + (zeroExtend((bias-1)>>1)) + zeroExtend(exponent[0]); //Calculating the result exponent
322 `ifdef verbose $display("Flags %h lv_mantissa : %h lv_exponent :%h lv_sign : %b",condFlags,lv_mantissa,lv_exponent,sign); `endif
323 `ifdef verbose $display("Result_exponent %h bias %d exponent >> 1 %h exponent[0] %h",result_exponent,(bias-1) >> 1,exponent >> 1, exponent[0]);`endif
324 //Determining remainder
// NOTE(review): lv_remainder is still all zeros at this point, so the "r < 0" branch is never
// taken in the first iteration; it is kept only for symmetry with the later iterations.
325 if (lv_remainder[fPMAN5]==1) begin //When r <0
326 lv_remainder = {lv_remainder[fPMAN3:0],mantissa[eXT-1],mantissa[eXT-2]} + {1'b0,lv_root[fPMAN3-1:0],1'b1,1'b1};
329 lv_remainder = {lv_remainder[fPMAN3:0],mantissa[eXT-1],mantissa[eXT-2]} - {1'b0,lv_root[fPMAN3-1:0],1'b0,1'b1};
331 `ifdef verbose $display("lv_remainder: %h",lv_remainder);`endif
333 //Determining quotient
// fPMAN+1 is the same index as fPMAN3-2 used in the iteration rules, assuming fpman3 = fpman+3.
334 if (lv_remainder[fPMAN5]==1) begin //When r <0
335 lv_root = {lv_root[fPMAN+1:0],1'b0};
338 lv_root = {lv_root[fPMAN+1:0],1'b1};
341 result_mantissa[0] = lv_root [0]; //Setting the LSB of the result
342 mantissa = mantissa << 2; //Shifting the mantissa to get the next 2 bits of next iteration
343 result_mantissa = result_mantissa << 1; //Shifting the result mantissa to make space for the next bit
// Constant field patterns used to assemble the special-case results below.
348 Bit#(fpexp) exp_all_ones = '1;
349 Bit#(fpexp) exp_all_zeros = '0;
350 Bit#(fpman) man_all_zeros = '0;
351 Bit#(fpman) man_all_ones = '1;
352 Bit#(TSub#(fpman,1)) man1_all_zeros = '0;
// Classify the operand from condFlags. The assignments made in these branches (to lv_inv /
// lv_inf / lv_zero, used below) are not visible in this excerpt.
354 if((condFlags[2] | condFlags[0]) == 1) //operand is NaN
356 else if(condFlags[1] == 1) // check if operand is infinite
358 if(sign == 1) // if -inf then result is NaN
360 else // if +inf then result is +inf
// presumably the zero-operand case -- TODO confirm meaning of condFlags[3]
363 else if(condFlags[3] == 1)
367 if (lv_inv == 1 || (sign == 1 && lv_zero == 0)) begin // when the input is NAN or Negative => Invalid flag is raised
368 ff_final_out <= tagged Valid Floating_output{ final_result:{1'b0, exp_all_ones , {1'b1,man1_all_zeros}}, //Quiet NaN
// Top flag bit is set for a signalling NaN, or for a negative operand whose condFlags[2] is clear.
369 fflags :{signalling_nan | (sign&~condFlags[2]),4'b0}};
371 else if(lv_inf == 1) begin
372 ff_final_out <= tagged Valid Floating_output{ final_result:{1'b0, exp_all_ones , man_all_zeros}, //Infinity
375 else if (lv_zero == 1) begin
// The sign of a zero operand is preserved in the result.
376 ff_final_out <= tagged Valid Floating_output{ final_result:{sign, exp_all_zeros,man_all_zeros}, //Zeros
380 //State counter incremented only when it does not meet any above exceptional cases
381 rg_state <= rg_state+1; //Increment the State_counter for next iteration
384 `ifdef verbose $display("****************************************State = %0d", rg_state);`endif
385 `ifdef verbose $display("Remainder = %b", lv_remainder);`endif
386 `ifdef verbose $display("Mantissa = %b",result_mantissa);`endif
388 //Storing required data in the inter-stage register for the next iteration
389 rg_inter_stage <= Stage_data{ mantissa : mantissa,
390 result_mantissa : result_mantissa,
392 remainder : lv_remainder,
394 exponent : result_exponent,
395 rounding_mode : rounding_mode };
// Result read-out. Body not visible in this excerpt; presumably returns ff_final_out -- TODO confirm.
398 method Maybe#(Floating_output#(fpinp)) get_result();
408 `ifdef fpu_hierarchical
// Fixed-width wrapper: instantiates mkfpu_sqrt with a 32-bit result, 23-bit mantissa and
// 8-bit exponent, and forwards both interface methods to it unchanged.
410 module mkfpu_sqrt32(Ifc_fpu_sqrt32);
411 Ifc_fpu_sqrt#(32,23,8) uut <- mkfpu_sqrt();
412 method Action _start(Bit#(1) sign, Bit#(23) lv_mantissa, Bit#(8) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags);
413 uut._start(sign,lv_mantissa,lv_exponent,rounding_mode,condFlags);
416 // method Action deque_buffer();
417 method Maybe#(Floating_output#(32)) get_result();
418 return uut.get_result();
// Fixed-width wrapper: instantiates mkfpu_sqrt with a 64-bit result, 52-bit mantissa and
// 11-bit exponent, and forwards both interface methods to it unchanged.
426 module mkfpu_sqrt64(Ifc_fpu_sqrt64);
427 Ifc_fpu_sqrt#(64,52,11) uut <- mkfpu_sqrt();
428 method Action _start(Bit#(1) sign, Bit#(52) lv_mantissa, Bit#(11) lv_exponent, Bit#(3) rounding_mode, Bit#(5) condFlags);
429 uut._start(sign,lv_mantissa,lv_exponent,rounding_mode,condFlags);
432 // method Action deque_buffer();
433 method Maybe#(Floating_output#(64)) get_result();
434 return uut.get_result();
442 // //*************Test bench******************//
444 /*module mkTb_fpu_sqrt(Empty);
446 Reg#(Bit#(32)) rg_clock <-mkReg(0);
447 Reg#(Bit#(32)) rg__operand1ut1 <- mkReg(32'h76af0cb2);
448 //Reg#(Bit#(64)) rg__operand1ut1 <- mkReg(64'h019000000000000);
450 Ifc_fpu_sqrt#(32,23,8) square_root <- mkfpu_sqrt;
453 rg_clock<=rg_clock+1;
454 if(rg_clock=='d60) begin
459 rule give__operand1ut(rg_clock==2);
460 `ifdef verbose $display("Giving input %h at %0d", rg__operand1ut1, rg_clock,$time);`endif
461 square_root._start(rg__operand1ut1, 3'b011);
464 rule get_output(square_root.get_result matches tagged Valid .lv_output);
465 `ifdef verbose $display("taking output at %0d", rg_clock);`endif
466 `ifdef verbose $display("Output= %h" , lv_output.final_result,$time);`endif
467 square_root.deque_buffer();
473 module mkTb_fpu_sqrt_2 (Empty);
475 RegFile #(Bit #(10), Bit #(36)) input_data <- mkRegFileFullLoad("./testcases/fpgen_testcases/Sqrt_testcases.hex");
476 Reg #(Bit #(10)) index <- mkReg(0);
477 Reg #(Bit #(32)) state_clock <- mkReg(1);
478 Reg #(Bit #(32)) rg_state <- mkReg(0);
479 /*****************Module Instantiation******************************/
480 // Ifc_fpu_sqrt#(32,23,8) sqrt <- mkfpu_sqrt;
481 /******************File Creation************************************/
482 /* Reg#(int) cnt <- mkReg(0); //File Creation counter
483 let fh <- mkReg(InvalidFile) ; //File Handler
484 rule open (cnt == 0 ) ;
485 File tb_sqrt_output <- $fopen("tb_sqrt_output.hex", "w+");
486 fh <= tb_sqrt_output;
489 /*******************input******************************************/
490 /* rule take_input_in (rg_state == 0);
491 sqrt._start(input_data.sub(index)[35:4], input_data.sub(index)[2:0]);
496 /*******************output*****************************************/
497 /* rule display_output (rg_state == 1 &&& sqrt.get_result matches tagged Valid .abc);
498 $fwrite(fh, "%h\n", abc.final_result[31:0]);
503 /******************end testing*************************************/
504 /* rule end_testing (index == 65);