genlib/cordic: cleanup, documentation, unittests
authorRobert Jordens <jordens@gmail.com>
Sat, 30 Nov 2013 13:55:01 +0000 (06:55 -0700)
committerSebastien Bourdeauducq <sebastien@milkymist.org>
Mon, 2 Dec 2013 11:56:24 +0000 (12:56 +0100)
doc/api.rst
examples/sim/cordic_err.py [new file with mode: 0644]
migen/genlib/cordic.py
migen/test/test_cordic.py [new file with mode: 0644]

index 1362caa987a49633586fbf9bea8477f573f1bfe7..958702da52777e2bd75d6a1a99a1359b9806e081 100644 (file)
@@ -21,3 +21,10 @@ migen API Documentation
 .. automodule:: migen.genlib.coding
         :members:
         :show-inheritance:
+
+:mod:`genlib.cordic` Module
+------------------------------
+
+.. automodule:: migen.genlib.cordic
+        :members:
+        :show-inheritance:
diff --git a/examples/sim/cordic_err.py b/examples/sim/cordic_err.py
new file mode 100644 (file)
index 0000000..4a7bc1b
--- /dev/null
@@ -0,0 +1,115 @@
+import random
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+from migen.fhdl.std import *
+from migen.fhdl import verilog
+from migen.genlib.cordic import Cordic
+from migen.sim.generic import Simulator
+
+
+class TestBench(Module):
+       def __init__(self, n=None, xmax=.98, i=None, **kwargs):
+               self.submodules.cordic = Cordic(**kwargs)
+               if n is None:
+                       n = 1<<flen(self.cordic.xi)
+               self.c = c = 2**(flen(self.cordic.xi) - 1)
+               self.cz = cz = 2**(flen(self.cordic.zi) - 1)
+               if i is None:
+                       i = [(int(xmax*c/self.cordic.gain), 0, int(cz*(i/n - .5)))
+                                       for i in range(n)]
+               self.i = i
+               random.shuffle(self.i)
+               self.ii = iter(self.i)
+               self.o = []
+
+       def do_simulation(self, s):
+               if s.rd(self.cordic.new_in):
+                       try:
+                               xi, yi, zi = next(self.ii)
+                       except StopIteration:
+                               s.interrupt = True
+                               return
+                       s.wr(self.cordic.xi, xi)
+                       s.wr(self.cordic.yi, yi)
+                       s.wr(self.cordic.zi, zi)
+               if s.rd(self.cordic.new_out):
+                       xo = s.rd(self.cordic.xo)
+                       yo = s.rd(self.cordic.yo)
+                       zo = s.rd(self.cordic.zo)
+                       self.o.append((xo, yo, zo))
+
+       def run_io(self): 
+               with Simulator(self) as sim:
+                       sim.run()
+               del self.i[-1], self.o[0]
+               if self.i[0] != (0, 0, 0):
+                       assert self.o[0] != (0, 0, 0)
+               if self.i[-1] != self.i[-2]:
+                       assert self.o[-1] != self.o[-2], self.o[-2:]
+
+def rms_err(width, guard=None, stages=None, n=None):
+       tb = TestBench(width=width, guard=guard, stages=stages,
+                       n=n, eval_mode="combinatorial")
+       tb.run_io()
+       c = 2**(flen(tb.cordic.xi) - 1)
+       cz = 2**(flen(tb.cordic.zi) - 1)
+       g = tb.cordic.gain
+       xi, yi, zi = np.array(tb.i).T/c
+       zi *= c/cz*tb.cordic.zmax
+       xo1, yo1, zo1 = np.array(tb.o).T
+       xo = np.floor(c*g*(np.cos(zi)*xi - np.sin(zi)*yi))
+       yo = np.floor(c*g*(np.sin(zi)*xi + np.cos(zi)*yi))
+       dx = xo1 - xo
+       dy = yo1 - yo
+       mm = np.fabs([dx, dy]).max()
+       rms = np.sqrt(dx**2 + dy**2).sum()/len(xo)
+       return rms, mm
+
+def rms_err_map():
+       widths, stages = np.mgrid[8:33:1, 8:37:1]
+       errf = np.vectorize(lambda w, s: _rms_err(int(w), None, int(s), n=333))
+       err = errf(widths, stages)
+       print(err)
+       lev = np.arange(10)
+       fig, ax = plt.subplots()
+       c1 = ax.contour(widths, stages, err[0], lev/10, cmap=plt.cm.Greys_r)
+       c2 = ax.contour(widths, stages, err[1], lev, cmap=plt.cm.Reds_r)
+       ax.plot(widths[:, 0], stages[0, np.argmin(err[0], 1)], "ko")
+       ax.plot(widths[:, 0], stages[0, np.argmin(err[1], 1)], "ro")
+       print(widths[:, 0], stages[0, np.argmin(err[0], 1)],
+                       stages[0, np.argmin(err[1], 1)])
+       ax.set_xlabel("width")
+       ax.set_ylabel("stages")
+       ax.grid("on")
+       fig.colorbar(c1)
+       fig.colorbar(c2)
+       fig.savefig("cordic_rms.pdf")
+
+def plot_function(**kwargs):
+       tb = TestBench(eval_mode="combinatorial", **kwargs)
+       tb.run_io()
+       c = 2**(flen(tb.cordic.xi) - 1)
+       cz = 2**(flen(tb.cordic.zi) - 1)
+       g = tb.cordic.gain
+       xi, yi, zi = np.array(tb.i).T
+       xo, yo, zo = np.array(tb.o).T
+       fig, ax = plt.subplots()
+       ax.plot(zi, xo, "r,")
+       ax.plot(zi, yo, "g,")
+       ax.plot(zi, zo, "g,")
+
+
+if __name__ == "__main__":
+       c = Cordic(width=16, guard=None, eval_mode="combinatorial")
+       print(verilog.convert(c, ios={c.xi, c.yi, c.zi, c.xo, c.yo, c.zo,
+               c.new_in, c.new_out}))
+       #print(rms_err(8))
+       #rms_err_map()
+       #plot_function(func_mode="hyperbolic", xmax=.3, width=16, n=333)
+       #plot_function(func_mode="circular", width=16, n=333)
+       #plot_function(func_mode="hyperbolic", cordic_mode="vector",
+    #        xmax=.3, width=16, n=333)
+       #plot_function(func_mode="circular", width=16, n=333)
+       plt.show()
index b52ff78ad541600e93608b9cc250e6d248400674..d508257e14e2c7968829156ca9b6ff9f54a60726 100644 (file)
-from math import atan, atanh, log, sqrt, pi
+from math import atan, atanh, log, sqrt, pi, ceil
 
 from migen.fhdl.std import *
 
 class TwoQuadrantCordic(Module):
-       """
+       """Coordinate rotation digital computer
+
+       Trigonometric, and arithmetic functions implemented using
+       additions/subtractions and shifts.
+
        http://eprints.soton.ac.uk/267873/1/tcas1_cordic_review.pdf
+
+       http://www.andraka.com/files/crdcsrvy.pdf
+
+       http://zatto.free.fr/manual/Volder_CORDIC.pdf
+
+       The way the CORDIC is executed is controlled by `eval_mode`.
+       If `"iterative"` the stages are iteratively evaluated, one per clock
+       cycle. This mode uses the least amount of registers, but has the
+       lowest throughput and highest latency.  If `"pipelined"` all stages
+       are executed in every clock cycle but separated by registers.  This
+       mode has full throughput but uses many registers and has large
+       latency. If `"combinatorial"`, there are no registers, throughput is
+       maximal and latency is zero. `"pipelined"` and `"combinatorial"` use
+       the same number of sphifters and adders.
+
+       The type of trigonometric/arithmetic function is determined by
+       `cordic_mode` and `func_mode`. :math:`g` is the gain of the CORDIC.
+
+               * rotate-circular: rotate the vector `(xi, yi)` by an angle `zi`.
+                 Used to calculate trigonometric functions, `sin(), cos(),
+                 tan() = sin()/cos()`, or to perform polar-to-cartesian coordinate
+                 transformation:
+
+                       .. math::
+                               x_o = g \\cos(z_i) x_i - g \\sin(z_i) y_i
+
+                               y_o = g \\sin(z_i) x_i + g \\cos(z_i) y_i
+
+               * vector-circular: determine length and angle of the vector
+                 `(xi, yi)`.  Used to calculate `arctan(), sqrt()` or
+                 to perform cartesian-to-polar transformation:
+
+                       .. math::
+                               x_o = g\\sqrt{x_i^2 + y_i^2}
+
+                               z_o = z_i + \\tan^{-1}(y_i/x_i)
+
+               * rotate-hyperbolic: hyperbolic functions of `zi`. Used to
+                 calculate hyprbolic functions, `sinh, cosh, tanh = cosh/sinh,
+                 exp = cosh + sinh`:
+
+                       .. math::
+                               x_o = g \\cosh(z_i) x_i + g \\sinh(z_i) y_i
+
+                               y_o = g \\sinh(z_i) x_i + g \\cosh(z_i) z_i
+
+               * vector-hyperbolic: natural logarithm `ln(), arctanh()`, and
+                 `sqrt()`. Use `x_i = a + b` and `y_i = a - b` to obtain `2*
+                 sqrt(a*b)` and `ln(a/b)/2`:
+
+                       .. math::
+                               x_o = g\\sqrt{x_i^2 - y_i^2}
+
+                               z_o = z_i + \\tanh^{-1}(y_i/x_i)
+
+               * rotate-linear: multiply and accumulate (not a very good
+                 multiplier implementation):
+
+                       .. math::
+                               y_o = g(y_i + x_i z_i)
+
+               * vector-linear: divide and accumulate:
+
+                       .. math::
+                               z_o = g(z_i + y_i/x_i)
+
+       Parameters
+       ----------
+       width : int
+               Bit width of the input and output signals. Defaults to 16. Input
+               and output signals are signed.
+       widthz : int
+               Bit with of `zi` and `zo`. Defaults to the `width`.
+       stages : int or None
+               Number of CORDIC incremental rotation stages. Defaults to
+               `width + min(1, guard)`.
+       guard : int or None
+               Add guard bits to the intermediate signals. If `None`,
+               defaults to `guard = log2(width)` which guarantees accuracy
+               to `width` bits.
+       eval_mode : str, {"iterative", "pipelined", "combinatorial"}
+       cordic_mode : str, {"rotate", "vector"}
+       func_mode : str, {"circular", "linear", "hyperbolic"}
+               Evaluation and arithmetic mode. See above.
+
+       Attributes
+       ----------
+       xi, yi, zi : Signal(width), in
+               Input values, signed.
+       xo, yo, zo : Signal(width), out
+               Output values, signed.
+       new_out : Signal(1), out
+               Asserted if output values are freshly updated in the current
+               cycle.
+       new_in : Signal(1), out
+               Asserted if new input values are being read in the next cycle.
+       zmax : float
+               `zi` and `zo` normalization factor. Floating point `zmax`
+               corresponds to `1<<(widthz - 1)`. `x` and `y` are scaled such
+               that floating point `1` corresponds to `1<<(width - 1)`.
+       gain : float
+               Cumulative, intrinsic gain and scaling factor. In circular mode
+               `sqrt(xi**2 + yi**2)` should be no larger than `2**(width - 1)/gain`
+               to prevent overflow. Additionally, in hyperbolic and linear mode,
+               the operation itself can cause overflow.
+       interval : int
+               Output interval in clock cycles. Inverse throughput.
+       latency : int
+               Input-to-output latency. The result corresponding to the inputs
+               appears at the outputs `latency` cycles later.
+
+       Notes
+       -----
+
+       Each stage `i` in the CORDIC performs the following operation:
+
+       .. math::
+               x_{i+1} = x_i - m d_i y_i r^{-s_{m,i}},
+
+               y_{i+1} = y_i + d_i x_i r^{-s_{m,i}},
+
+               z_{i+1} = z_i - d_i a_{m,i},
+
+       where:
+
+               * :math:`d_i`: clockwise or counterclockwise, determined by
+                 `sign(z_i)` in rotate mode or `sign(-y_i)` in vector mode.
+
+               * :math:`r`: radix of the number system (2)
+
+               * :math:`m`: 1: circular, 0: linear, -1: hyperbolic
+
+               * :math:`s_{m,i}`: non decreasing integer shift sequence
+
+               * :math:`a_{m,i}`: elemetary rotation angle: :math:`a_{m,i} =
+                 \\tan^{-1}(\\sqrt{m} s_{m,i})/\\sqrt{m}`.
        """
-       def __init__(self, width=16, stages=None, guard=0,
+       def __init__(self, width=16, widthz=None, stages=None, guard=0,
                        eval_mode="iterative", cordic_mode="rotate",
                        func_mode="circular"):
                # validate parameters
                assert eval_mode in ("combinatorial", "pipelined", "iterative")
                assert cordic_mode in ("rotate", "vector")
-               self.cordic_mode = cordic_mode
                assert func_mode in ("circular", "linear", "hyperbolic")
+               self.cordic_mode = cordic_mode
                self.func_mode = func_mode
                if guard is None:
                        # guard bits to guarantee "width" accuracy
                        guard = int(log(width)/log(2))
+               if widthz is None:
+                       widthz = width
                if stages is None:
-                       stages = width + guard
+                       stages = width + min(1, guard) # cuts error below LSB
 
-               # calculate the constants
-               if func_mode == "circular":
-                       s = range(stages)
-                       a = [atan(2**-i) for i in s]
-                       g = [sqrt(1 + 2**(-2*i)) for i in s]
-                       zmax = pi/2
-               elif func_mode == "linear":
-                       s = range(stages)
-                       a = [2**-i for i in s]
-                       g = [1 for i in s]
-                       zmax = 2.
-               elif func_mode == "hyperbolic":
-                       s = list(range(1, stages+1))
-                       # need to repeat these stages:
-                       j = 4
-                       while j < stages+1:
-                               s.append(j)
-                               j = 3*j + 1
-                       s.sort()
-                       stages = len(s)
-                       a = [atanh(2**-i) for i in s]
-                       g = [sqrt(1 - 2**(-2*i)) for i in s]
-                       zmax = 1.
+               # input output interface
+               self.xi = Signal((width, True))
+               self.yi = Signal((width, True))
+               self.zi = Signal((widthz, True))
+               self.xo = Signal((width, True))
+               self.yo = Signal((width, True))
+               self.zo = Signal((widthz, True))
+               self.new_in = Signal()
+               self.new_out = Signal()
 
-               a = [Signal((width+guard, True), "{}{}".format("a", i),
-                       reset=int(round(ai*2**(width + guard - 1)/zmax)))
-                       for i, ai in enumerate(a)]
-               self.zmax = zmax #/2**(width - 1)
-               self.gain = 1.
-               for gi in g:
-                       self.gain *= gi
+               ###
 
-               exec_target, num_reg, self.latency, self.interval = {
-                       "combinatorial": (self.comb, stages + 1, 0,          1),
-                       "pipelined":     (self.sync, stages + 1, stages,     1),
-                       "iterative":     (self.sync, 3,          stages + 1, stages + 1),
-                       }[eval_mode]
+               a, s, self.zmax, self.gain = self._constants(stages, widthz + guard)
+               stages = len(a) # may have increased due to repetitions
 
-               # i/o and inter-stage signals
-               self.fresh = Signal()
-               self.xi, self.yi, self.zi, self.xo, self.yo, self.zo = (
-                               Signal((width, True), l + io) for io in "io" for l in "xyz")
-               x, y, z = ([Signal((width + guard, True), "{}{}".format(l, i))
-                       for i in range(num_reg)] for l in "xyz")
+               if eval_mode == "iterative":
+                       num_sig = 3
+                       self.interval = stages + 1
+                       self.latency = stages + 2
+               else:
+                       num_sig = stages + 1
+                       self.interval = 1
+                       if eval_mode == "pipelined":
+                               self.latency = stages
+                       else: # combinatorial
+                               self.latency = 0
 
+               # inter-stage signals
+               x = [Signal((width + guard, True)) for i in range(num_sig)]
+               y = [Signal((width + guard, True)) for i in range(num_sig)]
+               z = [Signal((widthz + guard, True)) for i in range(num_sig)]
+
+               # hook up inputs and outputs to the first and last inter-stage
+               # signals
                self.comb += [
                        x[0].eq(self.xi<<guard),
                        y[0].eq(self.yi<<guard),
@@ -75,167 +206,143 @@ class TwoQuadrantCordic(Module):
                        self.zo.eq(z[-1]>>guard),
                        ]
 
-               if eval_mode in ("combinatorial", "pipelined"):
-                       self.comb += self.fresh.eq(1)
-                       for i in range(stages):
-                               exec_target += self.stage(x[i], y[i], z[i],
-                                               x[i + 1], y[i + 1], z[i + 1], i, a[i])
-               elif eval_mode == "iterative":
-                       # we afford one additional iteration for register in/out
-                       # shifting, trades muxes for registers
+               if eval_mode == "iterative":
+                       # We afford one additional iteration for in/out.
                        i = Signal(max=stages + 1)
-                       ai = Signal((width+guard, True))
-                       self.comb += ai.eq(Array(a)[i])
-                       exec_target += [
+                       self.comb += [
+                                       self.new_in.eq(i == stages),
+                                       self.new_out.eq(i == 1),
+                                       ]
+                       ai = Signal((widthz + guard, True))
+                       self.sync += ai.eq(Array(a)[i])
+                       if range(stages) == s:
+                               si = i - 1 # shortcut if no stage repetitions
+                       else:
+                               si = Signal(max=stages + 1)
+                               self.sync += si.eq(Array(s)[i])
+                       xi, yi, zi = x[1], y[1], z[1]
+                       self.sync += [
+                                       self._stage(xi, yi, zi, xi, yi, zi, si, ai),
                                        i.eq(i + 1),
                                        If(i == stages,
                                                i.eq(0),
-                                               self.fresh.eq(1),
-                                               Cat(x[1], y[1], z[1]).eq(Cat(x[0], y[0], z[0])),
-                                               Cat(x[2], y[2], z[2]).eq(Cat(x[1], y[1], z[1])),
-                                       ).Else(
-                                               self.fresh.eq(0),
-                                               # in-place stages
-                                               self.stage(x[1], y[1], z[1], x[1], y[1], z[1], i, ai),
+                                       ),
+                                       If(i == 0,
+                                               x[2].eq(xi),
+                                               y[2].eq(yi),
+                                               z[2].eq(zi),
+                                               xi.eq(x[0]),
+                                               yi.eq(y[0]),
+                                               zi.eq(z[0]),
                                        )]
+               else:
+                       self.comb += [
+                                       self.new_out.eq(1),
+                                       self.new_in.eq(1),
+                                       ]
+                       for i, si in enumerate(s):
+                               stmt = self._stage(x[i], y[i], z[i],
+                                               x[i + 1], y[i + 1], z[i + 1], si, a[i])
+                               if eval_mode == "pipelined":
+                                       self.sync += stmt
+                               else: # combinatorial
+                                       self.comb += stmt
+
+       def _constants(self, stages, bits):
+               if self.func_mode == "circular":
+                       s = range(stages)
+                       a = [atan(2**-i) for i in s]
+                       g = [sqrt(1 + 2**(-2*i)) for i in s]
+                       #zmax = sum(a)
+                       # use pi anyway as the input z can cause overflow
+                       # and we need the range for quadrant mapping
+                       zmax = pi
+               elif self.func_mode == "linear":
+                       s = range(stages)
+                       a = [2**-i for i in s]
+                       g = [1 for i in s]
+                       #zmax = sum(a)
+                       # use 2 anyway as this simplifies a and scaling
+                       zmax = 2.
+               else: # hyperbolic
+                       s = []
+                       # need to repeat some stages:
+                       j = 4
+                       for i in range(stages):
+                               if i == j:
+                                       s.append(j)
+                                       j = 3*j + 1
+                               s.append(i + 1)
+                       a = [atanh(2**-i) for i in s]
+                       g = [sqrt(1 - 2**(-2*i)) for i in s]
+                       zmax = sum(a)*2
+               a = [int(ai*2**(bits - 1)/zmax) for ai in a]
+               # round here helps the width=2**i - 1 case but hurts the
+               # important width=2**i case
+               gain = 1.
+               for gi in g:
+                       gain *= gi
+               return a, s, zmax, gain
 
-       def stage(self, xi, yi, zi, xo, yo, zo, i, a):
-               """
-               x_{i+1} = x_{i} - m*d_i*y_i*r**(-s_{m,i})
-               y_{i+1} = d_i*x_i*r**(-s_{m,i}) + y_i
-               z_{i+1} = z_i - d_i*a_{m,i}
-
-               d_i: clockwise or counterclockwise
-               r: radix of the number system
-               m: 1: circular, 0: linear, -1: hyperbolic
-               s_{m,i}: non decreasing integer shift sequence
-               a_{m,i}: elemetary rotation angle
-               """
-               dx, dy, dz = xi>>i, yi>>i, a
-               direction = {"rotate": zi < 0, "vector": yi >= 0}[self.cordic_mode]
-               dy = {"circular": dy, "linear": 0, "hyperbolic": -dy}[self.func_mode]
-               ret = If(direction,
-                                       xo.eq(xi + dy),
-                                       yo.eq(yi - dx),
+       def _stage(self, xi, yi, zi, xo, yo, zo, i, ai):
+               if self.cordic_mode == "rotate":
+                       direction = zi < 0
+               else: # vector
+                       direction = yi >= 0
+               dx = yi>>i
+               dy = xi>>i
+               dz = ai
+               if self.func_mode == "linear":
+                       dx = 0
+               elif self.func_mode == "hyperbolic":
+                       dx = -dx
+               stmt = If(direction,
+                                       xo.eq(xi + dx),
+                                       yo.eq(yi - dy),
                                        zo.eq(zi + dz),
                                ).Else(
-                                       xo.eq(xi - dy),
-                                       yo.eq(yi + dx),
+                                       xo.eq(xi - dx),
+                                       yo.eq(yi + dy),
                                        zo.eq(zi - dz),
                                )
-               return ret
+               return stmt
 
 class Cordic(TwoQuadrantCordic):
+       """Four-quadrant CORDIC
+
+       Same as :class:`TwoQuadrantCordic` but with support and convergence
+       for `abs(zi) > pi/2 in circular rotate mode or `xi < 0` in circular
+       vector mode.
+       """
        def __init__(self, **kwargs):
                TwoQuadrantCordic.__init__(self, **kwargs)
-               if not (self.func_mode, self.cordic_mode) == ("circular", "rotate"):
+               if self.func_mode != "circular":
                        return # no need to remap quadrants
-               cxi, cyi, czi, cxo, cyo, czo = (self.xi, self.yi, self.zi,
-                               self.xo, self.yo, self.zo)
+
                width = flen(self.xi)
-               for l in "xyz":
-                       for d in "io":
-                               setattr(self, l+d, Signal((width, True), l+d))
-               qin = Signal()
-               qout = Signal()
-               if self.latency == 0:
-                       self.comb += qout.eq(qin)
-               elif self.latency == 1:
-                       self.sync += qout.eq(qin)
-               else:
-                       sr = Signal(self.latency-1)
-                       self.sync += Cat(sr, qout).eq(Cat(qin, sr))
-               pi2 = (1<<(width-2))-1
-               self.zmax *= 2
+               widthz = flen(self.zi)
+               cxi, cyi, czi = self.xi, self.yi, self.zi
+               self.xi = Signal((width, True))
+               self.yi = Signal((width, True))
+               self.zi = Signal((widthz, True))
+
+               ###
+
+               pi2 = 1<<(widthz - 2)
+               if self.cordic_mode == "rotate":
+                       #rot = self.zi + pi2 < 0
+                       rot = self.zi[-1] ^ self.zi[-2]
+               else: # vector
+                       rot = self.xi < 0
+                       #rot = self.xi[-1]
                self.comb += [
-                               # zi, zo are scaled to cover the range, this also takes
-                               # care of mapping the zi quadrants
-                               Cat(cxi, cyi, czi).eq(Cat(self.xi, self.yi, self.zi<<1)),
-                               Cat(self.xo, self.yo, self.zo).eq(Cat(cxo, cyo, czo>>1)),
-                               # shift in the (2,3)-quadrant flag
-                               qin.eq((-self.zi < -pi2) | (self.zi+1 < -pi2)),
-                               # need to remap xo/yo quadrants (2,3) -> (4,1)
-                               If(qout,
-                                       self.xo.eq(-cxo),
-                                       self.yo.eq(-cyo),
-                               )]
-
-class TB(Module):
-       def __init__(self, n, **kwargs):
-               self.submodules.cordic = Cordic(**kwargs)
-               self.xi = [.9/self.cordic.gain] * n
-               self.yi = [0] * n
-               self.zi = [2*i/n-1 for i in range(n)]
-               self.xo = []
-               self.yo = []
-               self.zo = []
-
-       def do_simulation(self, s):
-               c = 2**(flen(self.cordic.xi)-1)
-               if s.rd(self.cordic.fresh):
-                       self.xo.append(s.rd(self.cordic.xo))
-                       self.yo.append(s.rd(self.cordic.yo))
-                       self.zo.append(s.rd(self.cordic.zo))
-                       if not self.xi:
-                               s.interrupt = True
-                               return
-                       for r, v in zip((self.cordic.xi, self.cordic.yi, self.cordic.zi),
-                                       (self.xi, self.yi, self.zi)):
-                               s.wr(r, int(v.pop(0)*c))
-
-def _main():
-       from migen.fhdl import verilog
-       from migen.sim.generic import Simulator, TopLevel
-       from matplotlib import pyplot as plt
-       import numpy as np
-
-       c = Cordic(width=16, eval_mode="iterative",
-               cordic_mode="rotate", func_mode="circular")
-       print(verilog.convert(c, ios={c.xi, c.yi, c.zi, c.xo,
-               c.yo, c.zo}))
-
-       n = 200
-       tb = TB(n, width=8, guard=3, eval_mode="pipelined",
-                       cordic_mode="rotate", func_mode="circular")
-       sim = Simulator(tb, TopLevel("cordic.vcd"))
-       sim.run(n*16+20)
-       plt.plot(tb.xo)
-       plt.plot(tb.yo)
-       plt.plot(tb.zo)
-       plt.show()
-
-def _rms_err(width, stages, n):
-       from migen.sim.generic import Simulator
-       import numpy as np
-       import matplotlib.pyplot as plt
-
-       tb = TB(width=int(width), stages=int(stages), n=n,
-                       eval_mode="combinatorial")
-       sim = Simulator(tb)
-       sim.run(n+100)
-       z = tb.cordic.zmax*(np.arange(n)/n*2-1)
-       x = np.cos(z)*.9
-       y = np.sin(z)*.9
-       dx = tb.xo[1:]-x*2**(width-1)
-       dy = tb.yo[1:]-y*2**(width-1)
-       return ((dx**2+dy**2)**.5).sum()/n
-
-def _test_err():
-       from matplotlib import pyplot as plt
-       import numpy as np
-
-       widths, stages = np.mgrid[4:33:1, 4:33:1]
-       err = np.vectorize(lambda w, s: rms_err(w, s, 173))(widths, stages)
-       err = -np.log2(err)/widths
-       print(err)
-       plt.contour(widths, stages, err, 50, cmap=plt.cm.Greys)
-       plt.plot(widths[:, 0], stages[0, np.argmax(err, 1)], "bx-")
-       print(widths[:, 0], stages[0, np.argmax(err, 1)])
-       plt.colorbar()
-       plt.grid("on")
-       plt.show()
-
-if __name__ == "__main__":
-       _main()
-       #_rms_err(16, 16, 345)
-       #_test_err()
+                               cxi.eq(self.xi),
+                               cyi.eq(self.yi),
+                               czi.eq(self.zi),
+                               If(rot,
+                                       cxi.eq(-self.xi),
+                                       cyi.eq(-self.yi),
+                                       czi.eq(self.zi + 2*pi2),
+                                       #czi.eq(self.zi ^ (2*pi2)),
+                               ),
+                               ]
diff --git a/migen/test/test_cordic.py b/migen/test/test_cordic.py
new file mode 100644 (file)
index 0000000..417bdb2
--- /dev/null
@@ -0,0 +1,163 @@
+import unittest
+from random import randrange, random
+from math import *
+
+from migen.fhdl.std import *
+from migen.genlib.cordic import *
+
+from migen.test.support import SimCase, SimBench
+
+
+class CordicCase(SimCase, unittest.TestCase):
+       class TestBench(SimBench):
+               def __init__(self, **kwargs):
+                       k = dict(width=8, guard=None, stages=None,
+                               eval_mode="combinatorial", cordic_mode="rotate",
+                               func_mode="circular")
+                       k.update(kwargs)
+                       self.submodules.dut = Cordic(**k)
+
+       def _run_io(self, n, gen, proc, delta=1, deltaz=1):
+               c = 2**(flen(self.tb.dut.xi) - 1)
+               g = self.tb.dut.gain
+               zm = self.tb.dut.zmax
+               pipe = {}
+               genn = [gen() for i in range(n)]
+               def cb(tb, s):
+                       if s.rd(tb.dut.new_in):
+                               if genn:
+                                       xi, yi, zi = genn.pop(0)
+                               else:
+                                       s.interrupt = True
+                                       return
+                               xi = floor(xi*c/g)
+                               yi = floor(yi*c/g)
+                               zi = floor(zi*c/zm)
+                               s.wr(tb.dut.xi, xi)
+                               s.wr(tb.dut.yi, yi)
+                               s.wr(tb.dut.zi, zi)
+                               pipe[s.cycle_counter] = xi, yi, zi
+                       if s.rd(tb.dut.new_out):
+                               t = s.cycle_counter - tb.dut.latency - 1
+                               if t < 1:
+                                       return
+                               xi, yi, zi = pipe.pop(t)
+                               xo, yo, zo = proc(xi/c, yi/c, zi/c*zm)
+                               xo = floor(xo*c*g)
+                               yo = floor(yo*c*g)
+                               zo = floor(zo*c/zm)
+                               xo1 = s.rd(tb.dut.xo)
+                               yo1 = s.rd(tb.dut.yo)
+                               zo1 = s.rd(tb.dut.zo)
+                               print((xi, yi, zi), (xo, yo, zo), (xo1, yo1, zo1))
+                               self.assertAlmostEqual(xo, xo1, delta=delta)
+                               self.assertAlmostEqual(yo, yo1, delta=delta)
+                               self.assertAlmostEqual(abs(zo - zo1) % (2*c), 0, delta=deltaz)
+               self.run_with(cb)
+
+       def test_rot_circ(self):
+               def gen():
+                       ti = 2*pi*random()
+                       r = random()*.98
+                       return r*cos(ti), r*sin(ti), (2*random() - 1)*pi
+               def proc(xi, yi, zi):
+                       xo = cos(zi)*xi - sin(zi)*yi
+                       yo = sin(zi)*xi + cos(zi)*yi
+                       return xo, yo, 0
+               self._run_io(50, gen, proc, delta=2)
+
+       def test_rot_circ_16(self):
+               self.setUp(width=16)
+               self.test_rot_circ()
+
+       def test_rot_circ_pipe(self):
+               self.setUp(eval_mode="pipelined")
+               self.test_rot_circ()
+
+       def test_rot_circ_iter(self):
+               self.setUp(eval_mode="iterative")
+               self.test_rot_circ()
+
+       def _test_vec_circ(self):
+               def gen():
+                       ti = pi*(2*random() - 1)
+                       r = .98 #*random()
+                       return r*cos(ti), r*sin(ti), 0 #pi*(2*random() - 1)
+               def proc(xi, yi, zi):
+                       return sqrt(xi**2 + yi**2), 0, zi + atan2(yi, xi)
+               self._run_io(50, gen, proc)
+
+       def test_vec_circ(self):
+               self.setUp(cordic_mode="vector")
+               self._test_vec_circ()
+
+       def test_vec_circ_16(self):
+               self.setUp(width=16, cordic_mode="vector")
+               self._test_vec_circ()
+
+       def _test_rot_hyp(self):
+               def gen():
+                       return .6, 0, 2.1*(random() - .5)
+               def proc(xi, yi, zi):
+                       xo = cosh(zi)*xi - sinh(zi)*yi
+                       yo = sinh(zi)*xi + cosh(zi)*yi
+                       return xo, yo, 0
+               self._run_io(50, gen, proc, delta=2)
+
+       def test_rot_hyp(self):
+               self.setUp(func_mode="hyperbolic")
+               self._test_rot_hyp()
+
+       def test_rot_hyp_16(self):
+               self.setUp(func_mode="hyperbolic", width=16)
+               self._test_rot_hyp()
+
+       def test_rot_hyp_iter(self):
+               self.setUp(cordic_mode="rotate", func_mode="hyperbolic",
+                               eval_mode="iterative")
+               self._test_rot_hyp()
+
+       def _test_vec_hyp(self):
+               def gen():
+                       xi = random()*.6 + .2
+                       yi = random()*xi*.8
+                       return xi, yi, 0
+               def proc(xi, yi, zi):
+                       return sqrt(xi**2 - yi**2), 0, atanh(yi/xi)
+               self._run_io(50, gen, proc)
+
+       def test_vec_hyp(self):
+               self.setUp(cordic_mode="vector", func_mode="hyperbolic")
+               self._test_vec_hyp()
+
+       def _test_rot_lin(self):
+               def gen():
+                       xi = 2*random() - 1
+                       if abs(xi) < .01:
+                               xi = .01
+                       yi = (2*random() - 1)*.5
+                       zi = (2*random() - 1)*.5
+                       return xi, yi, zi
+               def proc(xi, yi, zi):
+                       return xi, yi + xi*zi, 0
+               self._run_io(50, gen, proc)
+
+       def test_rot_lin(self):
+               self.setUp(func_mode="linear")
+               self._test_rot_lin()
+
+       def _test_vec_lin(self):
+               def gen():
+                       yi = random()*.95 + .05
+                       if random() > 0:
+                               yi *= -1
+                       xi = abs(yi) + random()*(1 - abs(yi))
+                       zi = 2*random() - 1
+                       return xi, yi, zi
+               def proc(xi, yi, zi):
+                       return xi, 0, zi + yi/xi
+               self._run_io(50, gen, proc, deltaz=2, delta=2)
+
+       def test_vec_lin(self):
+               self.setUp(func_mode="linear", cordic_mode="vector", width=8)
+               self._test_vec_lin()