framebuffer: unpack memory words in pixel clock domain for better perf
author Sebastien Bourdeauducq <sebastien@milkymist.org>
Sun, 17 Nov 2013 22:41:18 +0000 (23:41 +0100)
committer Sebastien Bourdeauducq <sebastien@milkymist.org>
Sun, 17 Nov 2013 22:41:18 +0000 (23:41 +0100)
misoclib/framebuffer/__init__.py
misoclib/framebuffer/format.py
misoclib/framebuffer/phy.py

index 7399e40381eaa47d979c7f0a4ccebd5d0a358e94..360a245f271e6cbf3884e7e1d7729b8b3d612e38 100644 (file)
@@ -9,23 +9,20 @@ from misoclib.framebuffer.phy import Driver
 
 class Framebuffer(Module, AutoCSR):
        def __init__(self, pads_vga, pads_dvi, lasmim, simulation=False):
-               pack_factor = lasmim.dw//(2*bpp)
-               packed_pixels = structuring.pack_layout(pixel_layout, pack_factor)
+               pack_factor = lasmim.dw//bpp
                
                self._enable = CSRStorage()
-               self.fi = FrameInitiator()
+               self.fi = FrameInitiator(pack_factor)
                self.dma = spi.DMAReadController(dma_lasmi.Reader(lasmim), spi.MODE_EXTERNAL, length_reset=640*480*4)
-               self.driver = Driver(pads_vga, pads_dvi)
+               self.driver = Driver(pack_factor, pads_vga, pads_dvi)
 
-               cast = structuring.Cast(lasmim.dw, packed_pixels, reverse_to=True)
-               unpack = structuring.Unpack(pack_factor, pixel_layout)
-               vtg = VTG()
+               cast = structuring.Cast(lasmim.dw, pixel_layout(pack_factor), reverse_to=True)
+               vtg = VTG(pack_factor)
                
                g = DataFlowGraph()
                g.add_connection(self.fi, vtg, sink_ep="timing")
                g.add_connection(self.dma, cast)
-               g.add_connection(cast, unpack)
-               g.add_connection(unpack, vtg, sink_ep="pixels")
+               g.add_connection(cast, vtg, sink_ep="pixels")
                g.add_connection(vtg, self.driver)
                self.submodules += CompositeActor(g)
 
@@ -36,10 +33,11 @@ class Framebuffer(Module, AutoCSR):
                ]
 
 class Blender(PipelinedActor, AutoCSR):
-       def __init__(self, nimages, latency):
-               sink_layout = [("i"+str(i), pixel_layout) for i in range(nimages)]
+       def __init__(self, nimages, pack_factor, latency):
+               epixel_layout = pixel_layout(pack_factor)
+               sink_layout = [("i"+str(i), epixel_layout) for i in range(nimages)]
                self.sink = Sink(sink_layout)
-               self.source = Source(pixel_layout)
+               self.source = Source(epixel_layout)
                factors = []
                for i in range(nimages):
                        name = "f"+str(i)
@@ -54,8 +52,8 @@ class Blender(PipelinedActor, AutoCSR):
                self.sync += If(self.pipe_ce, sink_registered.eq(self.sink.payload))
 
                imgs = [getattr(sink_registered, "i"+str(i)) for i in range(nimages)]
-               outval = Record(pixel_layout)
-               for e in pixel_layout:
+               outval = Record(epixel_layout)
+               for e in epixel_layout:
                        name = e[0]
                        inpixs = [getattr(img, name) for img in imgs]
                        outpix = getattr(outval, name)
@@ -74,7 +72,7 @@ class Blender(PipelinedActor, AutoCSR):
 
                pipe_stmts = []
                for i in range(latency-1):
-                       new_outval = Record(pixel_layout)
+                       new_outval = Record(epixel_layout)
                        pipe_stmts.append(new_outval.eq(outval))
                        outval = new_outval
                self.sync += If(self.pipe_ce, pipe_stmts)
@@ -82,29 +80,25 @@ class Blender(PipelinedActor, AutoCSR):
 
 class MixFramebuffer(Module, AutoCSR):
        def __init__(self, pads_vga, pads_dvi, *lasmims, blender_latency=5):
-               pack_factor = lasmims[0].dw//(2*bpp)
-               packed_pixels = structuring.pack_layout(pixel_layout, pack_factor)
+               pack_factor = lasmims[0].dw//bpp
                
                self._enable = CSRStorage()
-               self.fi = FrameInitiator()
-               self.blender = Blender(len(lasmims), blender_latency)
-               self.driver = Driver(pads_vga, pads_dvi)
+               self.fi = FrameInitiator(pack_factor)
+               self.blender = Blender(len(lasmims), pack_factor, blender_latency)
+               self.driver = Driver(pack_factor, pads_vga, pads_dvi)
                self.comb += self.fi.trigger.eq(self._enable.storage)
 
                g = DataFlowGraph()
+               epixel_layout = pixel_layout(pack_factor)
                for n, lasmim in enumerate(lasmims):
                        dma = spi.DMAReadController(dma_lasmi.Reader(lasmim), spi.MODE_EXTERNAL, length_reset=640*480*4)
-                       cast = structuring.Cast(lasmim.dw, packed_pixels, reverse_to=True)
-                       unpack = structuring.Unpack(pack_factor, pixel_layout)
-
+                       cast = structuring.Cast(lasmim.dw, epixel_layout, reverse_to=True)
                        g.add_connection(dma, cast)
-                       g.add_connection(cast, unpack)
-                       g.add_connection(unpack, self.blender, sink_subr=["i"+str(n)])
-
+                       g.add_connection(cast, self.blender, sink_subr=["i"+str(n)])
                        self.comb += dma.generator.trigger.eq(self._enable.storage)
                        setattr(self, "dma"+str(n), dma)
 
-               vtg = VTG()
+               vtg = VTG(pack_factor)
                self.comb += vtg.enable.eq(self._enable.storage)
                g.add_connection(self.fi, vtg, sink_ep="timing")
                g.add_connection(self.blender, vtg, sink_ep="pixels")
index fdfc643316a35b3122c8e2e17aba023d6b45f2d2..5a72a49d0a088a9bf879d0512f1102c79e97cb62 100644 (file)
@@ -3,7 +3,7 @@ from migen.flow.actor import *
 from migen.bank.description import CSRStorage
 from migen.actorlib import spi
 
-_hbits = 11
+_hbits = 12
 _vbits = 12
 
 bpp = 32
@@ -14,10 +14,8 @@ pixel_layout_s = [
        ("g", bpc),
        ("b", bpc)
 ]
-pixel_layout = [
-       ("p0", pixel_layout_s),
-       ("p1", pixel_layout_s)
-]
+def pixel_layout(pack_factor):
+       return [("p"+str(i), pixel_layout_s) for i in range(pack_factor)]
 
 bpc_phy = 8
 phy_layout_s = [
@@ -25,21 +23,21 @@ phy_layout_s = [
        ("g", bpc_phy),
        ("b", bpc_phy)
 ]
-phy_layout = [
-       ("hsync", 1),
-       ("vsync", 1),
-       ("de", 1),
-       ("p0", phy_layout_s),
-       ("p1", phy_layout_s)
-]
+def phy_layout(pack_factor):
+       r = [("hsync", 1), ("vsync", 1), ("de", 1)]
+       for i in range(pack_factor):
+               r.append(("p"+str(i), phy_layout_s))
+       return r
 
 class FrameInitiator(spi.SingleGenerator):
-       def __init__(self):
+       def __init__(self, pack_factor):
+               h_alignment_bits = log2_int(pack_factor)
+               hbits_dyn = _hbits - h_alignment_bits
                layout = [
-                       ("hres", _hbits, 640, 1),
-                       ("hsync_start", _hbits, 656, 1),
-                       ("hsync_end", _hbits, 752, 1),
-                       ("hscan", _hbits, 800, 1),
+                       ("hres", hbits_dyn, 640, h_alignment_bits),
+                       ("hsync_start", hbits_dyn, 656, h_alignment_bits),
+                       ("hsync_end", hbits_dyn, 752, h_alignment_bits),
+                       ("hscan", hbits_dyn, 800, h_alignment_bits),
                        
                        ("vres", _vbits, 480),
                        ("vsync_start", _vbits, 492),
@@ -49,19 +47,20 @@ class FrameInitiator(spi.SingleGenerator):
                spi.SingleGenerator.__init__(self, layout, spi.MODE_EXTERNAL)
 
 class VTG(Module):
-       def __init__(self):
+       def __init__(self, pack_factor):
+               hbits_dyn = _hbits - log2_int(pack_factor)
                self.enable = Signal()
                self.timing = Sink([
-                               ("hres", _hbits),
-                               ("hsync_start", _hbits),
-                               ("hsync_end", _hbits),
-                               ("hscan", _hbits),
+                               ("hres", hbits_dyn),
+                               ("hsync_start", hbits_dyn),
+                               ("hsync_end", hbits_dyn),
+                               ("hscan", hbits_dyn),
                                ("vres", _vbits),
                                ("vsync_start", _vbits),
                                ("vsync_end", _vbits),
                                ("vscan", _vbits)])
-               self.pixels = Sink(pixel_layout)
-               self.phy = Source(phy_layout)
+               self.pixels = Sink(pixel_layout(pack_factor))
+               self.phy = Source(phy_layout(pack_factor))
                self.busy = Signal()
 
                ###
@@ -71,7 +70,7 @@ class VTG(Module):
                active = Signal()
                
                generate_en = Signal()
-               hcounter = Signal(_hbits)
+               hcounter = Signal(hbits_dyn)
                vcounter = Signal(_vbits)
                
                skip = bpc - bpc_phy
@@ -79,7 +78,7 @@ class VTG(Module):
                        active.eq(hactive & vactive),
                        If(active,
                                [getattr(getattr(self.phy.payload, p), c).eq(getattr(getattr(self.pixels.payload, p), c)[skip:])
-                                       for p in ["p0", "p1"] for c in ["r", "g", "b"]],
+                                       for p in ["p"+str(i) for i in range(pack_factor)] for c in ["r", "g", "b"]],
                                self.phy.payload.de.eq(1)
                        ),
                        
index ff71e6908024dd738eada94f002585c018aacf73..fd2812833b19e9cafdbf90b0215663bba1476cd2 100644 (file)
@@ -8,8 +8,8 @@ from misoclib.framebuffer.format import bpc_phy, phy_layout
 from misoclib.framebuffer import dvi
 
 class _FIFO(Module):
-       def __init__(self):
-               self.phy = Sink(phy_layout)
+       def __init__(self, pack_factor):
+               self.phy = Sink(phy_layout(pack_factor))
                self.busy = Signal()
                
                self.pix_hsync = Signal()
@@ -21,7 +21,7 @@ class _FIFO(Module):
        
                ###
 
-               fifo = RenameClockDomains(AsyncFIFO(phy_layout, 512),
+               fifo = RenameClockDomains(AsyncFIFO(phy_layout(pack_factor), 512),
                        {"write": "sys", "read": "pix"})
                self.submodules += fifo
                self.comb += [
@@ -31,23 +31,22 @@ class _FIFO(Module):
                        self.busy.eq(0)
                ]
 
-               pix_parity = Signal()
+               unpack_counter = Signal(max=pack_factor)
+               assert(pack_factor & (pack_factor - 1) == 0) # only support powers of 2
                self.sync.pix += [
-                       pix_parity.eq(~pix_parity),
+                       unpack_counter.eq(unpack_counter + 1),
                        self.pix_hsync.eq(fifo.dout.hsync),
                        self.pix_vsync.eq(fifo.dout.vsync),
-                       self.pix_de.eq(fifo.dout.de),
-                       If(pix_parity,
-                               self.pix_r.eq(fifo.dout.p1.r),
-                               self.pix_g.eq(fifo.dout.p1.g),
-                               self.pix_b.eq(fifo.dout.p1.b)
-                       ).Else(
-                               self.pix_r.eq(fifo.dout.p0.r),
-                               self.pix_g.eq(fifo.dout.p0.g),
-                               self.pix_b.eq(fifo.dout.p0.b)
-                       )
+                       self.pix_de.eq(fifo.dout.de)
                ]
-               self.comb += fifo.re.eq(pix_parity)
+               for i in range(pack_factor):
+                       pixel = getattr(fifo.dout, "p"+str(i))
+                       self.sync.pix += If(unpack_counter == i,
+                               self.pix_r.eq(pixel.r),
+                               self.pix_g.eq(pixel.g),
+                               self.pix_b.eq(pixel.b)
+                       )
+               self.comb += fifo.re.eq(unpack_counter == (pack_factor - 1))
 
 # This assumes a 50MHz base clock
 class _Clocking(Module, AutoCSR):
@@ -168,8 +167,8 @@ class _Clocking(Module, AutoCSR):
                                o_O=pads_dvi.clk_p, o_OB=pads_dvi.clk_n)
 
 class Driver(Module, AutoCSR):
-       def __init__(self, pads_vga, pads_dvi):
-               fifo = _FIFO()
+       def __init__(self, pack_factor, pads_vga, pads_dvi):
+               fifo = _FIFO(pack_factor)
                self.submodules += fifo
                self.phy = fifo.phy
                self.busy = fifo.busy