From: Sebastien Bourdeauducq Date: Sun, 17 Nov 2013 22:41:18 +0000 (+0100) Subject: framebuffer: unpack memory words in pixel clock domain for better perf X-Git-Tag: 24jan2021_ls180~2796 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4cfcda6c8ca9ee7ff84384594afb581292da570a;p=litex.git framebuffer: unpack memory words in pixel clock domain for better perf --- diff --git a/misoclib/framebuffer/__init__.py b/misoclib/framebuffer/__init__.py index 7399e403..360a245f 100644 --- a/misoclib/framebuffer/__init__.py +++ b/misoclib/framebuffer/__init__.py @@ -9,23 +9,20 @@ from misoclib.framebuffer.phy import Driver class Framebuffer(Module, AutoCSR): def __init__(self, pads_vga, pads_dvi, lasmim, simulation=False): - pack_factor = lasmim.dw//(2*bpp) - packed_pixels = structuring.pack_layout(pixel_layout, pack_factor) + pack_factor = lasmim.dw//bpp self._enable = CSRStorage() - self.fi = FrameInitiator() + self.fi = FrameInitiator(pack_factor) self.dma = spi.DMAReadController(dma_lasmi.Reader(lasmim), spi.MODE_EXTERNAL, length_reset=640*480*4) - self.driver = Driver(pads_vga, pads_dvi) + self.driver = Driver(pack_factor, pads_vga, pads_dvi) - cast = structuring.Cast(lasmim.dw, packed_pixels, reverse_to=True) - unpack = structuring.Unpack(pack_factor, pixel_layout) - vtg = VTG() + cast = structuring.Cast(lasmim.dw, pixel_layout(pack_factor), reverse_to=True) + vtg = VTG(pack_factor) g = DataFlowGraph() g.add_connection(self.fi, vtg, sink_ep="timing") g.add_connection(self.dma, cast) - g.add_connection(cast, unpack) - g.add_connection(unpack, vtg, sink_ep="pixels") + g.add_connection(cast, vtg, sink_ep="pixels") g.add_connection(vtg, self.driver) self.submodules += CompositeActor(g) @@ -36,10 +33,11 @@ class Framebuffer(Module, AutoCSR): ] class Blender(PipelinedActor, AutoCSR): - def __init__(self, nimages, latency): - sink_layout = [("i"+str(i), pixel_layout) for i in range(nimages)] + def __init__(self, nimages, pack_factor, latency): + epixel_layout = pixel_layout(pack_factor) + sink_layout = [("i"+str(i), epixel_layout) for i in range(nimages)] self.sink = Sink(sink_layout) - self.source = Source(pixel_layout) + self.source = Source(epixel_layout) factors = [] for i in range(nimages): name = "f"+str(i) @@ -54,8 +52,8 @@ class Blender(PipelinedActor, AutoCSR): self.sync += If(self.pipe_ce, sink_registered.eq(self.sink.payload)) imgs = [getattr(sink_registered, "i"+str(i)) for i in range(nimages)] - outval = Record(pixel_layout) - for e in pixel_layout: + outval = Record(epixel_layout) + for e in epixel_layout: name = e[0] inpixs = [getattr(img, name) for img in imgs] outpix = getattr(outval, name) @@ -74,7 +72,7 @@ class Blender(PipelinedActor, AutoCSR): pipe_stmts = [] for i in range(latency-1): - new_outval = Record(pixel_layout) + new_outval = Record(epixel_layout) pipe_stmts.append(new_outval.eq(outval)) outval = new_outval self.sync += If(self.pipe_ce, pipe_stmts) @@ -82,29 +80,25 @@ class Blender(PipelinedActor, AutoCSR): class MixFramebuffer(Module, AutoCSR): def __init__(self, pads_vga, pads_dvi, *lasmims, blender_latency=5): - pack_factor = lasmims[0].dw//(2*bpp) - packed_pixels = structuring.pack_layout(pixel_layout, pack_factor) + pack_factor = lasmims[0].dw//bpp self._enable = CSRStorage() - self.fi = FrameInitiator() - self.blender = Blender(len(lasmims), blender_latency) - self.driver = Driver(pads_vga, pads_dvi) + self.fi = FrameInitiator(pack_factor) + self.blender = Blender(len(lasmims), pack_factor, blender_latency) + self.driver = Driver(pack_factor, pads_vga, pads_dvi) self.comb += self.fi.trigger.eq(self._enable.storage) g = DataFlowGraph() + epixel_layout = pixel_layout(pack_factor) for n, lasmim in enumerate(lasmims): dma = spi.DMAReadController(dma_lasmi.Reader(lasmim), spi.MODE_EXTERNAL, length_reset=640*480*4) - cast = structuring.Cast(lasmim.dw, packed_pixels, reverse_to=True) - unpack = structuring.Unpack(pack_factor, pixel_layout) - + cast = structuring.Cast(lasmim.dw, epixel_layout, reverse_to=True) g.add_connection(dma, cast) - g.add_connection(cast, unpack) - g.add_connection(unpack, self.blender, sink_subr=["i"+str(n)]) - + g.add_connection(cast, self.blender, sink_subr=["i"+str(n)]) self.comb += dma.generator.trigger.eq(self._enable.storage) setattr(self, "dma"+str(n), dma) - vtg = VTG() + vtg = VTG(pack_factor) self.comb += vtg.enable.eq(self._enable.storage) g.add_connection(self.fi, vtg, sink_ep="timing") g.add_connection(self.blender, vtg, sink_ep="pixels") diff --git a/misoclib/framebuffer/format.py b/misoclib/framebuffer/format.py index fdfc6433..5a72a49d 100644 --- a/misoclib/framebuffer/format.py +++ b/misoclib/framebuffer/format.py @@ -3,7 +3,7 @@ from migen.flow.actor import * from migen.bank.description import CSRStorage from migen.actorlib import spi -_hbits = 11 +_hbits = 12 _vbits = 12 bpp = 32 @@ -14,10 +14,8 @@ pixel_layout_s = [ ("g", bpc), ("b", bpc) ] -pixel_layout = [ - ("p0", pixel_layout_s), - ("p1", pixel_layout_s) -] +def pixel_layout(pack_factor): + return [("p"+str(i), pixel_layout_s) for i in range(pack_factor)] bpc_phy = 8 phy_layout_s = [ @@ -25,21 +23,21 @@ phy_layout_s = [ ("g", bpc_phy), ("b", bpc_phy) ] -phy_layout = [ - ("hsync", 1), - ("vsync", 1), - ("de", 1), - ("p0", phy_layout_s), - ("p1", phy_layout_s) -] +def phy_layout(pack_factor): + r = [("hsync", 1), ("vsync", 1), ("de", 1)] + for i in range(pack_factor): + r.append(("p"+str(i), phy_layout_s)) + return r class FrameInitiator(spi.SingleGenerator): - def __init__(self): + def __init__(self, pack_factor): + h_alignment_bits = log2_int(pack_factor) + hbits_dyn = _hbits - h_alignment_bits layout = [ - ("hres", _hbits, 640, 1), - ("hsync_start", _hbits, 656, 1), - ("hsync_end", _hbits, 752, 1), - ("hscan", _hbits, 800, 1), + ("hres", hbits_dyn, 640, h_alignment_bits), + ("hsync_start", hbits_dyn, 656, h_alignment_bits), + ("hsync_end", hbits_dyn, 752, h_alignment_bits), + ("hscan", hbits_dyn, 800, h_alignment_bits), ("vres", _vbits, 480), ("vsync_start", _vbits, 492), @@ -49,19 +47,20 @@ class FrameInitiator(spi.SingleGenerator): spi.SingleGenerator.__init__(self, layout, spi.MODE_EXTERNAL) class VTG(Module): - def __init__(self): + def __init__(self, pack_factor): + hbits_dyn = _hbits - log2_int(pack_factor) self.enable = Signal() self.timing = Sink([ - ("hres", _hbits), - ("hsync_start", _hbits), - ("hsync_end", _hbits), - ("hscan", _hbits), + ("hres", hbits_dyn), + ("hsync_start", hbits_dyn), + ("hsync_end", hbits_dyn), + ("hscan", hbits_dyn), ("vres", _vbits), ("vsync_start", _vbits), ("vsync_end", _vbits), ("vscan", _vbits)]) - self.pixels = Sink(pixel_layout) - self.phy = Source(phy_layout) + self.pixels = Sink(pixel_layout(pack_factor)) + self.phy = Source(phy_layout(pack_factor)) self.busy = Signal() ### @@ -71,7 +70,7 @@ class VTG(Module): active = Signal() generate_en = Signal() - hcounter = Signal(_hbits) + hcounter = Signal(hbits_dyn) vcounter = Signal(_vbits) skip = bpc - bpc_phy @@ -79,7 +78,7 @@ class VTG(Module): active.eq(hactive & vactive), If(active, [getattr(getattr(self.phy.payload, p), c).eq(getattr(getattr(self.pixels.payload, p), c)[skip:]) - for p in ["p0", "p1"] for c in ["r", "g", "b"]], + for p in ["p"+str(i) for i in range(pack_factor)] for c in ["r", "g", "b"]], self.phy.payload.de.eq(1) ), diff --git a/misoclib/framebuffer/phy.py b/misoclib/framebuffer/phy.py index ff71e690..fd281283 100644 --- a/misoclib/framebuffer/phy.py +++ b/misoclib/framebuffer/phy.py @@ -8,8 +8,8 @@ from misoclib.framebuffer.format import bpc_phy, phy_layout from misoclib.framebuffer import dvi class _FIFO(Module): - def __init__(self): - self.phy = Sink(phy_layout) + def __init__(self, pack_factor): + self.phy = Sink(phy_layout(pack_factor)) self.busy = Signal() self.pix_hsync = Signal() @@ -21,7 +21,7 @@ class _FIFO(Module): ### - fifo = RenameClockDomains(AsyncFIFO(phy_layout, 512), + fifo = RenameClockDomains(AsyncFIFO(phy_layout(pack_factor), 512), {"write": "sys", "read": "pix"}) self.submodules += fifo self.comb += [ @@ -31,23 +31,22 @@ class _FIFO(Module): self.busy.eq(0) ] - pix_parity = Signal() + unpack_counter = Signal(max=pack_factor) + assert(pack_factor & (pack_factor - 1) == 0) # only support powers of 2 self.sync.pix += [ - pix_parity.eq(~pix_parity), + unpack_counter.eq(unpack_counter + 1), self.pix_hsync.eq(fifo.dout.hsync), self.pix_vsync.eq(fifo.dout.vsync), - self.pix_de.eq(fifo.dout.de), - If(pix_parity, - self.pix_r.eq(fifo.dout.p1.r), - self.pix_g.eq(fifo.dout.p1.g), - self.pix_b.eq(fifo.dout.p1.b) - ).Else( - self.pix_r.eq(fifo.dout.p0.r), - self.pix_g.eq(fifo.dout.p0.g), - self.pix_b.eq(fifo.dout.p0.b) - ) + self.pix_de.eq(fifo.dout.de) ] - self.comb += fifo.re.eq(pix_parity) + for i in range(pack_factor): + pixel = getattr(fifo.dout, "p"+str(i)) + self.sync.pix += If(unpack_counter == i, + self.pix_r.eq(pixel.r), + self.pix_g.eq(pixel.g), + self.pix_b.eq(pixel.b) + ) + self.comb += fifo.re.eq(unpack_counter == (pack_factor - 1)) # This assumes a 50MHz base clock class _Clocking(Module, AutoCSR): @@ -168,8 +167,8 @@ class _Clocking(Module, AutoCSR): o_O=pads_dvi.clk_p, o_OB=pads_dvi.clk_n) class Driver(Module, AutoCSR): - def __init__(self, pads_vga, pads_dvi): - fifo = _FIFO() + def __init__(self, pack_factor, pads_vga, pads_dvi): + fifo = _FIFO(pack_factor) self.submodules += fifo self.phy = fifo.phy self.busy = fifo.busy