dvisampler: pack pixels in pixel clock domain to improve performance
author Sebastien Bourdeauducq <sebastien@milkymist.org>
Sat, 16 Nov 2013 12:53:26 +0000 (13:53 +0100)
committer Sebastien Bourdeauducq <sebastien@milkymist.org>
Sat, 16 Nov 2013 12:53:26 +0000 (13:53 +0100)
misoclib/dvisampler/__init__.py
misoclib/dvisampler/analysis.py
misoclib/dvisampler/common.py
misoclib/dvisampler/dma.py

index fb1bbb88cb15922d87e6969bc0260d8594ecef22..612e914187d22c9e76c70e877e305c467ce0d531 100644 (file)
@@ -12,7 +12,7 @@ from misoclib.dvisampler.analysis import SyncPolarity, ResolutionDetection, Fram
 from misoclib.dvisampler.dma import DMA
 
 class DVISampler(Module, AutoCSR):
-       def __init__(self, pads, asmiport, n_dma_slots=2):
+       def __init__(self, pads, lasmim, n_dma_slots=2):
                self.submodules.edid = EDID(pads)
                self.submodules.clocking = Clocking(pads)
 
@@ -62,7 +62,7 @@ class DVISampler(Module, AutoCSR):
                        self.resdetection.vsync.eq(self.syncpol.vsync)
                ]
 
-               self.submodules.frame = FrameExtraction()
+               self.submodules.frame = FrameExtraction(24*lasmim.dw//32)
                self.comb += [
                        self.frame.valid_i.eq(self.syncpol.valid_o),
                        self.frame.de.eq(self.syncpol.de),
@@ -72,7 +72,7 @@ class DVISampler(Module, AutoCSR):
                        self.frame.b.eq(self.syncpol.b)
                ]
 
-               self.submodules.dma = DMA(asmiport, n_dma_slots)
+               self.submodules.dma = DMA(lasmim, n_dma_slots)
                self.comb += self.frame.frame.connect(self.dma.frame)
                self.ev = self.dma.ev
 
index 17579f706ec808d762b748dda72043594015ad0b..5108f8d8e683f639c97acbc2d9674d8756f19728 100644 (file)
@@ -5,7 +5,7 @@ from migen.genlib.record import Record
 from migen.bank.description import *
 from migen.flow.actor import *
 
-from misoclib.dvisampler.common import channel_layout, frame_layout
+from misoclib.dvisampler.common import channel_layout
 
 class SyncPolarity(Module):
        def __init__(self):
@@ -106,7 +106,7 @@ class ResolutionDetection(Module, AutoCSR):
                self.specials += MultiReg(vcounter_st, self._vres.status)
 
 class FrameExtraction(Module, AutoCSR):
-       def __init__(self):
+       def __init__(self, word_width):
                # in pix clock domain
                self.valid_i = Signal()
                self.vsync = Signal()
@@ -116,39 +116,55 @@ class FrameExtraction(Module, AutoCSR):
                self.b = Signal(8)
 
                # in sys clock domain
-               self.frame = Source(frame_layout)
+               word_layout = [("parity", 1), ("pixels", word_width)]
+               self.frame = Source(word_layout)
                self.busy = Signal()
 
                self._r_overflow = CSR()
 
                ###
 
-               fifo_stb = Signal()
-               fifo_in = Record(frame_layout)
-               self.comb += [
-                       fifo_stb.eq(self.valid_i & self.de),
-                       fifo_in.r.eq(self.r),
-                       fifo_in.g.eq(self.g),
-                       fifo_in.b.eq(self.b),
-               ]
+               # start of frame detection
                vsync_r = Signal()
+               new_frame = Signal()
+               self.comb += new_frame.eq(self.vsync & ~vsync_r)
+               self.sync.pix += vsync_r.eq(self.vsync)
+
+               # pack pixels into words
+               cur_word = Signal(word_width)
+               cur_word_valid = Signal()
+               encoded_pixel = Signal(24)
+               self.comb += encoded_pixel.eq(Cat(self.b, self.g, self.r))
+               pack_factor = word_width//24
+               assert(pack_factor & (pack_factor - 1) == 0) # only support powers of 2
+               pack_counter = Signal(max=pack_factor)
                self.sync.pix += [
-                       If(self.vsync & ~vsync_r, fifo_in.parity.eq(~fifo_in.parity)),
-                       vsync_r.eq(self.vsync)
+                       cur_word_valid.eq(0),
+                       If(new_frame, 
+                               pack_counter.eq(0)
+                       ).Elif(self.valid_i & self.de,
+                               [If(pack_counter == (pack_factor-i-1),
+                                       cur_word[24*i:24*(i+1)].eq(encoded_pixel)) for i in range(pack_factor)],
+                               Cat(pack_counter, cur_word_valid).eq(pack_counter + 1)
+                       )
                ]
 
-               fifo = RenameClockDomains(AsyncFIFO(layout_len(frame_layout), 512),
+               # FIFO
+               fifo = RenameClockDomains(AsyncFIFO(word_layout, 512),
                        {"write": "pix", "read": "sys"})
                self.submodules += fifo
                self.comb += [
-                       fifo.we.eq(fifo_stb),
-                       fifo.din.eq(fifo_in.raw_bits()),
+                       fifo.din.pixels.eq(cur_word),
+                       fifo.we.eq(cur_word_valid)
+               ]
+               self.sync.pix += If(new_frame, fifo.din.parity.eq(~fifo.din.parity))
+               self.comb += [
                        self.frame.stb.eq(fifo.readable),
-                       self.frame.payload.raw_bits().eq(fifo.dout),
+                       self.frame.payload.eq(fifo.dout),
                        fifo.re.eq(self.frame.ack),
                        self.busy.eq(0)
                ]
-
+               
                # overflow detection
                pix_overflow = Signal()
                pix_overflow_reset = Signal()
index f053237f3f7a101b1a11d895170fdedde9a738d2..7fb9a420a2d28f176b1d4e82ffecf068922fa270 100644 (file)
@@ -1,3 +1,2 @@
 control_tokens = [0b1101010100, 0b0010101011, 0b0101010100, 0b1010101011]
 channel_layout = [("d", 8), ("c", 2), ("de", 1)]
-frame_layout = [("parity", 1), ("r", 8), ("g", 8), ("b", 8)]
index 2f7565dba7cf7bafc60da9b07f5bbd6130cf19c4..412b718f0bc2e87ea260011f7d5546dea138a1f4 100644 (file)
@@ -5,8 +5,6 @@ from migen.bank.eventmanager import *
 from migen.flow.actor import *
 from migen.actorlib import dma_lasmi
 
-from misoclib.dvisampler.common import frame_layout
-
 # Slot status: EMPTY=0 LOADED=1 PENDING=2
 class _Slot(Module, AutoCSR):
        def __init__(self, addr_bits, alignment_bits):
@@ -65,7 +63,8 @@ class DMA(Module):
                bus_dw = lasmim.dw
                alignment_bits = bits_for(bus_dw//8) - 1
 
-               self.frame = Sink(frame_layout)
+               fifo_word_width = 24*bus_dw//32
+               self.frame = Sink([("parity", 1), ("pixels", fifo_word_width)])
                self._r_frame_size = CSRStorage(bus_aw + alignment_bits, alignment_bits=alignment_bits)
                self.submodules._slot_array = _SlotArray(nslots, bus_aw, alignment_bits)
                self.ev = self._slot_array.ev
@@ -98,32 +97,23 @@ class DMA(Module):
                        )
                ]
 
-               # pack pixels into memory words
-               write_pixel = Signal()
-               last_pixel = Signal()
-               cur_memory_word = Signal(bus_dw)
-               encoded_pixel = Signal(32)
-               self.comb += [
-                       encoded_pixel.eq(Cat(
-                               self.frame.payload.b[6:], self.frame.payload.b,
-                               self.frame.payload.g[6:], self.frame.payload.g,
-                               self.frame.payload.r[6:], self.frame.payload.r))
-               ]
-               pack_factor = bus_dw//32
-               assert(pack_factor & (pack_factor - 1) == 0) # only support powers of 2
-               pack_counter = Signal(max=pack_factor)
-               self.comb += last_pixel.eq(pack_counter == (pack_factor - 1))
-               self.sync += If(write_pixel,
-                               [If(pack_counter == (pack_factor-i-1),
-                                       cur_memory_word[32*i:32*(i+1)].eq(encoded_pixel)) for i in range(pack_factor)],
-                               pack_counter.eq(pack_counter + 1)
-                       )
+               # 24bpp -> 32bpp
+               memory_word = Signal(bus_dw)
+               pixbits = []
+               for i in range(bus_dw//32):
+                       for j in range(3):
+                               b = (i*3+j)*8
+                               pixbits.append(self.frame.payload.pixels[b+6:b+8])
+                               pixbits.append(self.frame.payload.pixels[b:b+8])
+                       pixbits.append(0)
+                       pixbits.append(0)
+               self.comb += memory_word.eq(Cat(*pixbits))
 
                # bus accessor
                self.submodules._bus_accessor = dma_lasmi.Writer(lasmim)
                self.comb += [
                        self._bus_accessor.address_data.payload.a.eq(current_address),
-                       self._bus_accessor.address_data.payload.d.eq(cur_memory_word)
+                       self._bus_accessor.address_data.payload.d.eq(memory_word)
                ]
 
                # control FSM
@@ -133,23 +123,15 @@ class DMA(Module):
                fsm.act("WAIT_SOF",
                        reset_words.eq(1),
                        self.frame.ack.eq(~self._slot_array.address_valid | ~sof),
-                       If(self._slot_array.address_valid & sof & self.frame.stb, NextState("TRANSFER_PIXEL"))
+                       If(self._slot_array.address_valid & sof & self.frame.stb, NextState("TRANSFER_PIXELS"))
                )
-               fsm.act("TRANSFER_PIXEL",
-                       self.frame.ack.eq(1),
+               fsm.act("TRANSFER_PIXELS",
+                       self.frame.ack.eq(self._bus_accessor.address_data.ack),
                        If(self.frame.stb,
-                               write_pixel.eq(1),
-                               If(last_pixel, NextState("TO_MEMORY"))
-                       )
-               )
-               fsm.act("TO_MEMORY",
-                       self._bus_accessor.address_data.stb.eq(1),
-                       If(self._bus_accessor.address_data.ack,
-                               count_word.eq(1),
-                               If(last_word,
-                                       NextState("EOF")
-                               ).Else(
-                                       NextState("TRANSFER_PIXEL")
+                               self._bus_accessor.address_data.stb.eq(1),
+                               If(self._bus_accessor.address_data.ack,
+                                       count_word.eq(1),
+                                       If(last_word, NextState("EOF"))
                                )
                        )
                )