From eae0e00496741b6afffb157b44576695237d7696 Mon Sep 17 00:00:00 2001 From: bunnie Date: Sun, 19 Jan 2020 20:55:10 +0100 Subject: [PATCH] cores/clock/xadc: ease DRP timings Hard IP blocks are fixed in location, so long/deep combinational paths routing to multiple hard IP blocks can lead to timing closure problems. XADC and MMCM DRPs currently have their DEN pins triggered by the ".re" output of a CSR. This is asynchronously derived from a fairly complicated set of logic that involves a logic path that goes all the way back through the cache and arbitration mechanisms of the wishbone bus. On more complex designs, this is leading to a failure of timing closure for these paths, because the hard IP blocks can be located in disparate portions of the chip which "pulls" the logic cluster in opposite directions in an attempt to absorb the routing delays to these IP blocks, leading to non-optimal placement for everything else and thus timing closure problems. This pull request proposes that we add a pipeline delay on these critical paths. This delays the commit of the data to the DRP by one cycle, but greatly relieves timing because the pipeline register can be placed close to the cluster of logic that computes addresses, caching, and arbitration, allowing for the routing slack to the hard IP blocks to be absorbed by the path between the pipe register and the hard IP block. In general, this shouldn't be a problem because the algorithm to program the DRP is to hit the write or read CSR, and then poll the drdy bit until it is asserted (so the process is already pretty slow). The MMCM in particular should have almost no impact, because MMCM updates are infrequent and the subsequent lock time of the MMCM is pretty long. The XADC is potentially more problematic because it can produce data at up to 1MSPS; but if sysclk is around 100MHz, adding 10ns to the read latency is relatively small compared to the theoretical maximum data rate of one every 1,000ns. Note that the xadc patch requires introducing a bit of logic into the non-DRP path. This is because without explicitly putting an "if" statement around the logic, you fall back to the non-blocking semantics of the verilog operator, which ultimately leads to a pretty hefty combinational path. By having a default "if" that should get optimized out when DRP is not enabled, when the DRP path /is/ enabled the synthesizer knows it can safely push the async signal into a simple mux as opposed to worrying about enforcing the non-blocking operator semantics to get the desired result. --- litex/soc/cores/clock.py | 9 +++++++-- litex/soc/cores/xadc.py | 21 ++++++++++++++------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/litex/soc/cores/clock.py b/litex/soc/cores/clock.py index 252cd491..a36f9a2f 100644 --- a/litex/soc/cores/clock.py +++ b/litex/soc/cores/clock.py @@ -102,17 +102,22 @@ class XilinxClocking(Module, AutoCSR): # # # + den_pipe = Signal() + dwe_pipe = Signal() + drp_drdy = Signal() self.params.update( i_DCLK = ClockSignal(), - i_DWE = self.drp_write.re, - i_DEN = self.drp_read.re | self.drp_write.re, + i_DWE = dwe_pipe, + i_DEN = den_pipe, o_DRDY = drp_drdy, i_DADDR = self.drp_adr.storage, i_DI = self.drp_dat_w.storage, o_DO = self.drp_dat_r.status ) self.sync += [ + den_pipe.eq(self.drp_read.re | self.drp_write.re), + dwe_pipe.eq(self.drp_write.re), If(self.drp_read.re | self.drp_write.re, self.drp_drdy.status.eq(0) ).Elif(drp_drdy, diff --git a/litex/soc/cores/xadc.py b/litex/soc/cores/xadc.py index 178313b7..b6337fbe 100644 --- a/litex/soc/cores/xadc.py +++ b/litex/soc/cores/xadc.py @@ -43,6 +43,7 @@ class XADC(Module, AutoCSR): self.dadr = Signal(7) self.di = Signal(16) self.do = Signal(16) + self.drp_en = Signal() self.specials += Instance("XADC", # From ug480 p_INIT_40=0x9000, p_INIT_41=0x2ef0, p_INIT_42=0x0400, @@ -77,8 +78,10 @@ class XADC(Module, AutoCSR): o_DO = self.do ) self.comb += [ - self.den.eq(eoc), - self.dadr.eq(channel), + If(~self.drp_en, + self.den.eq(eoc), + self.dadr.eq(channel), + ) ] # Channels update -------------------------------------------------------------------------- @@ -113,16 +116,20 @@ class XADC(Module, AutoCSR): # # # + den_pipe = Signal() # add a register to ease timing closure of den + self.comb += [ - self.dwe.eq(self.drp_write.re), self.di.eq(self.drp_dat_w.storage), self.drp_dat_r.status.eq(self.do), - If(self.drp_enable.storage, - self.den.eq(self.drp_read.re | self.drp_write.re), - self.dadr.eq(self.drp_adr.storage), - ), + If(self.drp_en, + self.den.eq(den_pipe), + self.dadr.eq(self.drp_adr.storage), + ) ] self.sync += [ + self.dwe.eq(self.drp_write.re), + self.drp_en.eq(self.drp_enable.storage), + den_pipe.eq(self.drp_read.re | self.drp_write.re), If(self.drp_read.re | self.drp_write.re, self.drp_drdy.status.eq(0) ).Elif(self.drdy, -- 2.30.2