1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
5 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
10 # from litex.soc.interconnect.csr import *
13 from nmigen
.lib
.cdc
import FFSynchronizer
14 from nmigen
.utils
import log2_int
16 from lambdasoc
.periph
import Peripheral
18 import gram
.stream
as stream
19 from gram
.common
import *
20 from gram
.phy
.dfi
import *
21 from gram
.timeline
import Timeline
23 # Lattice ECP5 DDR PHY Initialization --------------------------------------------------------------
# Initialization helper for the ECP5 DDR primitives: instantiates a DDRDLLA, synchronizes a
# lock indication and, on each rising edge of that lock, triggers a timed
# freeze / stop / reset / pause / update sequence.
# NOTE(review): several original lines are absent from this excerpt (module creation, signal
# declarations, the remaining DDRDLLA ports, the construction of `tl`); the comments below
# describe only what is visible.
25 class ECP5DDRPHYInit(Elaboratable
):
# eclk_cd: clock-domain name; elaborate() drives that domain's reset from the sequence.
26 def __init__(self
, eclk_cd
):
30 self
._eclk
_cd
= eclk_cd
32 def elaborate(self
, platform
):
42 # DDRDLLA instance -------------------------------------------------------------------------
# The DDRDLLA is clocked from the "sys2x" domain; its reset uses ResetSignal() with no
# explicit domain argument.
45 m
.submodules
+= Instance("DDRDLLA",
46 i_CLK
= ClockSignal("sys2x"),
47 i_RST
= ResetSignal(),
# `_lock` is passed through a FFSynchronizer to produce `lock`
# (presumably `_lock` is the DDRDLLA lock output -- its connection is not visible here).
55 m
.submodules
+= FFSynchronizer(_lock
, lock
)
# lock_d is `lock` delayed by one cycle; new_lock is the registered value of
# (lock & ~lock_d), i.e. a pulse following a rising edge of lock.
56 m
.d
.sync
+= lock_d
.eq(lock
)
57 m
.d
.sync
+= new_lock
.eq(lock
& ~lock_d
)
59 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
# (time, [actions]) pairs at multiples of `t`; presumably the event list handed to a
# Timeline (imported above) -- the constructor call and the value of `t` are not visible here.
62 (1*t
, [freeze
.eq(1)]), # Freeze DDRDLLA
63 (2*t
, [stop
.eq(1)]), # Stop ECLK domain
64 (3*t
, [reset
.eq(1)]), # Reset ECLK domain
65 (4*t
, [reset
.eq(0)]), # Release ECLK domain reset
66 (5*t
, [stop
.eq(0)]), # Release ECLK domain stop
67 (6*t
, [freeze
.eq(0)]), # Release DDRDLLA freeze
68 (7*t
, [pause
.eq(1)]), # Pause DQSBUFM
69 (8*t
, [update
.eq(1)]), # Update DDRDLLA
70 (9*t
, [update
.eq(0)]), # Release DDRDLLA update
71 (10*t
, [pause
.eq(0)]), # Release DQSBUFM pause
# The sequence is triggered by the new_lock pulse.
75 m
.d
.comb
+= tl
.trigger
.eq(new_lock
)
77 # ------------------------------------------------------------------------------------------
# Reset of the domain named at construction time follows the sequence's `reset` signal.
82 ResetSignal(self
._eclk
_cd
).eq(reset
)
87 # Lattice ECP5 DDR PHY -----------------------------------------------------------------------------
# DDR3 PHY for the ECP5, exposed as a Peripheral: its control/status registers are created in
# a CSR bank and made reachable through a bridge whose bus and irq are published as
# self.bus / self.irq.
89 class ECP5DDRPHY(Peripheral
, Elaboratable
):
# pads:         DRAM pad object; the code reads .dq.o here and .a/.ba/.clk/.dq and the
#               control pads in elaborate().
# sys_clk_freq: system clock frequency (default 100e6); stored and used by elaborate()
#               to compute tck.
90 def __init__(self
, pads
, sys_clk_freq
=100e6
):
91 super().__init
__() # Peripheral init
# NOTE(review): self.pads is read just below, but the only assignment visible in this
# excerpt is the commented-out line that follows -- confirm self.pads is set before use.
93 #self.pads = PHYPadsCombiner(pads)
95 self
._sys
_clk
_freq
= sys_clk_freq
# Data width is taken from the DQ output pads and must be a whole number of bytes.
97 databits
= len(self
.pads
.dq
.o
)
98 assert databits
%8 == 0
101 bank
= self
.csr_bank()
# One select bit per byte lane: elaborate() applies the delay/bitslip strobes below only to
# lanes whose bit is set in this register.
103 self
._dly
_sel
= bank
.csr(databits
//8, "rw")
# Strobes that reset / increment the per-lane `rdly` value in elaborate().
105 self
._rdly
_dq
_rst
= bank
.csr(1, "rw")
106 self
._rdly
_dq
_inc
= bank
.csr(1, "rw")
# Strobes that reset / increment the per-lane `dqs_bitslip` value in elaborate().
107 self
._rdly
_dq
_bitslip
_rst
= bank
.csr(1, "rw")
108 self
._rdly
_dq
_bitslip
= bank
.csr(1, "rw")
# Burst-detect bookkeeping: `_burstdet_seen` holds one read-only bit per byte lane, cleared
# by a strobe on `_burstdet_clr` in elaborate().
110 self
._burstdet
_clr
= bank
.csr(1, "rw")
111 self
._burstdet
_seen
= bank
.csr(databits
//8, "r")
# Rising-edge event source; no use of it is visible in this excerpt.
113 self
._zero
_ev
= self
.event(mode
="rise")
# 32-bit bridge with 8-bit granularity; expose its bus and interrupt line.
115 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
116 self
.bus
= self
._bridge
.bus
117 self
.irq
= self
._bridge
.irq
# Builds the PHY logic: derives timing parameters, publishes PhySettings and the DFI
# interface, then instantiates the ECP5 I/O primitives.
# NOTE(review): several original lines are absent from this excerpt (e.g. the creation of `m`,
# and the definitions of `memtype` and `nphases`); comments describe only what is visible.
119 def elaborate(self
, platform
):
# tck evaluates to 1 / (2 * sys_clk_freq).
123 tck
= 2/(2*2*self
._sys
_clk
_freq
)
124 addressbits
= len(self
.pads
.a
.o
)
125 bankbits
= len(self
.pads
.ba
.o
)
# A single rank is assumed when the pads have no cs_n.
126 nranks
= 1 if not hasattr(self
.pads
, "cs_n") else len(self
.pads
.cs_n
)
# NOTE(review): the width is taken from dq.oe here but from dq.o in __init__ -- confirm
# both have the same length.
127 databits
= len(self
.pads
.dq
.oe
)
130 # Init -------------------------------------------------------------------------------------
# The init helper is built for the "sys2x" ECLK domain and wrapped in DomainRenamer("init").
131 m
.submodules
.init
= DomainRenamer("init")(ECP5DDRPHYInit("sys2x"))
133 # Parameters -------------------------------------------------------------------------------
# CAS latency / CAS write latency for this memory type and clock period, and their
# equivalents in system-clock cycles (helpers come from the star-imports above).
134 cl
, cwl
= get_cl_cw(memtype
, tck
)
135 cl_sys_latency
= get_sys_latency(nphases
, cl
)
136 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
# One data-valid bit per byte lane, driven by each lane's DQSBUFM DATAVALID output.
139 self
.datavalid
= Signal(databits
//8)
141 # PHY settings -----------------------------------------------------------------------------
142 rdcmdphase
, rdphase
= get_sys_phases(nphases
, cl_sys_latency
, cl
)
143 wrcmdphase
, wrphase
= get_sys_phases(nphases
, cwl_sys_latency
, cwl
)
# Only part of the PhySettings argument list is visible in this excerpt.
144 self
.settings
= PhySettings(
145 phytype
= "ECP5DDRPHY",
148 dfi_databits
= 4*databits
,
153 rdcmdphase
= rdcmdphase
,
154 wrcmdphase
= wrcmdphase
,
# read_latency also fixes the length of the rddata_en shift register declared below.
157 read_latency
= 2 + cl_sys_latency
+ 2 + log2_int(4//nphases
) + 4,
158 write_latency
= cwl_sys_latency
161 # DFI Interface ----------------------------------------------------------------------------
# The DFI interface is 4*databits wide; the final argument is 4
# (presumably the number of phases -- confirm against Interface's signature).
162 self
.dfi
= dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4)
# Read-enable shift register, one bit per cycle of read latency (driven in the
# "Read Control Path" section).
167 rddata_en
= Signal(self
.settings
.read_latency
)
169 # Clock --------------------------------------------------------------------------------
# One ODDRX2F per clock output pad, with ECLK/RST from "sys2x" and SCLK from the default
# domain (the data inputs of this instance are not visible in this excerpt).
170 for i
in range(len(self
.pads
.clk
.o
)):
172 m
.submodules
+= Instance("ODDRX2F",
173 i_RST
= ResetSignal("sys2x"),
174 i_ECLK
= ClockSignal("sys2x"),
175 i_SCLK
= ClockSignal(),
180 o_Q
= self
.pads
.clk
.o
[i
]
184 # Addresses and Commands ---------------------------------------------------------------
# Each address bit goes through an ODDRX2F: D0/D1 both carry DFI phase 0 and D2/D3 both
# carry DFI phase 1, so each phase's value is presented on two consecutive data inputs.
185 for i
in range(addressbits
):
186 m
.submodules
+= Instance("ODDRX2F",
187 i_RST
= ResetSignal("sys2x"),
188 i_ECLK
= ClockSignal("sys2x"),
189 i_SCLK
= ClockSignal(),
190 i_D0
= dfi
.phases
[0].address
[i
],
191 i_D1
= dfi
.phases
[0].address
[i
],
192 i_D2
= dfi
.phases
[1].address
[i
],
193 i_D3
= dfi
.phases
[1].address
[i
],
194 o_Q
= self
.pads
.a
.o
[i
]
# Bank bits: same arrangement as the address bits.
196 for i
in range(bankbits
):
197 m
.submodules
+= Instance("ODDRX2F",
198 i_RST
= ResetSignal("sys2x"),
199 i_ECLK
= ClockSignal("sys2x"),
200 i_SCLK
= ClockSignal(),
201 i_D0
= dfi
.phases
[0].bank
[i
],
202 i_D1
= dfi
.phases
[0].bank
[i
],
203 i_D2
= dfi
.phases
[1].bank
[i
],
204 i_D3
= dfi
.phases
[1].bank
[i
],
205 o_Q
= self
.pads
.ba
.o
[i
]
# Control signals: the fixed set, plus reset_n and cs_n when the pads provide them.
207 controls
= ["ras_n", "cas_n", "we_n", "cke", "odt"]
208 if hasattr(self
.pads
, "reset_n"):
209 controls
.append("reset_n")
210 if hasattr(self
.pads
, "cs_n"):
211 controls
.append("cs_n")
# The DFI phase attribute and the pad attribute are looked up by the same name.
# NOTE(review): control pads are measured and indexed directly (no `.o`), unlike the
# clk/a/ba pads above -- confirm this matches the pad object's layout.
212 for name
in controls
:
213 for i
in range(len(getattr(self
.pads
, name
))):
214 m
.submodules
+= Instance("ODDRX2F",
215 i_RST
= ResetSignal("sys2x"),
216 i_ECLK
= ClockSignal("sys2x"),
217 i_SCLK
= ClockSignal(),
218 i_D0
= getattr(dfi
.phases
[0], name
)[i
],
219 i_D1
= getattr(dfi
.phases
[0], name
)[i
],
220 i_D2
= getattr(dfi
.phases
[1], name
)[i
],
221 i_D3
= getattr(dfi
.phases
[1], name
)[i
],
222 o_Q
= getattr(self
.pads
, name
)[i
]
225 # DQ ---------------------------------------------------------------------------------------
# A single DQSPattern submodule is shared by all byte lanes; its preamble/postamble
# signals are driven in the write-control sections and used by the tristate controls.
228 dqs_pattern
= DQSPattern()
229 m
.submodules
+= dqs_pattern
# One iteration per byte lane.
230 for i
in range(databits
//8):
# Read delay for this lane: only when the lane is selected in _dly_sel, a strobe on
# _rdly_dq_rst clears `rdly`, otherwise a strobe on _rdly_dq_inc increments it.
# (`rdly` is declared on a line not visible in this excerpt.)
239 with m
.If(self
._dly
_sel
.storage
[i
]):
240 with m
.If(self
._rdly
_dq
_rst
.re
):
241 m
.d
.sync
+= rdly
.eq(0)
242 with m
.Elif(self
._rdly
_dq
_inc
.re
):
243 m
.d
.sync
+= rdly
.eq(rdly
+ 1)
# 2-bit bitslip selector for this lane, with the same select / reset / increment scheme.
247 dqs_bitslip
= Signal(2)
248 with m
.If(self
._dly
_sel
.storage
[i
]):
249 with m
.If(self
._rdly
_dq
_bitslip
_rst
.re
):
250 m
.d
.sync
+= dqs_bitslip
.eq(0)
251 with m
.Elif(self
._rdly
_dq
_bitslip
.re
):
252 m
.d
.sync
+= dqs_bitslip
.eq(dqs_bitslip
+ 1)
# Four candidate positions (offsets -2..1 around cl_sys_latency) for the read pulse: each
# case asserts dqs_read when either bit of a 2-bit window of rddata_en is set; dqs_bitslip
# picks the window. (`dqs_cases` and `dqs_read` are declared on lines not visible here.)
254 for j
, b
in enumerate(range(-2, 2)):
255 dqs_cases
[j
] = dqs_read
.eq(rddata_en
[cl_sys_latency
+ b
:cl_sys_latency
+ b
+ 2] != 0)
# NOTE(review): adding a `Case(...)` object to m.d.sync is the Migen idiom -- confirm it is
# supported by the nMigen version in use (nMigen normally uses `with m.Switch`).
256 m
.d
.sync
+= Case(dqs_bitslip
, dqs_cases
)
# Per-lane DQSBUFM. Only part of its port list is visible in this excerpt.
257 m
.submodules
+= Instance("DQSBUFM",
258 p_DQS_LI_DEL_ADJ
= "MINUS",
259 p_DQS_LI_DEL_VAL
= 1,
260 p_DQS_LO_DEL_ADJ
= "MINUS",
261 p_DQS_LO_DEL_VAL
= 4,
263 i_SCLK
= ClockSignal("sys"),
264 i_ECLK
= ClockSignal("sys2x"),
265 i_RST
= ResetSignal("sys2x"),
# NOTE(review): `self.init` is read here, but only `m.submodules.init` is assigned in the
# visible lines -- confirm self.init exists and exposes `delay` and `pause`.
266 i_DDRDEL
= self
.init
.delay
,
# The lane is paused by the init sequence or whenever it is selected in _dly_sel.
267 i_PAUSE
= self
.init
.pause | self
._dly
_sel
.storage
[i
],
270 # Assert LOADNs to use DDRDEL control
278 # Reads (generate shifted DQS clock for reads)
# The three low bits of rdly select the read clock.
281 i_READCLKSEL0
= rdly
[0],
282 i_READCLKSEL1
= rdly
[1],
283 i_READCLKSEL2
= rdly
[2],
# Read/write pointers are forwarded to this lane's IDDRX2DQA instances further down.
286 o_RDPNTR0
= rdpntr
[0],
287 o_RDPNTR1
= rdpntr
[1],
288 o_RDPNTR2
= rdpntr
[2],
289 o_WRPNTR0
= wrpntr
[0],
290 o_WRPNTR1
= wrpntr
[1],
291 o_WRPNTR2
= wrpntr
[2],
292 o_DATAVALID
= self
.datavalid
[i
],
293 o_BURSTDET
= burstdet
,
295 # Writes (generate shifted ECLK clock for writes)
# Burst-detect latch: the lane's _burstdet_seen bit is set on a rising edge of burstdet
# and cleared by a strobe on _burstdet_clr. The set condition is a separate `If`
# (not an `Elif`) placed after the clear.
299 burstdet_d
= Signal()
300 m
.d
.sync
+= burstdet_d
.eq(burstdet
)
301 with m
.If(self
._burstdet
_clr
.re
):
302 m
.d
.sync
+= self
._burstdet
_seen
.status
[i
].eq(0)
303 with m
.If(burstdet
& ~burstdet_d
):
304 m
.d
.sync
+= self
._burstdet
_seen
.status
[i
].eq(1)
306 # DQS and DM ---------------------------------------------------------------------------
# Data-mask path for byte lane i. dm_o_data gathers this lane's 8 mask bits for the burst:
# bits 0-3 from DFI phase 0 and bits 4-7 from DFI phase 1, one bit from each of the four
# databits//8-wide slices of wrdata_mask.
307 dm_o_data
= Signal(8)
308 dm_o_data_d
= Signal(8)
309 dm_o_data_muxed
= Signal(4)
310 m
.d
.comb
+= dm_o_data
.eq(Cat(
311 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
312 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
313 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
314 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
316 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
317 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
318 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
319 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
# dm_o_data_d is dm_o_data delayed by one cycle.
321 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
# bl8_chunk selects which half is sent: case 0 takes the low nibble of the current word,
# case 1 takes the high nibble of the delayed word.
# (`dm_bl8_cases` is declared on a line not visible in this excerpt.)
323 dm_bl8_cases
[0] = dm_o_data_muxed
.eq(dm_o_data
[:4])
324 dm_bl8_cases
[1] = dm_o_data_muxed
.eq(dm_o_data_d
[4:])
# NOTE(review): `Case(...)` added to m.d.sync is the Migen idiom -- confirm it is supported
# by the nMigen version in use.
325 m
.d
.sync
+= Case(bl8_chunk
, dm_bl8_cases
) # FIXME: use self.comb?
# Output DDR register for the DM pad (its output connection is not visible here).
326 m
.submodules
+= Instance("ODDRX2DQA",
327 i_RST
= ResetSignal("sys2x"),
328 i_ECLK
= ClockSignal("sys2x"),
329 i_SCLK
= ClockSignal(),
331 i_D0
= dm_o_data_muxed
[0],
332 i_D1
= dm_o_data_muxed
[1],
333 i_D2
= dm_o_data_muxed
[2],
334 i_D3
= dm_o_data_muxed
[3],
# DQS output register: the data inputs are currently hard-wired to the constant pattern
# 0,1,0,1 instead of the dqs_pattern outputs (see the FIXMEs).
341 Instance("ODDRX2DQSB",
342 i_RST
= ResetSignal("sys2x"),
343 i_ECLK
= ClockSignal("sys2x"),
344 i_SCLK
= ClockSignal(),
346 i_D0
= 0, # FIXME: dqs_pattern.o[3],
347 i_D1
= 1, # FIXME: dqs_pattern.o[2],
348 i_D2
= 0, # FIXME: dqs_pattern.o[1],
349 i_D3
= 1, # FIXME: dqs_pattern.o[0],
# DQS tristate control: T0/T1 are low whenever preamble, dqs_oe or postamble is active.
352 Instance("TSHX2DQSA",
353 i_RST
= ResetSignal("sys2x"),
354 i_ECLK
= ClockSignal("sys2x"),
355 i_SCLK
= ClockSignal(),
357 i_T0
= ~
(dqs_pattern
.preamble | dqs_oe | dqs_pattern
.postamble
),
358 i_T1
= ~
(dqs_pattern
.preamble | dqs_oe | dqs_pattern
.postamble
),
# NOTE(review): this uses a bare `pads` (not `self.pads`), and no local `pads` binding or
# declaration of `dqs_oe_n` is visible in this excerpt -- confirm both are defined.
361 Tristate(pads
.dqs_p
[i
], dqs
, ~dqs_oe_n
, dqs_i
)
# One iteration per DQ bit of byte lane i (bits 8*i .. 8*i+7).
364 for j
in range(8*i
, 8*(i
+1)):
368 dq_i_delayed
= Signal()
369 dq_i_data
= Signal(8)
370 dq_o_data
= Signal(8)
371 dq_o_data_d
= Signal(8)
372 dq_o_data_muxed
= Signal(4)
# Write path: dq_o_data gathers the 8 values this pin sends during the burst -- bits 0-3
# from DFI phase 0 and bits 4-7 from DFI phase 1, one bit from each of the four
# databits-wide slices of wrdata.
373 m
.d
.comb
+= dq_o_data
.eq(Cat(
374 dfi
.phases
[0].wrdata
[0*databits
+j
],
375 dfi
.phases
[0].wrdata
[1*databits
+j
],
376 dfi
.phases
[0].wrdata
[2*databits
+j
],
377 dfi
.phases
[0].wrdata
[3*databits
+j
],
379 dfi
.phases
[1].wrdata
[0*databits
+j
],
380 dfi
.phases
[1].wrdata
[1*databits
+j
],
381 dfi
.phases
[1].wrdata
[2*databits
+j
],
382 dfi
.phases
[1].wrdata
[3*databits
+j
])
# dq_o_data_d is dq_o_data delayed by one cycle.
384 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
# bl8_chunk selects which half is sent: case 0 takes the low nibble of the current word,
# case 1 takes the high nibble of the delayed word.
# (`dq_bl8_cases` is declared on a line not visible in this excerpt.)
386 dq_bl8_cases
[0] = dq_o_data_muxed
.eq(dq_o_data
[:4])
387 dq_bl8_cases
[1] = dq_o_data_muxed
.eq(dq_o_data_d
[4:])
# NOTE(review): `Case(...)` added to m.d.sync is the Migen idiom -- confirm it is supported
# by the nMigen version in use.
388 m
.d
.sync
+= Case(bl8_chunk
, dq_bl8_cases
) # FIXME: use self.comb?
# Raw 4-bit capture from the input DDR register below.
389 _dq_i_data
= Signal(4)
# Output DDR register for this DQ pin (its output connection is not visible here).
391 Instance("ODDRX2DQA",
392 i_RST
= ResetSignal("sys2x"),
393 i_ECLK
= ClockSignal("sys2x"),
394 i_SCLK
= ClockSignal(),
396 i_D0
= dq_o_data_muxed
[0],
397 i_D1
= dq_o_data_muxed
[1],
398 i_D2
= dq_o_data_muxed
[2],
399 i_D3
= dq_o_data_muxed
[3],
# Parameter of an instance whose opening line is not visible in this excerpt.
403 p_DEL_MODE
= "DQS_ALIGNED_X2",
# Input DDR register: uses the read/write pointers produced by this lane's DQSBUFM and
# delivers four captured bits per cycle.
410 Instance("IDDRX2DQA",
411 i_RST
= ResetSignal("sys2x"),
412 i_ECLK
= ClockSignal("sys2x"),
413 i_SCLK
= ClockSignal(),
415 i_RDPNTR0
= rdpntr
[0],
416 i_RDPNTR1
= rdpntr
[1],
417 i_RDPNTR2
= rdpntr
[2],
418 i_WRPNTR0
= wrpntr
[0],
419 i_WRPNTR1
= wrpntr
[1],
420 i_WRPNTR2
= wrpntr
[2],
422 o_Q0
= _dq_i_data
[0],
423 o_Q1
= _dq_i_data
[1],
424 o_Q2
= _dq_i_data
[2],
425 o_Q3
= _dq_i_data
[3],
# Assemble 8 read bits over two cycles: the previous capture moves to the low nibble while
# the newest capture is stored in the high nibble.
428 m
.d
.sync
+= dq_i_data
[:4].eq(dq_i_data
[4:])
429 m
.d
.sync
+= dq_i_data
[4:].eq(_dq_i_data
)
# Scatter the 8 assembled bits back to DFI: bits 0-3 to phase 0 and bits 4-7 to phase 1,
# mirroring the write-side layout above.
431 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
432 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
433 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
434 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
435 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[4]),
436 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[5]),
437 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[6]),
438 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[7]),
# DQ tristate control (the instance's opening line is not visible here): T0/T1 are low
# whenever preamble, dq_oe or postamble is active.
442 i_RST
= ResetSignal("sys2x"),
443 i_ECLK
= ClockSignal("sys2x"),
444 i_SCLK
= ClockSignal(),
446 i_T0
= ~
(dqs_pattern
.preamble | dq_oe | dqs_pattern
.postamble
),
447 i_T1
= ~
(dqs_pattern
.preamble | dq_oe | dqs_pattern
.postamble
),
# NOTE(review): this uses a bare `pads` (not `self.pads`), and no local `pads` binding or
# declaration of `dq_oe_n` is visible in this excerpt -- confirm both are defined.
450 Tristate(pads
.dq
[j
], dq_o
, ~dq_oe_n
, dq_i
)
453 # Read Control Path ------------------------------------------------------------------------
454 # Creates a shift register of read commands coming from the DFI interface. This shift register
455 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
456 # DFI interface that the read data is valid.
458 # The DQS read must be asserted for 2 sys_clk cycles before the read data is coming back from
459 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
461 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
462 # interface, the latency is the sum of the ODDRX2DQA, CAS, IDDRX2DQA latencies.
# rddata_en is the concatenation of the current rddata_en bit of the read phase (bit 0) with
# last cycle's register contents, so older requests sit at higher bit positions.
463 rddata_en_last
= Signal
.like(rddata_en
)
464 m
.d
.comb
+= rddata_en
.eq(Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
465 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
# Every DFI phase reports rddata_valid from the last (oldest) bit of the shift register.
466 m
.d
.sync
+= [phase
.rddata_valid
.eq(rddata_en
[-1]) for phase
in dfi
.phases
]
468 # Write Control Path -----------------------------------------------------------------------
469 # Creates a shift register of write commands coming from the DFI interface. This shift register
470 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
471 # interface: The PHY is operating in halfrate mode (so it provides 4 data words every sys_clk cycle:
472 # 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
473 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
474 # FIXME: understand +2
# NOTE(review): the FIXME above mentions "+2", but the register below is sized
# cwl_sys_latency + 5 and the taps used are cwl_sys_latency + 1 .. + 4.
475 wrdata_en
= Signal(cwl_sys_latency
+ 5)
# Same shift-register construction as the read path: the current wrdata_en bit of the write
# phase enters at bit 0, followed by last cycle's register contents.
476 wrdata_en_last
= Signal
.like(wrdata_en
)
477 m
.d
.comb
+= wrdata_en
.eq(Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
478 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
# DQ output enable is active while the write is at tap +2 or +3 (two cycles in total).
479 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 2] | wrdata_en
[cwl_sys_latency
+ 3])
# bl8_chunk (the burst-half selector used by the DQ/DM muxes) follows tap +1.
480 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
# DQS output enable follows the DQ output enable.
481 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
483 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
484 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
485 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
486 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
# Preamble: the write has reached tap +1 but tap +2 is not yet set.
487 m
.d
.comb
+= dqs_pattern
.preamble
.eq( wrdata_en
[cwl_sys_latency
+ 1] & ~wrdata_en
[cwl_sys_latency
+ 2])
# Postamble: the write has reached tap +4 and tap +3 is no longer set.
488 m
.d
.comb
+= dqs_pattern
.postamble
.eq(wrdata_en
[cwl_sys_latency
+ 4] & ~wrdata_en
[cwl_sys_latency
+ 3])