1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
12 from nmigen
.lib
.cdc
import FFSynchronizer
13 from nmigen
.utils
import log2_int
15 from lambdasoc
.periph
import Peripheral
17 import gram
.stream
as stream
18 from gram
.common
import *
19 from gram
.phy
.dfi
import Interface
20 from gram
.compat
import Timeline
22 # Lattice ECP5 DDR PHY Initialization --------------------------------------------------------------
25 class ECP5DDRPHYInit(Elaboratable
):
32 def elaborate(self
, platform
):
39 # DDRDLLA instance -------------------------------------------------------------------------
42 m
.submodules
+= Instance("DDRDLLA",
43 i_CLK
=ClockSignal("sync2x"),
44 i_RST
=ResetSignal("init"),
52 m
.submodules
+= FFSynchronizer(_lock
, lock
, o_domain
="init")
53 m
.d
.init
+= lock_d
.eq(lock
)
54 m
.d
.sync
+= new_lock
.eq(lock
& ~lock_d
)
56 # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
59 (1*t
, [freeze
.eq(1)]), # Freeze DDRDLLA
60 (2*t
, [self
.stop
.eq(1)]), # Stop ECLK domain
61 (3*t
, [self
.reset
.eq(1)]), # Reset ECLK domain
62 (4*t
, [self
.reset
.eq(0)]), # Release ECLK domain reset
63 (5*t
, [self
.stop
.eq(0)]), # Release ECLK domain stop
64 (6*t
, [freeze
.eq(0)]), # Release DDRDLLA freeze
65 (7*t
, [self
.pause
.eq(1)]), # Pause DQSBUFM
66 (8*t
, [update
.eq(1)]), # Update DDRDLLA
67 (9*t
, [update
.eq(0)]), # Release DDRDLLA update
68 (10*t
, [self
.pause
.eq(0)]), # Release DQSBUFM pause
72 m
.d
.comb
+= tl
.trigger
.eq(new_lock
)
74 m
.d
.comb
+= self
.delay
.eq(delay
)
78 # Lattice ECP5 DDR PHY -----------------------------------------------------------------------------
81 class ECP5DDRPHY(Peripheral
, Elaboratable
):
82 def __init__(self
, pads
, sys_clk_freq
=100e6
):
83 super().__init
__(name
="phy")
85 #self.pads = PHYPadsCombiner(pads)
87 self
._sys
_clk
_freq
= sys_clk_freq
89 databits
= len(self
.pads
.dq
.io
)
90 assert databits
% 8 == 0
93 bank
= self
.csr_bank()
95 self
._dly
_sel
= bank
.csr(databits
//8, "rw")
97 self
._rdly
_dq
_rst
= bank
.csr(1, "rw")
98 self
._rdly
_dq
_inc
= bank
.csr(1, "rw")
99 self
._rdly
_dq
_bitslip
_rst
= bank
.csr(1, "rw")
100 self
._rdly
_dq
_bitslip
= bank
.csr(1, "rw")
102 self
._burstdet
_clr
= bank
.csr(1, "rw")
103 self
._burstdet
_seen
= bank
.csr(databits
//8, "r")
105 self
._zero
_ev
= self
.event(mode
="rise")
107 self
._bridge
= self
.bridge(data_width
=32, granularity
=8, alignment
=2)
108 self
.bus
= self
._bridge
.bus
109 self
.irq
= self
._bridge
.irq
111 addressbits
= len(self
.pads
.a
.o
)
112 bankbits
= len(self
.pads
.ba
.o
)
113 nranks
= 1 if not hasattr(self
.pads
, "cs_n") else len(self
.pads
.cs_n
.o
)
114 databits
= len(self
.pads
.dq
.io
)
115 self
.dfi
= Interface(addressbits
, bankbits
, nranks
, 4*databits
, 4)
117 # PHY settings -----------------------------------------------------------------------------
118 tck
= 2/(2*2*self
._sys
_clk
_freq
)
120 databits
= len(self
.pads
.dq
.io
)
121 nranks
= 1 if not hasattr(self
.pads
, "cs_n") else len(self
.pads
.cs_n
.o
)
122 addressbits
= len(self
.pads
.a
.o
)
123 bankbits
= len(self
.pads
.ba
.o
)
124 cl
, cwl
= get_cl_cw("DDR3", tck
)
125 cl_sys_latency
= get_sys_latency(nphases
, cl
)
126 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
127 rdcmdphase
, rdphase
= get_sys_phases(nphases
, cl_sys_latency
, cl
)
128 wrcmdphase
, wrphase
= get_sys_phases(nphases
, cwl_sys_latency
, cwl
)
129 self
.settings
= PhySettings(
130 phytype
="ECP5DDRPHY",
133 dfi_databits
=4*databits
,
138 rdcmdphase
=rdcmdphase
,
139 wrcmdphase
=wrcmdphase
,
142 read_latency
=2 + cl_sys_latency
+ 2 + log2_int(4//nphases
) + 4,
143 write_latency
=cwl_sys_latency
146 def elaborate(self
, platform
):
149 m
.submodules
+= self
._bridge
151 tck
= 2/(2*2*self
._sys
_clk
_freq
)
153 databits
= len(self
.pads
.dq
.io
)
154 nranks
= 1 if not hasattr(self
.pads
, "cs_n") else len(self
.pads
.cs_n
.o
)
155 addressbits
= len(self
.pads
.a
.o
)
156 bankbits
= len(self
.pads
.ba
.o
)
158 # Init -------------------------------------------------------------------------------------
159 m
.submodules
.init
= init
= ECP5DDRPHYInit()
161 # Parameters -------------------------------------------------------------------------------
162 cl
, cwl
= get_cl_cw("DDR3", tck
)
163 cl_sys_latency
= get_sys_latency(nphases
, cl
)
164 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
167 self
.datavalid
= Signal(databits
//8)
169 # DFI Interface ----------------------------------------------------------------------------
173 rddata_en
= Signal(self
.settings
.read_latency
)
175 # Clock --------------------------------------------------------------------------------
176 for i
in range(len(self
.pads
.clk
.o
)):
178 m
.submodules
+= Instance("ODDRX2F",
179 i_RST
=ResetSignal("dramsync"),
180 i_ECLK
=ClockSignal("sync2x"),
181 i_SCLK
=ClockSignal(),
186 o_Q
=self
.pads
.clk
.o
[i
]
189 # Addresses and Commands ---------------------------------------------------------------
190 for i
in range(addressbits
):
191 m
.submodules
+= Instance("ODDRX2F",
192 i_RST
=ResetSignal("dramsync"),
193 i_ECLK
=ClockSignal("sync2x"),
194 i_SCLK
=ClockSignal(),
195 i_D0
=dfi
.phases
[0].address
[i
],
196 i_D1
=dfi
.phases
[0].address
[i
],
197 i_D2
=dfi
.phases
[1].address
[i
],
198 i_D3
=dfi
.phases
[1].address
[i
],
201 for i
in range(bankbits
):
202 m
.submodules
+= Instance("ODDRX2F",
203 i_RST
=ResetSignal("dramsync"),
204 i_ECLK
=ClockSignal("sync2x"),
205 i_SCLK
=ClockSignal(),
206 i_D0
=dfi
.phases
[0].bank
[i
],
207 i_D1
=dfi
.phases
[0].bank
[i
],
208 i_D2
=dfi
.phases
[1].bank
[i
],
209 i_D3
=dfi
.phases
[1].bank
[i
],
210 o_Q
=self
.pads
.ba
.o
[i
]
212 controls
= ["ras_n", "cas_n", "we_n", "clk_en", "odt"]
213 if hasattr(self
.pads
, "reset_n"):
214 controls
.append("reset_n")
215 if hasattr(self
.pads
, "cs_n"):
216 controls
.append("cs_n")
217 for name
in controls
:
218 for i
in range(len(getattr(self
.pads
, name
))):
219 m
.submodules
+= Instance("ODDRX2F",
220 i_RST
=ResetSignal("dramsync"),
221 i_ECLK
=ClockSignal("sync2x"),
222 i_SCLK
=ClockSignal(),
223 i_D0
=getattr(dfi
.phases
[0], name
)[i
],
224 i_D1
=getattr(dfi
.phases
[0], name
)[i
],
225 i_D2
=getattr(dfi
.phases
[1], name
)[i
],
226 i_D3
=getattr(dfi
.phases
[1], name
)[i
],
227 o_Q
=getattr(self
.pads
, name
).o
[i
]
230 # DQ ---------------------------------------------------------------------------------------
233 dqs_pattern
= DQSPattern()
234 m
.submodules
+= dqs_pattern
235 for i
in range(databits
//8):
244 with m
.If(self
._dly
_sel
.w_data
[i
]):
245 with m
.If(self
._rdly
_dq
_rst
.w_stb
):
246 m
.d
.sync
+= rdly
.eq(0)
247 with m
.Elif(self
._rdly
_dq
_inc
.w_stb
):
248 m
.d
.sync
+= rdly
.eq(rdly
+ 1)
252 dqs_bitslip
= Signal(2)
253 with m
.If(self
._dly
_sel
.w_data
[i
]):
254 with m
.If(self
._rdly
_dq
_bitslip
_rst
.w_stb
):
255 m
.d
.sync
+= dqs_bitslip
.eq(0)
256 with m
.Elif(self
._rdly
_dq
_bitslip
.w_stb
):
257 m
.d
.sync
+= dqs_bitslip
.eq(dqs_bitslip
+ 1)
258 with m
.Switch(dqs_bitslip
):
259 for j
, b
in enumerate(range(-2, 2)):
261 m
.d
.sync
+= dqs_read
.eq(rddata_en
[cl_sys_latency
+ b
:cl_sys_latency
+ b
+ 2] != 0)
263 m
.submodules
+= Instance("DQSBUFM",
264 p_DQS_LI_DEL_ADJ
="MINUS",
266 p_DQS_LO_DEL_ADJ
="MINUS",
280 i_SCLK
=ClockSignal("sync"),
281 i_ECLK
=ClockSignal("sync2x"),
282 i_RST
=ResetSignal("dramsync"),
284 i_PAUSE
=init
.pause | self
._dly
_sel
.w_data
[i
],
287 # Assert LOADNs to use DDRDEL control
295 # Reads (generate shifted DQS clock for reads)
298 i_READCLKSEL0
=rdly
[0],
299 i_READCLKSEL1
=rdly
[1],
300 i_READCLKSEL2
=rdly
[2],
309 o_DATAVALID
=self
.datavalid
[i
],
312 # Writes (generate shifted ECLK clock for writes)
316 burstdet_d
= Signal()
317 m
.d
.sync
+= burstdet_d
.eq(burstdet
)
318 with m
.If(self
._burstdet
_clr
.w_stb
):
319 m
.d
.sync
+= self
._burstdet
_seen
.r_data
[i
].eq(0)
320 with m
.If(burstdet
& ~burstdet_d
):
321 m
.d
.sync
+= self
._burstdet
_seen
.r_data
[i
].eq(1)
323 # DQS and DM ---------------------------------------------------------------------------
324 dm_o_data
= Signal(8)
325 dm_o_data_d
= Signal(8)
326 dm_o_data_muxed
= Signal(4)
327 m
.d
.comb
+= dm_o_data
.eq(Cat(
328 dfi
.phases
[0].wrdata_mask
[0*databits
//8+i
],
329 dfi
.phases
[0].wrdata_mask
[1*databits
//8+i
],
330 dfi
.phases
[0].wrdata_mask
[2*databits
//8+i
],
331 dfi
.phases
[0].wrdata_mask
[3*databits
//8+i
],
333 dfi
.phases
[1].wrdata_mask
[0*databits
//8+i
],
334 dfi
.phases
[1].wrdata_mask
[1*databits
//8+i
],
335 dfi
.phases
[1].wrdata_mask
[2*databits
//8+i
],
336 dfi
.phases
[1].wrdata_mask
[3*databits
//8+i
]),
338 m
.d
.sync
+= dm_o_data_d
.eq(dm_o_data
)
339 with m
.Switch(bl8_chunk
):
341 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data
[:4])
343 m
.d
.sync
+= dm_o_data_muxed
.eq(dm_o_data_d
[4:])
344 m
.submodules
+= Instance("ODDRX2DQA",
345 i_RST
=ResetSignal("dramsync"),
346 i_ECLK
=ClockSignal("sync2x"),
347 i_SCLK
=ClockSignal("sync"),
349 i_D0
=dm_o_data_muxed
[0],
350 i_D1
=dm_o_data_muxed
[1],
351 i_D2
=dm_o_data_muxed
[2],
352 i_D3
=dm_o_data_muxed
[3],
353 o_Q
=self
.pads
.dm
.o
[i
]
359 Instance("ODDRX2DQSB",
360 i_RST
=ResetSignal("dramsync"),
361 i_ECLK
=ClockSignal("sync2x"),
362 i_SCLK
=ClockSignal(),
364 i_D0
=0, # FIXME: dqs_pattern.o[3],
365 i_D1
=1, # FIXME: dqs_pattern.o[2],
366 i_D2
=0, # FIXME: dqs_pattern.o[1],
367 i_D3
=1, # FIXME: dqs_pattern.o[0],
370 Instance("TSHX2DQSA",
371 i_RST
=ResetSignal("dramsync"),
372 i_ECLK
=ClockSignal("sync2x"),
373 i_SCLK
=ClockSignal(),
375 i_T0
=~
(dqs_pattern
.preamble | dqs_oe |
376 dqs_pattern
.postamble
),
377 i_T1
=~
(dqs_pattern
.preamble | dqs_oe |
378 dqs_pattern
.postamble
),
385 io_B
=self
.pads
.dqs
.io
[i
]
389 for j
in range(8*i
, 8*(i
+1)):
393 dq_i_delayed
= Signal()
394 dq_i_data
= Signal(8)
395 dq_o_data
= Signal(8)
396 dq_o_data_d
= Signal(8)
397 dq_o_data_muxed
= Signal(4)
398 m
.d
.comb
+= dq_o_data
.eq(Cat(
399 dfi
.phases
[0].wrdata
[0*databits
+j
],
400 dfi
.phases
[0].wrdata
[1*databits
+j
],
401 dfi
.phases
[0].wrdata
[2*databits
+j
],
402 dfi
.phases
[0].wrdata
[3*databits
+j
],
404 dfi
.phases
[1].wrdata
[0*databits
+j
],
405 dfi
.phases
[1].wrdata
[1*databits
+j
],
406 dfi
.phases
[1].wrdata
[2*databits
+j
],
407 dfi
.phases
[1].wrdata
[3*databits
+j
])
409 m
.d
.sync
+= dq_o_data_d
.eq(dq_o_data
)
410 # FIXME: use m.d.comb?
411 with m
.Switch(bl8_chunk
):
413 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data
[:4])
415 m
.d
.sync
+= dq_o_data_muxed
.eq(dq_o_data_d
[4:])
416 _dq_i_data
= Signal(4)
418 Instance("ODDRX2DQA",
419 i_RST
=ResetSignal("dramsync"),
420 i_ECLK
=ClockSignal("sync2x"),
421 i_SCLK
=ClockSignal(),
423 i_D0
=dq_o_data_muxed
[0],
424 i_D1
=dq_o_data_muxed
[1],
425 i_D2
=dq_o_data_muxed
[2],
426 i_D3
=dq_o_data_muxed
[3],
430 p_DEL_MODE
="DQS_ALIGNED_X2",
437 Instance("IDDRX2DQA",
438 i_RST
=ResetSignal("dramsync"),
439 i_ECLK
=ClockSignal("sync2x"),
440 i_SCLK
=ClockSignal(),
455 m
.d
.sync
+= dq_i_data
[:4].eq(dq_i_data
[4:])
456 m
.d
.sync
+= dq_i_data
[4:].eq(_dq_i_data
)
458 dfi
.phases
[0].rddata
[0*databits
+j
].eq(dq_i_data
[0]),
459 dfi
.phases
[0].rddata
[1*databits
+j
].eq(dq_i_data
[1]),
460 dfi
.phases
[0].rddata
[2*databits
+j
].eq(dq_i_data
[2]),
461 dfi
.phases
[0].rddata
[3*databits
+j
].eq(dq_i_data
[3]),
462 dfi
.phases
[1].rddata
[0*databits
+j
].eq(dq_i_data
[4]),
463 dfi
.phases
[1].rddata
[1*databits
+j
].eq(dq_i_data
[5]),
464 dfi
.phases
[1].rddata
[2*databits
+j
].eq(dq_i_data
[6]),
465 dfi
.phases
[1].rddata
[3*databits
+j
].eq(dq_i_data
[7]),
469 i_RST
=ResetSignal("dramsync"),
470 i_ECLK
=ClockSignal("sync2x"),
471 i_SCLK
=ClockSignal(),
473 i_T0
=~
(dqs_pattern
.preamble | dq_oe |
474 dqs_pattern
.postamble
),
475 i_T1
=~
(dqs_pattern
.preamble | dq_oe |
476 dqs_pattern
.postamble
),
483 io_B
=self
.pads
.dq
.io
[j
]
487 # Read Control Path ------------------------------------------------------------------------
488 # Creates a shift register of read commands coming from the DFI interface. This shift register
489 # is used to control DQS read (internal read pulse of the DQSBUF) and to indicate to the
490 # DFI interface that the read data is valid.
492 # The DQS read must be asserted for 2 sys_clk cycles before the read data comes back from
493 # the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
495 # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
496 # interface; the latency is the sum of the ODDRX2DQA, CAS and IDDRX2DQA latencies.
497 rddata_en_last
= Signal
.like(rddata_en
)
498 m
.d
.comb
+= rddata_en
.eq(
499 Cat(dfi
.phases
[self
.settings
.rdphase
].rddata_en
, rddata_en_last
))
500 m
.d
.sync
+= rddata_en_last
.eq(rddata_en
)
501 m
.d
.sync
+= [phase
.rddata_valid
.eq(rddata_en
[-1])
502 for phase
in dfi
.phases
]
504 # Write Control Path -----------------------------------------------------------------------
505 # Creates a shift register of write commands coming from the DFI interface. This shift register
506 # is used to control DQ/DQS tristates and to select write data of the DRAM burst from the DFI
507 # interface: the PHY operates in half-rate mode (so it provides 4 data words every sys_clk cycle:
508 # 2x for DDR, 2x for half-rate) but DDR3 requires a burst of 8 data words (BL8) for best efficiency.
509 # Writes are then performed in 2 sys_clk cycles and data needs to be selected for each cycle.
510 # FIXME: understand +2
511 wrdata_en
= Signal(cwl_sys_latency
+ 5)
512 wrdata_en_last
= Signal
.like(wrdata_en
)
513 m
.d
.comb
+= wrdata_en
.eq(
514 Cat(dfi
.phases
[self
.settings
.wrphase
].wrdata_en
, wrdata_en_last
))
515 m
.d
.sync
+= wrdata_en_last
.eq(wrdata_en
)
516 m
.d
.comb
+= dq_oe
.eq(wrdata_en
[cwl_sys_latency
+ 2]
517 | wrdata_en
[cwl_sys_latency
+ 3])
518 m
.d
.comb
+= bl8_chunk
.eq(wrdata_en
[cwl_sys_latency
+ 1])
519 m
.d
.comb
+= dqs_oe
.eq(dq_oe
)
521 # Write DQS Postamble/Preamble Control Path ------------------------------------------------
522 # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the last
523 # write. During writes, DQS tristate is configured as output for at least 4 sys_clk cycles:
524 # 1 for Preamble, 2 for the Write and 1 for the Postamble.
525 m
.d
.comb
+= dqs_pattern
.preamble
.eq(
526 wrdata_en
[cwl_sys_latency
+ 1] & ~wrdata_en
[cwl_sys_latency
+ 2])
527 m
.d
.comb
+= dqs_pattern
.postamble
.eq(
528 wrdata_en
[cwl_sys_latency
+ 4] & ~wrdata_en
[cwl_sys_latency
+ 3])