Update switch to nMigen syntax
[gram.git] / gram / phy / ecp5ddrphy.py
1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # This file is Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
4 # License: BSD
5
6 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
7 # DDR3: 800 MT/s
8
9 import math
10
11 from nmigen import *
12 from nmigen.lib.cdc import FFSynchronizer
13 from nmigen.utils import log2_int
14
15 from lambdasoc.periph import Peripheral
16
17 import gram.stream as stream
18 from gram.common import *
19 from gram.phy.dfi import Interface
20 from gram.compat import Timeline
21
22 # Lattice ECP5 DDR PHY Initialization --------------------------------------------------------------
23
24
class ECP5DDRPHYInit(Elaboratable):
    """Start-up sequencer around the ECP5 ``DDRDLLA`` primitive.

    Instantiates ``DDRDLLA``, detects a rising edge on its (synchronized) LOCK output and uses
    that edge to trigger a fixed sequence of freeze / stop / reset / pause / update strobes.

    Parameters
    ----------
    eclk_cd : str
        Name of the clock domain whose reset signal is driven by the sequence's ``reset`` strobe.

    Attributes
    ----------
    pause : Signal
        Mirrors the sequence's ``pause`` strobe.
    stop : Signal
        Mirrors the sequence's ``stop`` strobe.
    delay : Signal
        Mirrors the ``DDRDEL`` output of the ``DDRDLLA`` instance.
    """

    def __init__(self, eclk_cd):
        self.pause = Signal()
        self.stop = Signal()
        self.delay = Signal()
        self._eclk_cd = eclk_cd

    def elaborate(self, platform):
        m = Module()

        # Strobes driven by the initialization sequence.
        new_lock = Signal()
        update = Signal()
        stop = Signal()
        freeze = Signal()
        pause = Signal()
        reset = Signal()

        # DDRDLLA instance -------------------------------------------------------------------------
        _lock = Signal()
        delay = Signal()
        m.submodules += Instance(
            "DDRDLLA",
            i_CLK=ClockSignal("sync2x"),
            i_RST=ResetSignal(),
            i_UDDCNTLN=~update,  # the port receives the inverted update strobe
            i_FREEZE=freeze,
            o_DDRDEL=delay,
            o_LOCK=_lock,
        )

        # Synchronize LOCK, then flag the cycle in which it goes from low to high.
        lock = Signal()
        lock_d = Signal()
        m.submodules += FFSynchronizer(_lock, lock)
        m.d.sync += [
            lock_d.eq(lock),
            new_lock.eq(lock & ~lock_d),
        ]

        # DDRDLLA/DQSBUFM/ECLK initialization sequence ---------------------------------------------
        # One action every `step` cycles; the n-th action (1-based) is scheduled at n*step.
        # NOTE(review): exact timing semantics come from gram.compat.Timeline, which is not
        # visible here -- presumably cycles counted from the trigger; confirm.
        step = 8  # in cycles
        actions = [
            freeze.eq(1),  # Freeze DDRDLLA
            stop.eq(1),    # Stop ECLK domain
            reset.eq(1),   # Reset ECLK domain
            reset.eq(0),   # Release ECLK domain reset
            stop.eq(0),    # Release ECLK domain stop
            freeze.eq(0),  # Release DDRDLLA freeze
            pause.eq(1),   # Pause DQSBUFM
            update.eq(1),  # Update DDRDLLA
            update.eq(0),  # Release DDRDLLA update
            pause.eq(0),   # Release DQSBUFM pause
        ]
        tl = Timeline([(n * step, [action]) for n, action in enumerate(actions, start=1)])
        m.submodules += tl

        # ------------------------------------------------------------------------------------------
        m.d.comb += [
            # Wait DDRDLLA Lock: the sequence starts on the lock rising edge.
            tl.trigger.eq(new_lock),
            self.pause.eq(pause),
            self.stop.eq(stop),
            self.delay.eq(delay),
            ResetSignal(self._eclk_cd).eq(reset),
        ]

        return m
86
87 # Lattice ECP5 DDR PHY -----------------------------------------------------------------------------
88
89
class ECP5DDRPHY(Peripheral, Elaboratable):
    """1:2 frequency-ratio DDR3 PHY for Lattice ECP5, exposed as a CSR peripheral plus DFI.

    The PHY uses the default ("sync") clock domain together with a "sync2x" domain for the
    I/O primitives, and instantiates :class:`ECP5DDRPHYInit` renamed into an "init" domain.

    Parameters
    ----------
    pads
        DRAM pad collection. The code reads ``pads.dq.o``, ``pads.a.o``, ``pads.ba.o`` and
        ``pads.clk.o`` for widths/outputs, indexes ``pads.dm``, ``pads.dqs_p``, ``pads.dq`` and
        the control pads (``ras_n``, ``cas_n``, ``we_n``, ``cke``, ``odt``) directly, and treats
        ``reset_n`` and ``cs_n`` as optional.
    sys_clk_freq : float
        System clock frequency in Hz; used to derive tCK for the CL/CWL lookup.

    Attributes
    ----------
    dfi : Interface
        DFI interface towards the memory controller.
    settings : PhySettings
        PHY parameters (latencies, phases, widths) derived at construction time.
    bus, irq
        Bus interface and interrupt line of the CSR bridge.
    datavalid : Signal
        One ``DATAVALID`` bit per byte lane, for observation. Created in :meth:`elaborate`.

    CSRs (one bit of ``_dly_sel`` / ``_burstdet_seen`` per byte lane)
    -----------------------------------------------------------------
    ``_dly_sel`` selects the byte lanes affected by the delay/bitslip registers (and pauses
    their DQSBUFM); writing ``_rdly_dq_rst`` / ``_rdly_dq_inc`` resets / increments the read
    delay; writing ``_rdly_dq_bitslip_rst`` / ``_rdly_dq_bitslip`` resets / increments the
    read-pulse bitslip; ``_burstdet_seen`` latches rising edges of BURSTDET and is cleared by
    writing ``_burstdet_clr``.
    """

    def __init__(self, pads, sys_clk_freq=100e6):
        super().__init__()

        self.pads = pads
        self._sys_clk_freq = sys_clk_freq

        databits = len(self.pads.dq.o)
        assert databits % 8 == 0
        addressbits = len(self.pads.a.o)
        bankbits = len(self.pads.ba.o)
        nranks = 1 if not hasattr(self.pads, "cs_n") else len(self.pads.cs_n)

        # CSR --------------------------------------------------------------------------------------
        # Creation order is preserved: it may determine register addresses.
        bank = self.csr_bank()

        self._dly_sel = bank.csr(databits//8, "rw")

        self._rdly_dq_rst = bank.csr(1, "rw")
        self._rdly_dq_inc = bank.csr(1, "rw")
        self._rdly_dq_bitslip_rst = bank.csr(1, "rw")
        self._rdly_dq_bitslip = bank.csr(1, "rw")

        self._burstdet_clr = bank.csr(1, "rw")
        self._burstdet_seen = bank.csr(databits//8, "r")

        self._zero_ev = self.event(mode="rise")

        self._bridge = self.bridge(data_width=32, granularity=8, alignment=2)
        self.bus = self._bridge.bus
        self.irq = self._bridge.irq

        # DFI --------------------------------------------------------------------------------------
        self.dfi = Interface(addressbits, bankbits, nranks, 4*databits, 4)

        # PHY settings -----------------------------------------------------------------------------
        tck = 2/(2*2*self._sys_clk_freq)
        nphases = 2
        cl, cwl = get_cl_cw("DDR3", tck)
        cl_sys_latency = get_sys_latency(nphases, cl)
        cwl_sys_latency = get_sys_latency(nphases, cwl)
        rdcmdphase, rdphase = get_sys_phases(nphases, cl_sys_latency, cl)
        wrcmdphase, wrphase = get_sys_phases(nphases, cwl_sys_latency, cwl)
        self.settings = PhySettings(
            phytype="ECP5DDRPHY",
            memtype="DDR3",
            databits=databits,
            dfi_databits=4*databits,
            nranks=nranks,
            nphases=nphases,
            rdphase=rdphase,
            wrphase=wrphase,
            rdcmdphase=rdcmdphase,
            wrcmdphase=wrcmdphase,
            cl=cl,
            cwl=cwl,
            read_latency=2 + cl_sys_latency + 2 + log2_int(4//nphases) + 4,
            write_latency=cwl_sys_latency
        )

    def elaborate(self, platform):
        m = Module()

        m.submodules += self._bridge

        tck = 2/(2*2*self._sys_clk_freq)
        nphases = 2
        databits = len(self.pads.dq.o)
        addressbits = len(self.pads.a.o)
        bankbits = len(self.pads.ba.o)

        # Init -------------------------------------------------------------------------------------
        # Keep a local handle: the submodule is not stored as an attribute of `self`.
        m.submodules.init = init = DomainRenamer("init")(ECP5DDRPHYInit("sync2x"))

        # Parameters -------------------------------------------------------------------------------
        cl, cwl = get_cl_cw("DDR3", tck)
        cl_sys_latency = get_sys_latency(nphases, cl)
        cwl_sys_latency = get_sys_latency(nphases, cwl)

        # Observation
        self.datavalid = Signal(databits//8)

        # DFI Interface ----------------------------------------------------------------------------
        dfi = self.dfi

        bl8_chunk = Signal()
        rddata_en = Signal(self.settings.read_latency)

        # Clock ------------------------------------------------------------------------------------
        # Constant 0/1/0/1 data pattern on every clock pad.
        for i in range(len(self.pads.clk.o)):
            m.submodules += Instance("ODDRX2F",
                i_RST=ResetSignal("sync2x"),
                i_ECLK=ClockSignal("sync2x"),
                i_SCLK=ClockSignal(),
                i_D0=0,
                i_D1=1,
                i_D2=0,
                i_D3=1,
                o_Q=self.pads.clk.o[i]
            )

        # Addresses and Commands -------------------------------------------------------------------
        # Each DFI phase value is presented on two consecutive data inputs (D0/D1, D2/D3).
        for i in range(addressbits):
            m.submodules += Instance("ODDRX2F",
                i_RST=ResetSignal("sync2x"),
                i_ECLK=ClockSignal("sync2x"),
                i_SCLK=ClockSignal(),
                i_D0=dfi.phases[0].address[i],
                i_D1=dfi.phases[0].address[i],
                i_D2=dfi.phases[1].address[i],
                i_D3=dfi.phases[1].address[i],
                o_Q=self.pads.a.o[i]
            )
        for i in range(bankbits):
            m.submodules += Instance("ODDRX2F",
                i_RST=ResetSignal("sync2x"),
                i_ECLK=ClockSignal("sync2x"),
                i_SCLK=ClockSignal(),
                i_D0=dfi.phases[0].bank[i],
                i_D1=dfi.phases[0].bank[i],
                i_D2=dfi.phases[1].bank[i],
                i_D3=dfi.phases[1].bank[i],
                o_Q=self.pads.ba.o[i]
            )
        controls = ["ras_n", "cas_n", "we_n", "cke", "odt"]
        if hasattr(self.pads, "reset_n"):
            controls.append("reset_n")
        if hasattr(self.pads, "cs_n"):
            controls.append("cs_n")
        for name in controls:
            # NOTE(review): unlike a/ba/clk, these pads are indexed directly rather than through
            # `.o` -- confirm this matches the pad type handed to the PHY.
            for i in range(len(getattr(self.pads, name))):
                m.submodules += Instance("ODDRX2F",
                    i_RST=ResetSignal("sync2x"),
                    i_ECLK=ClockSignal("sync2x"),
                    i_SCLK=ClockSignal(),
                    i_D0=getattr(dfi.phases[0], name)[i],
                    i_D1=getattr(dfi.phases[0], name)[i],
                    i_D2=getattr(dfi.phases[1], name)[i],
                    i_D3=getattr(dfi.phases[1], name)[i],
                    o_Q=getattr(self.pads, name)[i]
                )

        # DQ ---------------------------------------------------------------------------------------
        dq_oe = Signal()
        dqs_oe = Signal()
        dqs_pattern = DQSPattern()
        m.submodules += dqs_pattern
        for i in range(databits//8):
            # DQSBUFM
            dqs_i = Signal()
            dqsr90 = Signal()
            dqsw270 = Signal()
            dqsw = Signal()
            rdpntr = Signal(3)
            wrpntr = Signal(3)

            # Read delay, adjusted through CSR writes while this byte lane is selected.
            # Only bits 0..2 are wired to the DQSBUFM READCLKSEL inputs below.
            rdly = Signal(7)
            with m.If(self._dly_sel.w_data[i]):
                with m.If(self._rdly_dq_rst.w_stb):
                    m.d.sync += rdly.eq(0)
                with m.Elif(self._rdly_dq_inc.w_stb):
                    m.d.sync += rdly.eq(rdly + 1)

            burstdet = Signal()
            dqs_read = Signal()

            # Bitslip selects which 2-bit window of the read-enable shift register gates the
            # DQS read pulse (window offsets -2..+1 around cl_sys_latency).
            dqs_bitslip = Signal(2)
            with m.If(self._dly_sel.w_data[i]):
                with m.If(self._rdly_dq_bitslip_rst.w_stb):
                    m.d.sync += dqs_bitslip.eq(0)
                with m.Elif(self._rdly_dq_bitslip.w_stb):
                    m.d.sync += dqs_bitslip.eq(dqs_bitslip + 1)
            with m.Switch(dqs_bitslip):
                for j, b in enumerate(range(-2, 2)):
                    with m.Case(j):
                        m.d.sync += dqs_read.eq(
                            rddata_en[cl_sys_latency + b:cl_sys_latency + b + 2] != 0)

            m.submodules += Instance("DQSBUFM",
                p_DQS_LI_DEL_ADJ="MINUS",
                p_DQS_LI_DEL_VAL=1,
                p_DQS_LO_DEL_ADJ="MINUS",
                p_DQS_LO_DEL_VAL=4,
                # Clocks / Reset
                i_SCLK=ClockSignal("sync"),
                i_ECLK=ClockSignal("sync2x"),
                i_RST=ResetSignal("sync2x"),
                i_DDRDEL=init.delay,
                i_PAUSE=init.pause | self._dly_sel.w_data[i],

                # Control
                # Assert LOADNs to use DDRDEL control
                i_RDLOADN=0,
                i_RDMOVE=0,
                i_RDDIRECTION=1,
                i_WRLOADN=0,
                i_WRMOVE=0,
                i_WRDIRECTION=1,

                # Reads (generate shifted DQS clock for reads)
                i_READ0=dqs_read,
                i_READ1=dqs_read,
                i_READCLKSEL0=rdly[0],
                i_READCLKSEL1=rdly[1],
                i_READCLKSEL2=rdly[2],
                i_DQSI=dqs_i,
                o_DQSR90=dqsr90,
                o_RDPNTR0=rdpntr[0],
                o_RDPNTR1=rdpntr[1],
                o_RDPNTR2=rdpntr[2],
                o_WRPNTR0=wrpntr[0],
                o_WRPNTR1=wrpntr[1],
                o_WRPNTR2=wrpntr[2],
                o_DATAVALID=self.datavalid[i],
                o_BURSTDET=burstdet,

                # Writes (generate shifted ECLK clock for writes)
                o_DQSW270=dqsw270,
                o_DQSW=dqsw
            )

            # Sticky burst-detect flag: set on a BURSTDET rising edge, cleared by a write to the
            # clear register. The set is assigned last, so it wins if both occur together.
            # The read-only register is driven through `r_data`, matching the `w_data`/`w_stb`
            # fields used for the other registers.
            burstdet_d = Signal()
            m.d.sync += burstdet_d.eq(burstdet)
            with m.If(self._burstdet_clr.w_stb):
                m.d.sync += self._burstdet_seen.r_data[i].eq(0)
            with m.If(burstdet & ~burstdet_d):
                m.d.sync += self._burstdet_seen.r_data[i].eq(1)

            # DQS and DM ---------------------------------------------------------------------------
            dm_o_data = Signal(8)
            dm_o_data_d = Signal(8)
            dm_o_data_muxed = Signal(4)
            m.d.comb += dm_o_data.eq(Cat(
                dfi.phases[0].wrdata_mask[0*databits//8+i],
                dfi.phases[0].wrdata_mask[1*databits//8+i],
                dfi.phases[0].wrdata_mask[2*databits//8+i],
                dfi.phases[0].wrdata_mask[3*databits//8+i],

                dfi.phases[1].wrdata_mask[0*databits//8+i],
                dfi.phases[1].wrdata_mask[1*databits//8+i],
                dfi.phases[1].wrdata_mask[2*databits//8+i],
                dfi.phases[1].wrdata_mask[3*databits//8+i]),
            )
            m.d.sync += dm_o_data_d.eq(dm_o_data)
            # First half of the burst comes from the current word, second half from the
            # registered copy. FIXME: use comb?
            with m.Switch(bl8_chunk):
                with m.Case(0):
                    m.d.sync += dm_o_data_muxed.eq(dm_o_data[:4])
                with m.Case(1):
                    m.d.sync += dm_o_data_muxed.eq(dm_o_data_d[4:])
            m.submodules += Instance("ODDRX2DQA",
                i_RST=ResetSignal("sync2x"),
                i_ECLK=ClockSignal("sync2x"),
                i_SCLK=ClockSignal(),
                i_DQSW270=dqsw270,
                i_D0=dm_o_data_muxed[0],
                i_D1=dm_o_data_muxed[1],
                i_D2=dm_o_data_muxed[2],
                i_D3=dm_o_data_muxed[3],
                o_Q=self.pads.dm[i]
            )

            dqs = Signal()
            dqs_oe_n = Signal()
            # NOTE(review): `Tristate` is not among the explicit imports visible in this file --
            # presumably provided by a star import; confirm it is available under nMigen.
            m.submodules += [
                Instance("ODDRX2DQSB",
                    i_RST=ResetSignal("sync2x"),
                    i_ECLK=ClockSignal("sync2x"),
                    i_SCLK=ClockSignal(),
                    i_DQSW=dqsw,
                    i_D0=0,  # FIXME: dqs_pattern.o[3],
                    i_D1=1,  # FIXME: dqs_pattern.o[2],
                    i_D2=0,  # FIXME: dqs_pattern.o[1],
                    i_D3=1,  # FIXME: dqs_pattern.o[0],
                    o_Q=dqs
                ),
                Instance("TSHX2DQSA",
                    i_RST=ResetSignal("sync2x"),
                    i_ECLK=ClockSignal("sync2x"),
                    i_SCLK=ClockSignal(),
                    i_DQSW=dqsw,
                    i_T0=~(dqs_pattern.preamble | dqs_oe |
                           dqs_pattern.postamble),
                    i_T1=~(dqs_pattern.preamble | dqs_oe |
                           dqs_pattern.postamble),
                    o_Q=dqs_oe_n
                ),
                Tristate(self.pads.dqs_p[i], dqs, ~dqs_oe_n, dqs_i)
            ]

            for j in range(8*i, 8*(i+1)):
                dq_o = Signal()
                dq_i = Signal()
                dq_oe_n = Signal()
                dq_i_delayed = Signal()
                dq_i_data = Signal(8)
                dq_o_data = Signal(8)
                dq_o_data_d = Signal(8)
                dq_o_data_muxed = Signal(4)
                m.d.comb += dq_o_data.eq(Cat(
                    dfi.phases[0].wrdata[0*databits+j],
                    dfi.phases[0].wrdata[1*databits+j],
                    dfi.phases[0].wrdata[2*databits+j],
                    dfi.phases[0].wrdata[3*databits+j],

                    dfi.phases[1].wrdata[0*databits+j],
                    dfi.phases[1].wrdata[1*databits+j],
                    dfi.phases[1].wrdata[2*databits+j],
                    dfi.phases[1].wrdata[3*databits+j])
                )
                m.d.sync += dq_o_data_d.eq(dq_o_data)
                # Same burst-half selection as for DM above. FIXME: use comb?
                with m.Switch(bl8_chunk):
                    with m.Case(0):
                        m.d.sync += dq_o_data_muxed.eq(dq_o_data[:4])
                    with m.Case(1):
                        m.d.sync += dq_o_data_muxed.eq(dq_o_data_d[4:])
                _dq_i_data = Signal(4)
                m.submodules += [
                    Instance("ODDRX2DQA",
                        i_RST=ResetSignal("sync2x"),
                        i_ECLK=ClockSignal("sync2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSW270=dqsw270,
                        i_D0=dq_o_data_muxed[0],
                        i_D1=dq_o_data_muxed[1],
                        i_D2=dq_o_data_muxed[2],
                        i_D3=dq_o_data_muxed[3],
                        o_Q=dq_o
                    ),
                    Instance("DELAYF",
                        p_DEL_MODE="DQS_ALIGNED_X2",
                        i_LOADN=1,
                        i_MOVE=0,
                        i_DIRECTION=0,
                        i_A=dq_i,
                        o_Z=dq_i_delayed
                    ),
                    Instance("IDDRX2DQA",
                        i_RST=ResetSignal("sync2x"),
                        i_ECLK=ClockSignal("sync2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSR90=dqsr90,
                        i_RDPNTR0=rdpntr[0],
                        i_RDPNTR1=rdpntr[1],
                        i_RDPNTR2=rdpntr[2],
                        i_WRPNTR0=wrpntr[0],
                        i_WRPNTR1=wrpntr[1],
                        i_WRPNTR2=wrpntr[2],
                        i_D=dq_i_delayed,
                        o_Q0=_dq_i_data[0],
                        o_Q1=_dq_i_data[1],
                        o_Q2=_dq_i_data[2],
                        o_Q3=_dq_i_data[3],
                    )
                ]
                # Assemble 8 bits from two consecutive 4-bit captures: the previous capture
                # moves to the low nibble, the new one enters the high nibble.
                m.d.sync += dq_i_data[:4].eq(dq_i_data[4:])
                m.d.sync += dq_i_data[4:].eq(_dq_i_data)
                m.d.comb += [
                    dfi.phases[0].rddata[0*databits+j].eq(dq_i_data[0]),
                    dfi.phases[0].rddata[1*databits+j].eq(dq_i_data[1]),
                    dfi.phases[0].rddata[2*databits+j].eq(dq_i_data[2]),
                    dfi.phases[0].rddata[3*databits+j].eq(dq_i_data[3]),
                    dfi.phases[1].rddata[0*databits+j].eq(dq_i_data[4]),
                    dfi.phases[1].rddata[1*databits+j].eq(dq_i_data[5]),
                    dfi.phases[1].rddata[2*databits+j].eq(dq_i_data[6]),
                    dfi.phases[1].rddata[3*databits+j].eq(dq_i_data[7]),
                ]
                m.submodules += [
                    Instance("TSHX2DQA",
                        i_RST=ResetSignal("sync2x"),
                        i_ECLK=ClockSignal("sync2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSW270=dqsw270,
                        i_T0=~(dqs_pattern.preamble | dq_oe |
                               dqs_pattern.postamble),
                        i_T1=~(dqs_pattern.preamble | dq_oe |
                               dqs_pattern.postamble),
                        o_Q=dq_oe_n,
                    ),
                    Tristate(self.pads.dq[j], dq_o, ~dq_oe_n, dq_i)
                ]

        # Read Control Path ------------------------------------------------------------------------
        # Creates a shift register of read commands coming from the DFI interface. This shift
        # register is used to control DQS read (internal read pulse of the DQSBUF) and to indicate
        # to the DFI interface that the read data is valid.
        #
        # The DQS read must be asserted for 2 sys_clk cycles before the read data is coming back
        # from the DRAM (see 6.2.4 READ Pulse Positioning Optimization of FPGA-TN-02035-1.2)
        #
        # The read data valid is asserted for 1 sys_clk cycle when the data is available on the DFI
        # interface, the latency is the sum of the ODDRX2DQA, CAS, IDDRX2DQA latencies.
        rddata_en_last = Signal.like(rddata_en)
        m.d.comb += rddata_en.eq(
            Cat(dfi.phases[self.settings.rdphase].rddata_en, rddata_en_last))
        m.d.sync += rddata_en_last.eq(rddata_en)
        m.d.sync += [phase.rddata_valid.eq(rddata_en[-1])
                     for phase in dfi.phases]

        # Write Control Path -----------------------------------------------------------------------
        # Creates a shift register of write commands coming from the DFI interface. This shift
        # register is used to control DQ/DQS tristates and to select write data of the DRAM burst
        # from the DFI interface: the PHY is operating in halfrate mode (so provides 4 data words
        # every sys_clk cycle: 2x for DDR, 2x for halfrate) but DDR3 requires a burst of 8 data
        # words (BL8) for best efficiency. Writes are then performed in 2 sys_clk cycles and data
        # needs to be selected for each cycle.
        # FIXME: understand +2
        wrdata_en = Signal(cwl_sys_latency + 5)
        wrdata_en_last = Signal.like(wrdata_en)
        m.d.comb += wrdata_en.eq(
            Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last))
        m.d.sync += wrdata_en_last.eq(wrdata_en)
        m.d.comb += dq_oe.eq(wrdata_en[cwl_sys_latency + 2]
                             | wrdata_en[cwl_sys_latency + 3])
        m.d.comb += bl8_chunk.eq(wrdata_en[cwl_sys_latency + 1])
        m.d.comb += dqs_oe.eq(dq_oe)

        # Write DQS Postamble/Preamble Control Path ------------------------------------------------
        # Generates DQS Preamble 1 cycle before the first write and Postamble 1 cycle after the
        # last write. During writes, DQS tristate is configured as output for at least 4 sys_clk
        # cycles: 1 for Preamble, 2 for the Write and 1 for the Postamble.
        m.d.comb += dqs_pattern.preamble.eq(
            wrdata_en[cwl_sys_latency + 1] & ~wrdata_en[cwl_sys_latency + 2])
        m.d.comb += dqs_pattern.postamble.eq(
            wrdata_en[cwl_sys_latency + 4] & ~wrdata_en[cwl_sys_latency + 3])

        return m
515
516 return m