More nMigen conversion and fixes
[gram.git] / gram / phy / ecp5ddrphy.py
1 # This file is Copyright (c) 2019 David Shah <dave@ds0.me>
2 # This file is Copyright (c) 2019-2020 Florent Kermarrec <florent@enjoy-digital.fr>
3 # License: BSD
4
5 # 1:2 frequency-ratio DDR3 PHY for Lattice's ECP5
6 # DDR3: 800 MT/s
7
8 import math
9
10 # from litex.soc.interconnect.csr import *
11
12 from nmigen import *
13 from nmigen.lib.cdc import FFSynchronizer
14 from nmigen.utils import log2_int
15
16 from lambdasoc.periph import Peripheral
17
18 import gram.stream as stream
19 from gram.common import *
20 from gram.phy.dfi import *
21 from gram.timeline import Timeline
22
23 # Lattice ECP5 DDR PHY Initialization --------------------------------------------------------------
24
class ECP5DDRPHYInit(Elaboratable):
    """Start-up sequencer for the ECP5 DDRDLLA, DQSBUFM and ECLK resources.

    Instantiates a ``DDRDLLA`` primitive, detects a rising edge on its
    synchronized ``LOCK`` output and uses that edge to trigger a fixed
    timeline of freeze / stop / reset / pause / update steps.

    Parameters
    ----------
    eclk_cd : str
        Name of the clock domain whose reset signal is driven by the
        sequence's ``reset`` flag.

    Attributes
    ----------
    pause : Signal
        Follows the sequence's DQSBUFM pause flag.
    stop : Signal
        Follows the sequence's ECLK stop flag.
    delay : Signal
        Follows the ``DDRDEL`` output of the DDRDLLA instance.
    """

    def __init__(self, eclk_cd):
        self._eclk_cd = eclk_cd
        self.pause = Signal()
        self.stop = Signal()
        self.delay = Signal()

    def elaborate(self, platform):
        m = Module()

        # Internal flags toggled by the initialization timeline.
        new_lock = Signal()
        update = Signal()
        stop = Signal()
        freeze = Signal()
        pause = Signal()
        reset = Signal()

        # DDRDLLA instance -----------------------------------------------------
        _lock = Signal()
        delay = Signal()
        m.submodules += Instance(
            "DDRDLLA",
            i_CLK=ClockSignal("sys2x"),
            i_RST=ResetSignal(),
            i_UDDCNTLN=~update,  # active-low input: asserted while `update` is high
            i_FREEZE=freeze,
            o_DDRDEL=delay,
            o_LOCK=_lock,
        )

        # Bring LOCK into the local domain and turn its rising edge into a
        # one-cycle pulse (`new_lock`).
        lock = Signal()
        lock_d = Signal()
        m.submodules += FFSynchronizer(_lock, lock)
        m.d.sync += [
            lock_d.eq(lock),
            new_lock.eq(lock & ~lock_d),
        ]

        # DDRDLLA/DQSBUFM/ECLK initialization sequence -------------------------
        step_cycles = 8  # spacing between consecutive steps, in cycles
        sequence = (
            freeze.eq(1),  # Freeze DDRDLLA
            stop.eq(1),    # Stop ECLK domain
            reset.eq(1),   # Reset ECLK domain
            reset.eq(0),   # Release ECLK domain reset
            stop.eq(0),    # Release ECLK domain stop
            freeze.eq(0),  # Release DDRDLLA freeze
            pause.eq(1),   # Pause DQSBUFM
            update.eq(1),  # Update DDRDLLA
            update.eq(0),  # Release DDRDLLA update
            pause.eq(0),   # Release DQSBUFM pause
        )
        # Step k (1-based) is scheduled at k * step_cycles.
        timeline = Timeline([
            (index * step_cycles, [action])
            for index, action in enumerate(sequence, start=1)
        ])
        m.submodules += timeline
        # Wait for DDRDLLA lock before starting the sequence.
        m.d.comb += timeline.trigger.eq(new_lock)

        # Outputs --------------------------------------------------------------
        m.d.comb += self.pause.eq(pause)
        m.d.comb += self.stop.eq(stop)
        m.d.comb += self.delay.eq(delay)
        m.d.comb += ResetSignal(self._eclk_cd).eq(reset)

        return m
86
87 # Lattice ECP5 DDR PHY -----------------------------------------------------------------------------
88
class ECP5DDRPHY(Peripheral, Elaboratable):
    """1:2 frequency-ratio DDR3 PHY for Lattice ECP5, exposed as a peripheral.

    Parameters
    ----------
    pads
        DRAM pad collection. The code reads ``dq``, ``dm``, ``dqs_p``,
        ``clk``, ``a``, ``ba``, ``ras_n``, ``cas_n``, ``we_n``, ``cke`` and
        ``odt``, plus the optional ``reset_n`` and ``cs_n``. The number of
        DQ bits (``len(pads.dq.o)``) must be a multiple of 8.
    sys_clk_freq : float
        Frequency of the system clock in Hz; used to derive the DRAM clock
        period for the CAS latency lookup.

    Attributes
    ----------
    bus
        Bus interface of the CSR bridge.
    irq
        Interrupt line of the CSR bridge.
    dfi
        DFI interface; created during ``elaborate``.
    settings
        ``PhySettings`` describing this PHY; created during ``elaborate``.
    datavalid : Signal
        One bit per byte lane, driven by the DQSBUFM ``DATAVALID`` outputs;
        created during ``elaborate``.
    """

    def __init__(self, pads, sys_clk_freq=100e6):
        super().__init__()  # Peripheral init

        self.pads = pads
        self._sys_clk_freq = sys_clk_freq

        databits = len(self.pads.dq.o)
        assert databits % 8 == 0, "number of DQ bits must be a multiple of 8"

        # CSR
        bank = self.csr_bank()

        self._dly_sel = bank.csr(databits//8, "rw")

        self._rdly_dq_rst = bank.csr(1, "rw")
        self._rdly_dq_inc = bank.csr(1, "rw")
        self._rdly_dq_bitslip_rst = bank.csr(1, "rw")
        self._rdly_dq_bitslip = bank.csr(1, "rw")

        self._burstdet_clr = bank.csr(1, "rw")
        self._burstdet_seen = bank.csr(databits//8, "r")

        self._zero_ev = self.event(mode="rise")

        self._bridge = self.bridge(data_width=32, granularity=8, alignment=2)
        self.bus = self._bridge.bus
        self.irq = self._bridge.irq

    def elaborate(self, platform):
        m = Module()

        memtype = "DDR3"
        tck = 2/(2*2*self._sys_clk_freq)
        addressbits = len(self.pads.a.o)
        bankbits = len(self.pads.ba.o)
        nranks = 1 if not hasattr(self.pads, "cs_n") else len(self.pads.cs_n)
        # Use the same width source as __init__ (`dq.o`) so that the CSR
        # widths and the per-byte-lane logic below always agree.
        databits = len(self.pads.dq.o)
        nphases = 2

        # Init -----------------------------------------------------------------
        # Keep a local handle: the DQSBUFM instances below need the init
        # block's `delay` and `pause` outputs.
        init = DomainRenamer("init")(ECP5DDRPHYInit("sys2x"))
        m.submodules.init = init

        # Parameters -----------------------------------------------------------
        cl, cwl = get_cl_cw(memtype, tck)
        cl_sys_latency = get_sys_latency(nphases, cl)
        cwl_sys_latency = get_sys_latency(nphases, cwl)

        # Observation
        self.datavalid = Signal(databits//8)

        # PHY settings ---------------------------------------------------------
        rdcmdphase, rdphase = get_sys_phases(nphases, cl_sys_latency, cl)
        wrcmdphase, wrphase = get_sys_phases(nphases, cwl_sys_latency, cwl)
        self.settings = PhySettings(
            phytype="ECP5DDRPHY",
            memtype=memtype,
            databits=databits,
            dfi_databits=4*databits,
            nranks=nranks,
            nphases=nphases,
            rdphase=rdphase,
            wrphase=wrphase,
            rdcmdphase=rdcmdphase,
            wrcmdphase=wrcmdphase,
            cl=cl,
            cwl=cwl,
            read_latency=2 + cl_sys_latency + 2 + log2_int(4//nphases) + 4,
            write_latency=cwl_sys_latency
        )

        # DFI Interface --------------------------------------------------------
        self.dfi = dfi = Interface(addressbits, bankbits, nranks, 4*databits, 4)

        # # #

        bl8_chunk = Signal()
        rddata_en = Signal(self.settings.read_latency)

        # Clock ----------------------------------------------------------------
        for i in range(len(self.pads.clk.o)):
            m.submodules += Instance("ODDRX2F",
                i_RST=ResetSignal("sys2x"),
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                i_D0=0,
                i_D1=1,
                i_D2=0,
                i_D3=1,
                o_Q=self.pads.clk.o[i]
            )

        # Addresses and Commands -----------------------------------------------
        # Each DFI phase value is presented on two consecutive ODDRX2F data
        # inputs.
        for i in range(addressbits):
            m.submodules += Instance("ODDRX2F",
                i_RST=ResetSignal("sys2x"),
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                i_D0=dfi.phases[0].address[i],
                i_D1=dfi.phases[0].address[i],
                i_D2=dfi.phases[1].address[i],
                i_D3=dfi.phases[1].address[i],
                o_Q=self.pads.a.o[i]
            )
        for i in range(bankbits):
            m.submodules += Instance("ODDRX2F",
                i_RST=ResetSignal("sys2x"),
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                i_D0=dfi.phases[0].bank[i],
                i_D1=dfi.phases[0].bank[i],
                i_D2=dfi.phases[1].bank[i],
                i_D3=dfi.phases[1].bank[i],
                o_Q=self.pads.ba.o[i]
            )
        controls = ["ras_n", "cas_n", "we_n", "cke", "odt"]
        if hasattr(self.pads, "reset_n"):
            controls.append("reset_n")
        if hasattr(self.pads, "cs_n"):
            controls.append("cs_n")
        for name in controls:
            for i in range(len(getattr(self.pads, name))):
                m.submodules += Instance("ODDRX2F",
                    i_RST=ResetSignal("sys2x"),
                    i_ECLK=ClockSignal("sys2x"),
                    i_SCLK=ClockSignal(),
                    i_D0=getattr(dfi.phases[0], name)[i],
                    i_D1=getattr(dfi.phases[0], name)[i],
                    i_D2=getattr(dfi.phases[1], name)[i],
                    i_D3=getattr(dfi.phases[1], name)[i],
                    o_Q=getattr(self.pads, name)[i]
                )

        # DQ -------------------------------------------------------------------
        # NOTE(review): `.storage`, `.re` and `.status` below are LiteX-style
        # CSR attribute names; confirm that the objects returned by
        # `bank.csr()` actually expose them.
        # NOTE(review): `Tristate` is not imported explicitly in this file;
        # presumably it comes from one of the star imports -- confirm.
        dq_oe = Signal()
        dqs_oe = Signal()
        dqs_pattern = DQSPattern()
        m.submodules += dqs_pattern
        for i in range(databits//8):
            # DQSBUFM
            dqs_i = Signal()
            dqsr90 = Signal()
            dqsw270 = Signal()
            dqsw = Signal()
            rdpntr = Signal(3)
            wrpntr = Signal(3)

            # Software-controlled read delay for the selected byte lane.
            rdly = Signal(7)
            with m.If(self._dly_sel.storage[i]):
                with m.If(self._rdly_dq_rst.re):
                    m.d.sync += rdly.eq(0)
                with m.Elif(self._rdly_dq_inc.re):
                    m.d.sync += rdly.eq(rdly + 1)

            burstdet = Signal()
            dqs_read = Signal()

            # Software-controlled bitslip: selects which 2-bit window of the
            # read-enable shift register generates the DQSBUFM READ pulse.
            dqs_bitslip = Signal(2)
            with m.If(self._dly_sel.storage[i]):
                with m.If(self._rdly_dq_bitslip_rst.re):
                    m.d.sync += dqs_bitslip.eq(0)
                with m.Elif(self._rdly_dq_bitslip.re):
                    m.d.sync += dqs_bitslip.eq(dqs_bitslip + 1)
            with m.Switch(dqs_bitslip):
                for j, b in enumerate(range(-2, 2)):
                    with m.Case(j):
                        m.d.sync += dqs_read.eq(
                            rddata_en[cl_sys_latency + b:cl_sys_latency + b + 2] != 0)

            m.submodules += Instance("DQSBUFM",
                p_DQS_LI_DEL_ADJ="MINUS",
                p_DQS_LI_DEL_VAL=1,
                p_DQS_LO_DEL_ADJ="MINUS",
                p_DQS_LO_DEL_VAL=4,
                # Clocks / Reset
                i_SCLK=ClockSignal("sys"),
                i_ECLK=ClockSignal("sys2x"),
                i_RST=ResetSignal("sys2x"),
                i_DDRDEL=init.delay,
                i_PAUSE=init.pause | self._dly_sel.storage[i],

                # Control
                # Assert LOADNs to use DDRDEL control
                i_RDLOADN=0,
                i_RDMOVE=0,
                i_RDDIRECTION=1,
                i_WRLOADN=0,
                i_WRMOVE=0,
                i_WRDIRECTION=1,

                # Reads (generate shifted DQS clock for reads)
                i_READ0=dqs_read,
                i_READ1=dqs_read,
                i_READCLKSEL0=rdly[0],
                i_READCLKSEL1=rdly[1],
                i_READCLKSEL2=rdly[2],
                i_DQSI=dqs_i,
                o_DQSR90=dqsr90,
                o_RDPNTR0=rdpntr[0],
                o_RDPNTR1=rdpntr[1],
                o_RDPNTR2=rdpntr[2],
                o_WRPNTR0=wrpntr[0],
                o_WRPNTR1=wrpntr[1],
                o_WRPNTR2=wrpntr[2],
                o_DATAVALID=self.datavalid[i],
                o_BURSTDET=burstdet,

                # Writes (generate shifted ECLK clock for writes)
                o_DQSW270=dqsw270,
                o_DQSW=dqsw
            )

            # Sticky burst-detect flag: set on a rising edge of BURSTDET,
            # cleared through the CSR. The set is written last, so it wins
            # when both conditions hold in the same cycle.
            burstdet_d = Signal()
            m.d.sync += burstdet_d.eq(burstdet)
            with m.If(self._burstdet_clr.re):
                m.d.sync += self._burstdet_seen.status[i].eq(0)
            with m.If(burstdet & ~burstdet_d):
                m.d.sync += self._burstdet_seen.status[i].eq(1)

            # DQS and DM -------------------------------------------------------
            dm_o_data = Signal(8)
            dm_o_data_d = Signal(8)
            dm_o_data_muxed = Signal(4)
            m.d.comb += dm_o_data.eq(Cat(
                dfi.phases[0].wrdata_mask[0*databits//8+i],
                dfi.phases[0].wrdata_mask[1*databits//8+i],
                dfi.phases[0].wrdata_mask[2*databits//8+i],
                dfi.phases[0].wrdata_mask[3*databits//8+i],

                dfi.phases[1].wrdata_mask[0*databits//8+i],
                dfi.phases[1].wrdata_mask[1*databits//8+i],
                dfi.phases[1].wrdata_mask[2*databits//8+i],
                dfi.phases[1].wrdata_mask[3*databits//8+i]),
            )
            m.d.sync += dm_o_data_d.eq(dm_o_data)
            # Select the half of the BL8 burst sent in this sys_clk cycle.
            # FIXME: use comb?
            with m.Switch(bl8_chunk):
                with m.Case(0):
                    m.d.sync += dm_o_data_muxed.eq(dm_o_data[:4])
                with m.Case(1):
                    m.d.sync += dm_o_data_muxed.eq(dm_o_data_d[4:])
            # NOTE(review): address/bank/clk pads are driven through `.o`;
            # confirm whether `dm`, `dqs_p` and `dq` need the same.
            m.submodules += Instance("ODDRX2DQA",
                i_RST=ResetSignal("sys2x"),
                i_ECLK=ClockSignal("sys2x"),
                i_SCLK=ClockSignal(),
                i_DQSW270=dqsw270,
                i_D0=dm_o_data_muxed[0],
                i_D1=dm_o_data_muxed[1],
                i_D2=dm_o_data_muxed[2],
                i_D3=dm_o_data_muxed[3],
                o_Q=self.pads.dm[i]
            )

            dqs = Signal()
            dqs_oe_n = Signal()
            m.submodules += [
                Instance("ODDRX2DQSB",
                    i_RST=ResetSignal("sys2x"),
                    i_ECLK=ClockSignal("sys2x"),
                    i_SCLK=ClockSignal(),
                    i_DQSW=dqsw,
                    i_D0=0,  # FIXME: dqs_pattern.o[3],
                    i_D1=1,  # FIXME: dqs_pattern.o[2],
                    i_D2=0,  # FIXME: dqs_pattern.o[1],
                    i_D3=1,  # FIXME: dqs_pattern.o[0],
                    o_Q=dqs
                ),
                Instance("TSHX2DQSA",
                    i_RST=ResetSignal("sys2x"),
                    i_ECLK=ClockSignal("sys2x"),
                    i_SCLK=ClockSignal(),
                    i_DQSW=dqsw,
                    i_T0=~(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble),
                    i_T1=~(dqs_pattern.preamble | dqs_oe | dqs_pattern.postamble),
                    o_Q=dqs_oe_n
                ),
                Tristate(self.pads.dqs_p[i], dqs, ~dqs_oe_n, dqs_i)
            ]

            for j in range(8*i, 8*(i+1)):
                dq_o = Signal()
                dq_i = Signal()
                dq_oe_n = Signal()
                dq_i_delayed = Signal()
                dq_i_data = Signal(8)
                dq_o_data = Signal(8)
                dq_o_data_d = Signal(8)
                dq_o_data_muxed = Signal(4)
                m.d.comb += dq_o_data.eq(Cat(
                    dfi.phases[0].wrdata[0*databits+j],
                    dfi.phases[0].wrdata[1*databits+j],
                    dfi.phases[0].wrdata[2*databits+j],
                    dfi.phases[0].wrdata[3*databits+j],

                    dfi.phases[1].wrdata[0*databits+j],
                    dfi.phases[1].wrdata[1*databits+j],
                    dfi.phases[1].wrdata[2*databits+j],
                    dfi.phases[1].wrdata[3*databits+j])
                )
                m.d.sync += dq_o_data_d.eq(dq_o_data)
                # Select the half of the BL8 burst sent in this sys_clk cycle.
                # FIXME: use comb?
                with m.Switch(bl8_chunk):
                    with m.Case(0):
                        m.d.sync += dq_o_data_muxed.eq(dq_o_data[:4])
                    with m.Case(1):
                        m.d.sync += dq_o_data_muxed.eq(dq_o_data_d[4:])
                _dq_i_data = Signal(4)
                m.submodules += [
                    Instance("ODDRX2DQA",
                        i_RST=ResetSignal("sys2x"),
                        i_ECLK=ClockSignal("sys2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSW270=dqsw270,
                        i_D0=dq_o_data_muxed[0],
                        i_D1=dq_o_data_muxed[1],
                        i_D2=dq_o_data_muxed[2],
                        i_D3=dq_o_data_muxed[3],
                        o_Q=dq_o
                    ),
                    Instance("DELAYF",
                        p_DEL_MODE="DQS_ALIGNED_X2",
                        i_LOADN=1,
                        i_MOVE=0,
                        i_DIRECTION=0,
                        i_A=dq_i,
                        o_Z=dq_i_delayed
                    ),
                    Instance("IDDRX2DQA",
                        i_RST=ResetSignal("sys2x"),
                        i_ECLK=ClockSignal("sys2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSR90=dqsr90,
                        i_RDPNTR0=rdpntr[0],
                        i_RDPNTR1=rdpntr[1],
                        i_RDPNTR2=rdpntr[2],
                        i_WRPNTR0=wrpntr[0],
                        i_WRPNTR1=wrpntr[1],
                        i_WRPNTR2=wrpntr[2],
                        i_D=dq_i_delayed,
                        o_Q0=_dq_i_data[0],
                        o_Q1=_dq_i_data[1],
                        o_Q2=_dq_i_data[2],
                        o_Q3=_dq_i_data[3],
                    )
                ]
                # Two consecutive 4-bit captures are assembled into 8 bits:
                # the older capture moves to the low nibble, the newest one
                # lands in the high nibble.
                m.d.sync += dq_i_data[:4].eq(dq_i_data[4:])
                m.d.sync += dq_i_data[4:].eq(_dq_i_data)
                m.d.comb += [
                    dfi.phases[0].rddata[0*databits+j].eq(dq_i_data[0]),
                    dfi.phases[0].rddata[1*databits+j].eq(dq_i_data[1]),
                    dfi.phases[0].rddata[2*databits+j].eq(dq_i_data[2]),
                    dfi.phases[0].rddata[3*databits+j].eq(dq_i_data[3]),
                    dfi.phases[1].rddata[0*databits+j].eq(dq_i_data[4]),
                    dfi.phases[1].rddata[1*databits+j].eq(dq_i_data[5]),
                    dfi.phases[1].rddata[2*databits+j].eq(dq_i_data[6]),
                    dfi.phases[1].rddata[3*databits+j].eq(dq_i_data[7]),
                ]
                m.submodules += [
                    Instance("TSHX2DQA",
                        i_RST=ResetSignal("sys2x"),
                        i_ECLK=ClockSignal("sys2x"),
                        i_SCLK=ClockSignal(),
                        i_DQSW270=dqsw270,
                        i_T0=~(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble),
                        i_T1=~(dqs_pattern.preamble | dq_oe | dqs_pattern.postamble),
                        o_Q=dq_oe_n,
                    ),
                    Tristate(self.pads.dq[j], dq_o, ~dq_oe_n, dq_i)
                ]

        # Read Control Path ----------------------------------------------------
        # Creates a shift register of read commands coming from the DFI
        # interface. This shift register is used to control DQS read (internal
        # read pulse of the DQSBUF) and to indicate to the DFI interface that
        # the read data is valid.
        #
        # The DQS read must be asserted for 2 sys_clk cycles before the read
        # data is coming back from the DRAM (see 6.2.4 READ Pulse Positioning
        # Optimization of FPGA-TN-02035-1.2)
        #
        # The read data valid is asserted for 1 sys_clk cycle when the data is
        # available on the DFI interface, the latency is the sum of the
        # ODDRX2DQA, CAS, IDDRX2DQA latencies.
        rddata_en_last = Signal.like(rddata_en)
        m.d.comb += rddata_en.eq(Cat(dfi.phases[self.settings.rdphase].rddata_en, rddata_en_last))
        m.d.sync += rddata_en_last.eq(rddata_en)
        m.d.sync += [phase.rddata_valid.eq(rddata_en[-1]) for phase in dfi.phases]

        # Write Control Path ---------------------------------------------------
        # Creates a shift register of write commands coming from the DFI
        # interface. This shift register is used to control DQ/DQS tristates
        # and to select write data of the DRAM burst from the DFI interface:
        # the PHY is operating in halfrate mode (so provides 4 datas every
        # sys_clk cycle: 2x for DDR, 2x for halfrate) but DDR3 requires a burst
        # of 8 datas (BL8) for best efficiency. Writes are then performed in
        # 2 sys_clk cycles and data needs to be selected for each cycle.
        # FIXME: understand +2
        wrdata_en = Signal(cwl_sys_latency + 5)
        wrdata_en_last = Signal.like(wrdata_en)
        m.d.comb += wrdata_en.eq(Cat(dfi.phases[self.settings.wrphase].wrdata_en, wrdata_en_last))
        m.d.sync += wrdata_en_last.eq(wrdata_en)
        m.d.comb += dq_oe.eq(wrdata_en[cwl_sys_latency + 2] | wrdata_en[cwl_sys_latency + 3])
        m.d.comb += bl8_chunk.eq(wrdata_en[cwl_sys_latency + 1])
        m.d.comb += dqs_oe.eq(dq_oe)

        # Write DQS Postamble/Preamble Control Path ----------------------------
        # Generates DQS Preamble 1 cycle before the first write and Postamble
        # 1 cycle after the last write. During writes, DQS tristate is
        # configured as output for at least 4 sys_clk cycles: 1 for Preamble,
        # 2 for the Write and 1 for the Postamble.
        m.d.comb += dqs_pattern.preamble.eq(wrdata_en[cwl_sys_latency + 1] & ~wrdata_en[cwl_sys_latency + 2])
        m.d.comb += dqs_pattern.postamble.eq(wrdata_en[cwl_sys_latency + 4] & ~wrdata_en[cwl_sys_latency + 3])

        return m