1 # Copyright (c) 2020 LambdaConcept <contact@lambdaconcept.com>
2 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
3 # Copyright (C) 2022 Raptor Engineering, LLC <support@raptorengineering.com>
5 # Based on code from LambdaConcept, from the gram example which is BSD-2-License
6 # https://github.com/jeanthom/gram/tree/master/examples
8 # Modifications for the Libre-SOC Project funded by NLnet and NGI POINTER
9 # under EU Grants 871528 and 957073, under the LGPLv3+ License
11 from nmigen
import (Module
, Elaboratable
, DomainRenamer
, Record
,
12 Signal
, Cat
, Const
, ClockSignal
, ResetSignal
)
13 from nmigen
.cli
import verilog
14 from nmigen
.lib
.cdc
import ResetSynchronizer
15 from nmigen_soc
import wishbone
, memory
16 from nmigen_soc
.memory
import MemoryMap
18 from nmigen_stdio
.serial
import AsyncSerial
19 from nmigen_boards
.resources
.memory
import HyperRAMResource
21 from lambdasoc
.periph
.intc
import GenericInterruptController
22 from lambdasoc
.periph
.sram
import SRAMPeripheral
23 from lambdasoc
.periph
.timer
import TimerPeripheral
24 from lambdasoc
.periph
import Peripheral
25 from lambdasoc
.soc
.base
import SoC
26 from soc
.bus
.uart_16550
import UART16550
# opencores 16550 uart
27 from soc
.bus
.tercel
import Tercel
# SPI XIP master
28 from soc
.bus
.external_core
import ExternalCore
# external libresoc/microwatt
29 from soc
.bus
.wb_downconvert
import WishboneDownConvert
30 from soc
.bus
.syscon
import MicrowattSYSCON
32 from gram
.common
import (PhySettings
, get_cl_cw
, get_sys_latency
,
34 from nmigen
.utils
import log2_int
35 from gram
.core
import gramCore
36 from gram
.phy
.ecp5ddrphy
import ECP5DDRPHY
37 from gram
.phy
.fakephy
import FakePHY
, SDRAM_VERBOSE_STD
, SDRAM_VERBOSE_DBG
38 from gram
.modules
import MT41K256M16
, MT41K64M16
39 from gram
.frontend
.wishbone
import gramWishbone
41 from nmigen_boards
.versa_ecp5
import VersaECP5Platform
42 from nmigen_boards
.ulx3s
import ULX3S_85F_Platform
43 from nmigen_boards
.arty_a7
import ArtyA7_100Platform
44 from nmigen_boards
.test
.blinky
import Blinky
46 from crg
import ECPIX5CRG
47 from icarusversa
import IcarusVersaPlatform
52 def sim_ddr3_settings(clk_freq
=100e6
):
53 tck
= 2/(2*2*clk_freq
)
59 cl
, cwl
= get_cl_cw("DDR3", tck
)
60 cl_sys_latency
= get_sys_latency(nphases
, cl
)
61 cwl_sys_latency
= get_sys_latency(nphases
, cwl
)
62 rdcmdphase
, rdphase
= get_sys_phases(nphases
, cl_sys_latency
, cl
)
63 wrcmdphase
, wrphase
= get_sys_phases(nphases
, cwl_sys_latency
, cwl
)
68 dfi_databits
=4*databits
,
73 rdcmdphase
=rdcmdphase
,
74 wrcmdphase
=wrcmdphase
,
77 read_latency
=2 + cl_sys_latency
+ 2 + log2_int(4//nphases
) + 4,
78 write_latency
=cwl_sys_latency
82 class WB64to32Convert(Elaboratable
):
83 """Microwatt IO wishbone slave 64->32 bits converter
85 For timing reasons, this adds a one cycle latch on the way both
86 in and out. This relaxes timing and routing pressure on the "main"
87 memory bus by moving all simple IOs to a slower 32-bit bus.
89 This implementation is rather dumb at the moment, no stash buffer,
90 so we stall whenever that latch is busy. This can be improved.
92 def __init__(self
, master
, slave
):
96 def elaborate(self
, platform
):
98 comb
, sync
= m
.d
.comb
, m
.d
.sync
99 master
, slave
= self
.master
, self
.slave
106 with m
.State("IDLE"):
107 # Clear ACK (and has_top_r) in case it was set
108 sync
+= master
.ack
.eq(0)
109 sync
+= has_top_r
.eq(0)
111 # Do we have a cycle ?
112 with m
.If(master
.cyc
& master
.stb
):
113 # Stall master until we are done, we aren't (yet) pipelining
114 # this, it's all slow IOs.
115 sync
+= master
.stall
.eq(1)
117 # Start cycle downstream
118 sync
+= slave
.cyc
.eq(1)
119 sync
+= slave
.stb
.eq(1)
121 # Do we have a top word and/or a bottom word ?
122 comb
+= has_top
.eq(master
.sel
[4:].bool())
123 comb
+= has_bot
.eq(master
.sel
[:4].bool())
124 # record the has_top flag for the next FSM state
125 sync
+= has_top_r
.eq(has_top
)
127 # Copy write enable to IO out, copy address as well,
128 # LSB is set later based on HI/LO
129 sync
+= slave
.we
.eq(master
.we
)
130 sync
+= slave
.adr
.eq(Cat(0, master
.adr
))
132 # If we have a bottom word, handle it first, otherwise
133 # send the top word down. XXX Split the actual mux out
134 # and only generate a control signal.
136 with m
.If(master
.we
):
137 sync
+= slave
.dat_w
.eq(master
.dat_w
[:32])
138 sync
+= slave
.sel
.eq(master
.sel
[:4])
140 # Wait for ack on BOTTOM half
141 m
.next
= "WAIT_ACK_BOT"
144 with m
.If(master
.we
):
145 sync
+= slave
.dat_w
.eq(master
.dat_w
[32:])
146 sync
+= slave
.sel
.eq(master
.sel
[4:])
148 # Bump LSB of address
149 sync
+= slave
.adr
[0].eq(1)
151 # Wait for ack on TOP half
152 m
.next
= "WAIT_ACK_TOP"
155 with m
.State("WAIT_ACK_BOT"):
156 # If we aren't stalled by the device, clear stb
157 if hasattr(slave
, "stall"):
158 with m
.If(~slave
.stall
):
159 sync
+= slave
.stb
.eq(0)
162 with m
.If(slave
.ack
):
163 # If it's a read, latch the data
164 with m
.If(~slave
.we
):
165 sync
+= master
.dat_r
[:32].eq(slave
.dat_r
)
167 # Do we have a "top" part as well ?
168 with m
.If(has_top_r
):
170 with m
.If(master
.we
):
171 sync
+= slave
.dat_w
.eq(master
.dat_w
[32:])
172 sync
+= slave
.sel
.eq(master
.sel
[4:])
174 # Bump address and set STB
175 sync
+= slave
.adr
[0].eq(1)
176 sync
+= slave
.stb
.eq(1)
179 m
.next
= "WAIT_ACK_TOP"
182 # We are done, ack up, clear cyc downstream
183 sync
+= slave
.cyc
.eq(0)
184 sync
+= slave
.stb
.eq(0)
186 # And ack & unstall upstream
187 sync
+= master
.ack
.eq(1)
188 if hasattr(master
, "stall"):
189 sync
+= master
.stall
.eq(0)
194 with m
.State("WAIT_ACK_TOP"):
195 # If we aren't stalled by the device, clear stb
196 if hasattr(slave
, "stall"):
197 with m
.If(~slave
.stall
):
198 sync
+= slave
.stb
.eq(0)
201 with m
.If(slave
.ack
):
202 # If it's a read, latch the data
203 with m
.If(~slave
.we
):
204 sync
+= master
.dat_r
[32:].eq(slave
.dat_r
)
206 # We are done, ack up, clear cyc downstream
207 sync
+= slave
.cyc
.eq(0)
208 sync
+= slave
.stb
.eq(0)
210 # And ack & unstall upstream
211 sync
+= master
.ack
.eq(1)
212 if hasattr(master
, "stall"):
213 sync
+= master
.stall
.eq(0)
221 class DDR3SoC(SoC
, Elaboratable
):
222 def __init__(self
, *,
225 uart_pins
, spi_0_pins
,
226 ddr_pins
, ddrphy_addr
, dramcore_addr
, ddr_addr
,
229 spi0_addr
, spi0_cfg_addr
,
231 hyperram_pinset
=None,
235 # wishbone routing is as follows:
248 # +---decoder----+--------+---------+
250 # uart XICS CSRs DRAM XIP SPI
252 # set up wishbone bus arbiter and decoder. arbiter routes,
253 # decoder maps local-relative addressed satellites to global addresses
254 self
._arbiter
= wishbone
.Arbiter(addr_width
=30, data_width
=32,
256 features
={"cti", "bte", "stall"})
257 self
._decoder
= wishbone
.Decoder(addr_width
=30, data_width
=32,
259 features
={"cti", "bte", "stall"})
261 # default firmware name
263 firmware
= "firmware/main.bin"
265 # set up clock request generator
266 self
.crg
= ECPIX5CRG(clk_freq
)
268 # set up CPU, with 64-to-32-bit downconverters
270 self
.cpu
= ExternalCore(name
="ext_core")
271 cvtdbus
= wishbone
.Interface(addr_width
=30, data_width
=32,
272 granularity
=8, features
={'stall'})
273 cvtibus
= wishbone
.Interface(addr_width
=30, data_width
=32,
274 granularity
=8, features
={'stall'})
275 self
.dbusdowncvt
= WB64to32Convert(self
.cpu
.dbus
, cvtdbus
)
276 self
.ibusdowncvt
= WB64to32Convert(self
.cpu
.ibus
, cvtibus
)
277 self
._arbiter
.add(cvtibus
) # I-Cache Master
278 self
._arbiter
.add(cvtdbus
) # D-Cache Master. TODO JTAG master
279 self
.cvtibus
= cvtibus
280 self
.cvtdbus
= cvtdbus
282 # CPU interrupt controller
283 self
.intc
= GenericInterruptController(width
=len(self
.cpu
.irq
))
285 # SRAM (but actually a ROM, for firmware), at address 0x0
286 if fw_addr
is not None:
288 self
.bootmem
= SRAMPeripheral(size
=0x8000, data_width
=sram_width
,
290 if firmware
is not None:
291 with
open(firmware
, "rb") as f
:
292 words
= iter(lambda: f
.read(sram_width
// 8), b
'')
293 bios
= [int.from_bytes(w
, "little") for w
in words
]
294 self
.bootmem
.init
= bios
295 self
._decoder
.add(self
.bootmem
.bus
, addr
=fw_addr
) # ROM at fw_addr
297 # System Configuration info
298 self
.syscon
= MicrowattSYSCON(sys_clk_freq
=clk_freq
,
299 has_uart
=(uart_pins
is not None))
300 self
._decoder
.add(self
.syscon
.bus
, addr
=0xc0000000) # at 0xc000_0000
303 # SRAM (read-writeable BRAM)
304 self
.ram
= SRAMPeripheral(size
=4096)
305 self
._decoder
.add(self
.ram
.bus
, addr
=0x8000000) # at 0x8000_0000
307 # UART at 0xC000_2000, convert 32-bit bus down to 8-bit in an odd way
308 if uart_pins
is not None:
309 # sigh actual UART in microwatt is 8-bit
310 self
.uart
= UART16550(data_width
=8, pins
=uart_pins
,
312 # but (see soc.vhdl) 8-bit regs are addressed at 32-bit locations
313 cvtuartbus
= wishbone
.Interface(addr_width
=5, data_width
=32,
316 umap
= MemoryMap(addr_width
=7, data_width
=8, name
="uart_map")
317 cvtuartbus
.memory_map
= umap
318 self
._decoder
.add(cvtuartbus
, addr
=0xc0002000) # 16550 UART addr
319 self
.cvtuartbus
= cvtuartbus
321 # SDRAM module using opencores sdr_ctrl
323 class MT48LC16M16(SDRModule):
329 technology_timings = _TechnologyTimings(tREFI=64e6/8192,
333 speedgrade_timings = {"default": _SpeedgradeTimings(tRP=20,
342 if ddr_pins
is not None or fpga
== 'sim':
343 ddrmodule
= dram_cls(clk_freq
, "1:2") # match DDR3 ASIC P/N
346 drs
= DomainRenamer("dramsync")
349 self
.ddrphy
= FakePHY(module
=ddrmodule
,
350 settings
=sim_ddr3_settings(clk_freq
),
351 verbosity
=SDRAM_VERBOSE_DBG
,
354 self
.ddrphy
= drs(ECP5DDRPHY(ddr_pins
, sys_clk_freq
=clk_freq
))
355 self
._decoder
.add(self
.ddrphy
.bus
, addr
=ddrphy_addr
)
357 dramcore
= gramCore(phy
=self
.ddrphy
,
358 geom_settings
=ddrmodule
.geom_settings
,
359 timing_settings
=ddrmodule
.timing_settings
,
362 self
.dramcore
= dramcore
364 self
.dramcore
= drs(dramcore
)
365 self
._decoder
.add(self
.dramcore
.bus
, addr
=dramcore_addr
)
367 # map the DRAM onto Wishbone, XXX use stall but set classic below
368 drambone
= gramWishbone(dramcore
, features
={'stall'})
370 self
.drambone
= drambone
372 self
.drambone
= drs(drambone
)
373 self
._decoder
.add(self
.drambone
.bus
, addr
=ddr_addr
)
376 if spi_0_pins
is not None and fpga
in ['sim',
377 'rcs_arctic_tern_bmc_card']:
378 # The Lattice ECP5 devices require special handling on the
379 # dedicated SPI clock line, which is shared with the internal
380 # SPI controller used for FPGA bitstream loading.
381 spi0_is_lattice_ecp5_clk
= False
382 if platform
is not None and fpga
in ['versa_ecp5',
383 'rcs_arctic_tern_bmc_card',
385 spi0_is_lattice_ecp5_clk
= True
387 # Tercel contains two independent Wishbone regions, a
388 # configuration region and the direct API access region,
389 # Set the SPI 0 access region to 16MB, as the FPGA
390 # bitstream Flash device is unlikely to be larger than this.
391 # The main SPI Flash (SPI 1) should be set to at
392 # least 28 bits (256MB) to allow the use of large 4BA devices.
393 self
.spi0
= Tercel(data_width
=32, spi_region_addr_width
=24,
396 lattice_ecp5_usrmclk
=spi0_is_lattice_ecp5_clk
)
397 self
._decoder
.add(self
.spi0
.bus
, addr
=spi0_addr
)
398 self
._decoder
.add(self
.spi0
.cfg_bus
, addr
=spi0_cfg_addr
)
400 # HyperRAM modules *plural*. Assumes using a Quad PMOD by Piotr
401 # Esden, sold by 1bitsquared
403 self
.memory_map
= self
._decoder
.bus
.memory_map
405 self
.clk_freq
= clk_freq
407 def elaborate(self
, platform
):
411 # add the peripherals and clock-reset-generator
412 if platform
is not None:
413 m
.submodules
.sysclk
= self
.crg
415 if hasattr(self
, "bootmem"):
416 m
.submodules
.bootmem
= self
.bootmem
417 m
.submodules
.syscon
= self
.syscon
418 if hasattr(self
, "ram"):
419 m
.submodules
.ram
= self
.ram
420 if hasattr(self
, "uart"):
421 m
.submodules
.uart
= self
.uart
422 comb
+= self
.uart
.cts_i
.eq(1)
423 comb
+= self
.uart
.dsr_i
.eq(1)
424 comb
+= self
.uart
.ri_i
.eq(0)
425 comb
+= self
.uart
.dcd_i
.eq(1)
426 # sigh connect up the wishbone bus manually to deal with
427 # the mis-match on the data
428 uartbus
= self
.uart
.bus
429 comb
+= uartbus
.adr
.eq(self
.cvtuartbus
.adr
)
430 comb
+= uartbus
.stb
.eq(self
.cvtuartbus
.stb
)
431 comb
+= uartbus
.cyc
.eq(self
.cvtuartbus
.cyc
)
432 comb
+= uartbus
.sel
.eq(self
.cvtuartbus
.sel
)
433 comb
+= uartbus
.we
.eq(self
.cvtuartbus
.we
)
434 comb
+= uartbus
.dat_w
.eq(self
.cvtuartbus
.dat_w
) # drops 8..31
435 comb
+= self
.cvtuartbus
.dat_r
.eq(uartbus
.dat_r
) # drops 8..31
436 comb
+= self
.cvtuartbus
.ack
.eq(uartbus
.ack
)
437 # aaand with the WB4-pipeline-to-WB3-classic mismatch, sigh
438 comb
+= uartbus
.stall
.eq(uartbus
.cyc
& ~uartbus
.ack
)
439 comb
+= self
.cvtuartbus
.stall
.eq(uartbus
.stall
)
440 if hasattr(self
, "cpu"):
441 m
.submodules
.intc
= self
.intc
442 m
.submodules
.extcore
= self
.cpu
443 m
.submodules
.dbuscvt
= self
.dbusdowncvt
444 m
.submodules
.ibuscvt
= self
.ibusdowncvt
445 # create stall sigs, assume wishbone classic
446 #ibus, dbus = self.cvtibus, self.cvtdbus
447 #comb += ibus.stall.eq(ibus.stb & ~ibus.ack)
448 #comb += dbus.stall.eq(dbus.stb & ~dbus.ack)
450 m
.submodules
.arbiter
= self
._arbiter
451 m
.submodules
.decoder
= self
._decoder
452 if hasattr(self
, "ddrphy"):
453 m
.submodules
.ddrphy
= self
.ddrphy
454 m
.submodules
.dramcore
= self
.dramcore
455 m
.submodules
.drambone
= drambone
= self
.drambone
456 # grrr, same problem with drambone: not WB4-pipe compliant
457 comb
+= drambone
.bus
.stall
.eq(drambone
.bus
.cyc
& ~drambone
.bus
.ack
)
459 # add blinky lights so we know FPGA is alive
460 if platform
is not None:
461 m
.submodules
.blinky
= Blinky()
463 # connect the arbiter (of wishbone masters)
464 # to the decoder (addressing wishbone slaves)
465 comb
+= self
._arbiter
.bus
.connect(self
._decoder
.bus
)
467 if hasattr(self
, "cpu"):
468 # wire up the CPU interrupts
469 comb
+= self
.cpu
.irq
.eq(self
.intc
.ip
)
474 # add uart16550 verilog source. assumes a directory
475 # structure where ls2 has been checked out in a common
476 # subdirectory as https://github.com/freecores/uart16550
477 opencores_16550
= "../../uart16550/rtl/verilog"
478 pth
= os
.path
.split(__file__
)[0]
479 pth
= os
.path
.join(pth
, opencores_16550
)
480 fname
= os
.path
.abspath(pth
)
482 self
.uart
.add_verilog_source(fname
, platform
)
484 # add Tercel verilog source. assumes a directory
485 # structure where ls2 has been checked out in a common
486 # subdirectory as https://git.libre-soc.org/git/microwatt.git
487 raptor_tercel
= "../../microwatt/tercel"
488 pth
= os
.path
.split(__file__
)[0]
489 pth
= os
.path
.join(pth
, raptor_tercel
)
490 fname
= os
.path
.abspath(pth
)
492 self
.spi0
.add_verilog_source(fname
, platform
)
495 pth
= os
.path
.split(__file__
)[0]
496 pth
= os
.path
.join(pth
, '../external_core_top.v')
497 fname
= os
.path
.abspath(pth
)
498 with
open(fname
) as f
:
499 platform
.add_file(fname
, f
)
504 # puzzlingly the only IO ports needed are peripheral pins,
505 # and at the moment that's just UART tx/rx.
507 ports
+= [self
.uart
.tx_o
, self
.uart
.rx_i
]
508 if hasattr(self
, "ddrphy"):
509 if hasattr(self
.ddrphy
, "pads"): # real PHY
510 ports
+= list(self
.ddrphy
.pads
.fields
.values())
511 else: # FakePHY, get at the dfii pads, stops deletion of nets
512 for phase
in self
.dramcore
.dfii
.master
.phases
:
513 print ("dfi master", phase
)
514 ports
+= list(phase
.fields
.values())
515 for phase
in self
.dramcore
.dfii
.slave
.phases
:
516 print ("dfi master", phase
)
517 ports
+= list(phase
.fields
.values())
518 for phase
in self
.dramcore
.dfii
._inti
.phases
:
519 print ("dfi master", phase
)
520 ports
+= list(phase
.fields
.values())
521 ports
+= [ClockSignal(), ResetSignal()]
524 if __name__
== "__main__":
526 # create a platform selected from the toolchain. defaults to VERSA_ECP5
527 # only VERSA_ECP5 will work for now because of the DDR3 module
529 if len(sys
.argv
) >= 2:
531 platform_kls
= {'versa_ecp5': VersaECP5Platform
,
532 'ulx3s': ULX3S_85F_Platform
,
533 'arty_a7': ArtyA7_100Platform
,
534 'isim': IcarusVersaPlatform
,
537 toolchain
= {'arty_a7': "yosys_nextpnr",
538 'versa_ecp5': 'Trellis',
543 dram_cls
= {'arty_a7': None,
544 'versa_ecp5': MT41K64M16
,
545 #'versa_ecp5': MT41K256M16,
550 if platform_kls
is not None:
551 platform
= platform_kls(toolchain
=toolchain
)
555 # set clock frequency
559 if fpga
== 'versa_ecp5':
562 # select a firmware file
565 if len(sys
.argv
) >= 3:
566 firmware
= sys
.argv
[2]
567 fw_addr
= 0x0000_0000
569 # get UART resource pins
570 if platform
is not None:
571 uart_pins
= platform
.request("uart", 0)
573 uart_pins
= Record([('tx', 1), ('rx', 1)], name
="uart_0")
575 # get DDR resource pins
577 if platform
is not None and fpga
in ['versa_ecp5', 'arty_a7', 'isim']:
578 ddr_pins
= platform
.request("ddr3", 0,
579 dir={"dq":"-", "dqs":"-"},
580 xdr
={"rst": 4, "clk":4, "a":4,
582 "odt":4, "ras":4, "cas":4, "we":4,
585 # Get SPI resource pins
587 if platform
is not None and fpga
in ['rcs_arctic_tern_bmc_card']:
588 if toolchain
== 'Trellis':
589 # The ECP5 series FPGAs handle the SPI clock directly on
590 # the FPGA configuration Flash device
591 spi_0_pins
= platform
.request("spi_0", 0,
592 dir={"dq":"io", "cs_n":"o"},
593 xdr
={"dq": 1, "cs_n": 1})
595 spi_0_pins
= platform
.request("spi_0", 0,
596 dir={"dq":"io", "cs_n":"o", "clk":"o"},
597 xdr
={"dq": 1, "cs_n": 1, "clk": 0})
599 # Get HyperRAM pinsets, there are multiple of these!
600 hyperram_pinset
= None
601 if platform
is not None and fpga
in ['versa_ecp5']:
602 hyperram_ios
= HyperRAMResources(cs_n
="B1",
603 dq
="D0 D1 D2 D3 D4 D7 D6 D7",
604 rwds
="B2", rst_n
="B3", clk_p
="B4",
605 attrs
=IOStandard("LVCMOS33"))
606 self
.platform
.add_extension(hyperram_ios
)
607 hyperram_pinset
= self
.platform
.request("hyperram")
610 soc
= DDR3SoC(fpga
=fpga
, dram_cls
=dram_cls
,
611 # check microwatt_soc.h for these
612 ddrphy_addr
=0xff000000, # DRAM_INIT_BASE firmware base
613 dramcore_addr
=0xc8000000, # DRAM_CTRL_BASE
614 ddr_addr
=0x40000000, # DRAM_BASE
615 spi0_addr
=0x10000000, # SPI0_BASE
616 spi0_cfg_addr
=0xc0003000, # SPI0_CTRL_BASE
617 hyperram_addr
=0xa0000000, # HYPERRAM_BASE
622 spi_0_pins
=spi_0_pins
,
623 hyperram_pinset
=hyperram_pinset
,
628 if toolchain
== 'Trellis':
629 # add -abc9 option to yosys synth_ecp5
630 #os.environ['NMIGEN_synth_opts'] = '-abc9 -nowidelut'
631 #os.environ['NMIGEN_synth_opts'] = '-abc9'
632 os
.environ
['NMIGEN_synth_opts'] = '-nowidelut'
634 if platform
is not None:
635 # build and upload it
637 platform
.build(soc
, do_program
=False,
638 do_build
=True, build_dir
="build_simsoc")
640 platform
.build(soc
, do_program
=True)
642 # for now, generate verilog
643 vl
= verilog
.convert(soc
, ports
=soc
.ports())
644 with
open("ls2.v", "w") as f
: