lambdasoc/periph/hyperram.py

   1 # Basic Implementation of HyperRAM
   2 #
   3 # Copyright (c) 2019 Antti Lukats <antti.lukats@gmail.com>
   4 # Copyright (c) 2019 Florent Kermarrec <florent@enjoy-digital.fr>
   5 # Copyright (c) 2021 gatecat <gatecat@ds0.me> [nmigen-soc port]
   6 # Copyright (C) 2022 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   7 #
   8 # Code from Lukats, Kermarrec and gatecat is Licensed BSD-2-Clause
   9 #
  10 # Modifications for the Libre-SOC Project funded by NLnet and NGI POINTER
  11 # under EU Grants 871528 and 957073, and Licensed under the LGPLv3+ License
  12
  13 """
  14 Usage example when wiring up an external pmod.
  15 (thanks to daveshah for this tip)
  16 use platform.add_resources to first define the pins:
  17
  18     from nmigen.resources.memory import HyperRAMResources
  19     hyperram_ios = HyperRAMResources(cs="B1", # or cs="C0 C1 C2 C3" for Quad
  20                                      dq="D0 D1 D2 D3 D4 D5 D6 D7",
  21                                      rwds="B2", rst_n="B3", ck_p="B4",
  22                                      attrs=Attrs(IOSTANDARD="LVCMOS33"))
  23     self.platform.add_resources(hyperram_ios)
  24     io = self.platform.request("hyperram")
  25
  26 and then declare the instance using those pins:
  27
  28     hyperram = HyperRAM(io=io, phy_kls=HyperRAMPHY,
  29                     latency=7) # Winbond W956D8MBYA
  30                                # latency=6 for Cypress S27KL0641DABHI020
  31
  32 this trick will work with the 1-IC HyperRAM PMOD by Piotr Esden, sold
  33 by 1bitsquared.  however for the *four* IC HyperRAM PMOD, *four* cs_n pins
  34 are needed. These are then used to select, in turn, each IC, sequentially:
  35     * Access to 0x00000-0xfffff will activate CS0n,
  36     * Access to 0x100000-0x1fffff will activate CS1n,
  37     * Access to 0x200000-0x2fffff will activate CS2n,
  38     * Access to 0x300000-0x3fffff will activate CS3n
  39
  40 TODO: interleave multiple HyperRAM cs_n's to give striped (like RAID)
  41 memory accesses behind one single Wishbone interface.
  42 TODO: investigate whether HyperBUS can do CSn-striping in hardware
  43 (it should do, but this will require configuration registers to be written)
  44 """
  45
  46
  47 from nmigen import (Elaboratable, Module, Signal, Record, Cat, Const)
  48 from nmigen.cli import rtlil
  49
  50 from nmigen_soc import wishbone
  51 from nmigen_soc.memory import MemoryMap
  52 from lambdasoc.periph import Peripheral
  53
  54
  55 # HyperRAM ASIC PHY -----------------------------------------------------------
  56
  57 class HyperRAMASICPhy(Elaboratable):
  58     def __init__(self, io):
  59         self.io = io
  60         self.ck = ck = Signal()
  61         self.cs  = cs = Signal(len(self.io.cs_n))
  62         self.rst_n = rst_n = Signal()
  63
  64         self.dq_o  = dq_o  = Signal(8)
  65         self.dq_i  = dq_i  = Signal(8)
  66         self.dq_oe = dq_oe = Signal()
  67
  68         self.rwds_o  = rwds_o  = Signal.like(self.io["rwds_o"])
  69         self.rwds_oe = rwds_oe = Signal()
  70
  71     def elaborate(self, platform):
  72         m = Module()
  73         comb = m.d.comb
  74         ck, cs, rst_n = self.ck, self.cs, self.rst_n
  75         dq_o, dq_i, dq_oe = self.dq_o, self.dq_i, self.dq_oe
  76         rwds_o, rwds_oe = self.rwds_o, self.rwds_oe
  77
  78         comb += [
  79             self.io["rwds_o"].eq(rwds_o),
  80             self.io["cs_n"].eq(~cs),
  81             self.io["csn_oe"].eq(0),
  82             self.io["ck_o"].eq(ck),
  83             self.io["ck_oe"].eq(0),
  84             self.io["rwds_oe"].eq(~rwds_oe),
  85             self.io["rst_n"].eq(rst_n),
  86         ]
  87
  88         for i in range(8):
  89             comb += [
  90                 self.io[f"d{i}_o"].eq(dq_o[i]),
  91                 self.io[f"d{i}_oe"].eq(~dq_oe),
  92                 dq_i[i].eq(self.io[f"d{i}_i"])
  93             ]
  94
  95         return m
  96
  97     def ports(self):
  98         return list(self.io.fields.values())
  99
 100
 101 # HyperRAM pads class (PHY) which can be used for testing and simulation
 102 # (without needing a platform instance). use as:
 103 #   dut = HyperRAM(io=HyperRAMPads(), phy_kls=TestHyperRAMPHY)
 104
 105 class HyperRAMPads:
 106     def __init__(self, dw=8, n_cs=1):
 107         self.rst_n = Signal()
 108         self.ck  = Signal()
 109         self.cs_n = Signal(n_cs)
 110         self.dq   = Record([("oe", 1), ("o", dw),     ("i", dw)])
 111         self.rwds = Record([("oe", 1), ("o", dw//8),  ("i", dw//8)])
 112         self.dq.o.name = "dq_o"
 113         self.dq.i.name = "dq_i"
 114         self.dq.oe.name = "dq_oe"
 115         self.rwds.o.name = "rwds_o"
 116         self.rwds.i.name = "rwds_i"
 117         self.rwds.oe.name = "rwds_oe"
 118
 119     def ports(self):
 120         return [self.ck, self.cs_n, self.dq.o, self.dq.i, self.dq.oe,
 121                 self.rwds.o, self.rwds.oe, self.rst_n]
 122
 123
 124 class HyperRAMPHY(Elaboratable):
 125     def __init__(self, pads):
 126         self.pads = pads
 127         self.ck = pads.ck
 128         self.cs = Signal(len(self.pads.cs_n))
 129         self.rst_n = pads.rst_n
 130         self.dq_o = pads.dq.o
 131         self.dq_i = pads.dq.i
 132         self.dq_oe = pads.dq.oe
 133         self.rwds_o = pads.rwds.o
 134         self.rwds_oe = Signal()
 135
 136     def elaborate(self, platform):
 137         m = Module()
 138         m.d.comb += self.pads.cs_n.eq(self.cs)
 139         m.d.comb += self.pads.rwds.oe.eq(self.rwds_oe)
 140         return m
 141
 142     def ports(self):
 143         return self.pads.ports()
 144
 145
 146 # HyperRAM --------------------------------------------------------------------
 147
 148 class HyperRAM(Peripheral, Elaboratable):
 149     """HyperRAM
 150
 151     Provides a very simple/minimal HyperRAM core that should work with all
 152     FPGA/HyperRam chips:
 153     - FPGA vendor agnostic.
 154     - no setup/chip configuration (use default latency).
 155
 156     This core favors portability and ease of use over performance.
 157     Tested: Winbond W956D8MBYA latency=7
 158     Cypress S27KL0641DABHI020 requires latency=6
 159     """
 160     def __init__(self, *, io, phy_kls,
 161                           name=None,
 162                           latency=6,
 163                           addr_width=23, # 8 GBytes, per IC
 164                           bus=None, features=frozenset()):
 165         super().__init__(name=name)
 166         self.n_cs = n_cs = len(io.cs_n)
 167         self.cs_bits = cs_bits = n_cs.bit_length()-1
 168         self.io = io
 169         self.phy = phy_kls(io)
 170         self.latency = latency
 171         # per IC, times n_cs
 172         addr_width += cs_bits
 173         self.bus = wishbone.Interface(addr_width=addr_width-2,
 174                                       data_width=32, granularity=8,
 175                                       features=features)
 176         self.size = 2**addr_width
 177         mmap = MemoryMap(addr_width=addr_width, data_width=8)
 178         if name is None:
 179             name = "hyperram"
 180         mmap.add_resource(object(), name=name, size=self.size)
 181         self.bus.memory_map = mmap
 182         # # #
 183
 184     def elaborate(self, platform):
 185         m = Module()
 186         m.submodules.phy = self.phy
 187         bus = self.bus
 188         cs_bits = self.cs_bits
 189         comb, sync = m.d.comb, m.d.sync
 190
 191         ck       = self.phy.ck
 192         clk_phase = Signal(2)
 193         ck_active = Signal()
 194         cs        = self.phy.cs
 195         ca        = Signal(48)
 196         ca_active = Signal()
 197         sr        = Signal(48)
 198         sr_new    = Signal(48)
 199
 200         dq_o = self.phy.dq_o
 201         dq_i = self.phy.dq_i
 202         dq_oe = self.phy.dq_oe
 203         dw = len(dq_o) # data width
 204
 205         rwds_o = self.phy.rwds_o
 206         rwds_oe = self.phy.rwds_oe
 207
 208         # chip&address selection: use the MSBs of the address for chip-select
 209         # (bus_adr_hi) by doing "1<<bus_adr_hi". this has to be captured
 210         # (cs_latch) and asserted as part of bus_latch.  therefore *before*
 211         # that happens (SEND-COMMAND-ADDRESS and WAIT-STATE) cs has to be
 212         # set to the "unlatched" version.
 213         bus_adr_lo = self.bus.adr[:-cs_bits]
 214         if cs_bits != 0:
 215             bus_adr_hi = self.bus.adr[-cs_bits:]
 216         else:
 217             bus_adr_hi = 0
 218
 219         # Clock Generation (sys_clk/4) -----------------------------------
 220         # this is a cheap-and-cheerful way to create phase-offsetted DDR:
 221         # simply divide the main clock into 4 phases.  it does mean that
 222         # the HyperRAM IC is being run at 1/4 rate. sigh.
 223         sync += clk_phase.eq(clk_phase + 1)
 224         with m.Switch(clk_phase):
 225             with m.Case(1):
 226                 sync += ck.eq(ck_active)
 227             with m.Case(3):
 228                 sync += ck.eq(0)
 229
 230         # Data Shift Register (for write and read) ------------------------
 231         dqi = Signal(dw)
 232         sync += dqi.eq(dq_i) # Sample on 90° and 270°
 233         with m.If(ca_active):
 234             comb += sr_new.eq(Cat(dqi[:8], sr[:-dw]))
 235         with m.Else():
 236             comb += sr_new.eq(Cat(dqi, sr[:-8]))
 237         with m.If(~clk_phase[0]):
 238             sync += sr.eq(sr_new) # Shift on 0° and 180°
 239
 240         # Data shift-out register ----------------------------------------
 241         comb += self.bus.dat_r.eq(sr_new), # To Wisbone
 242         with m.If(dq_oe):
 243             comb += dq_o.eq(sr[-dw:]), # To HyperRAM
 244         with m.If(dq_oe & ca_active):
 245             comb += dq_o.eq(sr[-8:]), # To HyperRAM, Only 8-bit during CMD/Addr.
 246
 247         # Command generation ----------------------------------------------
 248         ashift = {8:1, 16:0}[dw]
 249         la = 3-ashift
 250         comb += [
 251             ca[47].eq(~self.bus.we),     # R/W#
 252             ca[45].eq(1),                # Burst Type (Linear)
 253             ca[16:45].eq(bus_adr_lo[la:]),  # Row & Upper Column Address
 254             ca[ashift:3].eq(bus_adr_lo),    # Lower Column Address
 255         ]
 256
 257         # Latency count starts from the middle of the command (thus the -4).
 258         # In fixed latency mode (default), latency is 2 x Latency count.
 259         # We have 4 x sys_clk per RAM clock:
 260         latency_cycles = (self.latency * 2 * 4) - 4
 261
 262         # Bus Latch ----------------------------------------------------
 263         bus_adr   = Signal(32)
 264         bus_we    = Signal()
 265         bus_sel   = Signal(4)
 266         bus_latch = Signal()
 267         cs_latch  = Signal.like(cs)
 268         with m.If(bus_latch):
 269             with m.If(bus.we):
 270                 sync += sr.eq(Cat(Const(0, 16), bus.dat_w))
 271             sync += [ bus_we.eq(bus.we),
 272                       bus_sel.eq(bus.sel),
 273                       bus_adr.eq(bus_adr_lo),
 274                       cs_latch.eq(cs)
 275                     ]
 276
 277         # Sequencer -------------------------------------------------------
 278         cycles = Signal(8)
 279         first  = Signal()
 280         nfirst  = Signal() # not-first
 281         count_inc = Signal()
 282         dbg_cyc = Signal(8)
 283         comb += nfirst.eq(~first) # convenience
 284
 285         # when not idle run a cycles counter
 286         with m.If(count_inc):
 287             sync += dbg_cyc.eq(dbg_cyc+1)
 288         with m.Else():
 289             sync += dbg_cyc.eq(0)
 290
 291         # Main FSM
 292         with m.FSM() as fsm:
 293             comb += count_inc.eq(~fsm.ongoing("IDLE"))
 294             with m.State("IDLE"):
 295                 sync += first.eq(1)
 296                 with m.If(bus.cyc & bus.stb & (clk_phase == 0)):
 297                     sync += sr.eq(ca)
 298                     m.next = "SEND-COMMAND-ADDRESS"
 299                     sync += cycles.eq(0)
 300
 301             with m.State("SEND-COMMAND-ADDRESS"):
 302                 sync += cycles.eq(cycles+1)
 303                 comb += cs.eq(1<<bus_adr_hi) # Set CSn direct (not via latch)
 304                 comb += ck_active.eq(1) # Activate clock
 305                 comb += ca_active.eq(1) # Send Command on DQ.
 306                 comb += dq_oe.eq(1),    # Wait for 6*2 cycles...
 307                 with m.If(cycles == (6*2 - 1)):
 308                     m.next = "WAIT-LATENCY"
 309                     sync += cycles.eq(0)
 310
 311             with m.State("WAIT-LATENCY"):
 312                 sync += cycles.eq(cycles+1)
 313                 comb += cs.eq(1<<bus_adr_hi) # Set CSn directly (not via latch)
 314                 comb += ck_active.eq(1) # Activate clock
 315                 # Wait for Latency cycles...
 316                 with m.If(cycles == (latency_cycles - 1)):
 317                     comb += bus_latch.eq(1) # Latch Bus (and cs)
 318                     # Early Write Ack (to allow bursting).
 319                     comb += bus.ack.eq(bus.we)
 320                     m.next = "READ-WRITE-DATA0"
 321                     sync += cycles.eq(0)
 322
 323             # for-loop creates multple READ-WRITE-DATA states, to send/get
 324             # dw bits at a time.
 325             states = {8:4, 16:2}[dw]
 326             for n in range(states):
 327                 with m.State("READ-WRITE-DATA%d" % n):
 328                     sync += cycles.eq(cycles+1)
 329                     comb += cs.eq(cs_latch), # *now* set CSn from Latch
 330                     comb += ck_active.eq(1) # Activate clock
 331                     # Send Data on DQ/RWDS (for write).
 332                     with m.If(bus_we):
 333                         comb += dq_oe.eq(1)
 334                         comb += rwds_oe.eq(1)
 335                         for i in range(dw//8):
 336                             seli = ~bus_sel[4-1-n*dw//8-i]
 337                             comb += rwds_o[dw//8-1-i].eq(seli)
 338                     # Wait for 2 cycles (since HyperRAM's Clk = sys_clk/4).
 339                     with m.If(cycles == (2 - 1)):
 340                         # Set next default state (with rollover for bursts).
 341                         m.next = "READ-WRITE-DATA%d" % ((n + 1) % states)
 342                         sync += cycles.eq(0)
 343                         # On last state, see if we can continue the burst
 344                         # or if we should end it.
 345                         if n == states - 1:
 346                             sync += first.eq(0)
 347                             # Continue burst when consecutive access ready.
 348                             with m.If(bus.stb & bus.cyc &
 349                                       (bus.we == bus_we) &
 350                                       (bus_adr_lo == (bus_adr + 1)) &
 351                                       ((1<<bus_adr_hi) == cs_latch)):
 352                                 comb += bus_latch.eq(1), # Latch Bus (and cs)
 353                                 # Early Write Ack (to allow bursting).
 354                                 comb += bus.ack.eq(bus.we)
 355                             # Else end the burst.
 356                             with m.Elif(bus_we | nfirst):
 357                                 m.next = "IDLE"
 358                                 sync += cycles.eq(0)   # reset to start
 359                                 sync += cs_latch.eq(0) # helps debugging
 360                         # Read Ack (when dat_r ready).
 361                         if n == 0:
 362                             comb += bus.ack.eq(nfirst & ~bus_we)
 363
 364         return m
 365
 366     def ports(self):
 367         return self.phy.ports() + list(self.bus.fields.values())
 368
 369
 370 if __name__ == '__main__':
 371     layout=[('rwds_o', 1), ('rwds_oe', 1),
 372             ('cs_n', 1), ('csn_oe', 1),
 373             ('ck_o', 1), ('ck_oe', 1),
 374             ('rst_n', 1)]
 375     for i in range(8):
 376         layout += [('d%d_o' % i, 1), ('d%d_oe' % i, 1), ('d%d_i' % i, 1)]
 377     io = Record(layout=layout)
 378     dut = HyperRAM(io=io, phy_kls=HyperRAMASICPhy)
 379     vl = rtlil.convert(dut, ports=dut.ports())
 380     with open("test_hyperram.il", "w") as f:
 381         f.write(vl)
 382