use platform.add_resources to first define the pins:
from nmigen.resources.memory import HyperRAMResources
- hyperram_ios = HyperRAMResources(cs_n="B1",
+ hyperram_ios = HyperRAMResources(cs="B1", # or cs="C0 C1 C2 C3" for Quad
dq="D0 D1 D2 D3 D4 D5 D6 D7",
- rwds="B2", rst_n="B3", clk_p="B4",
- attrs=IOStandard("LVCMOS33"))
- self.platform.add_extension(hyperram_ios)
+ rwds="B2", rst_n="B3", ck_p="B4",
+ attrs=Attrs(IOSTANDARD="LVCMOS33"))
+ self.platform.add_resources(hyperram_ios)
io = self.platform.request("hyperram")
+and then declare the instance using those pins:
+
+ hyperram = HyperRAM(io=io, phy_kls=HyperRAMPHY,
+ latency=7) # Winbond W956D8MBYA
+ # latency=6 for Cypress S27KL0641DABHI020
+
this trick will work with the 1-IC HyperRAM PMOD by Piotr Esden, sold
-by 1bitsquared. however for the *four* IC HyperRAM PMOD, *four*
-separate and distinct instances are needed, each with a different
-cs_n pin. on the TODO list for this module: interleave multiple HyperRAM
-cs_n's to give striped (like RAID) memory accesses behind one single
-Wishbone interface.
+by 1bitsquared. however for the *four* IC HyperRAM PMOD, *four* cs_n pins
+are needed. These are then used to select, in turn, each IC, sequentially:
+ * Access to 0x000000-0x0fffff will activate CS0n,
+ * Access to 0x100000-0x1fffff will activate CS1n,
+ * Access to 0x200000-0x2fffff will activate CS2n,
+ * Access to 0x300000-0x3fffff will activate CS3n
+
+TODO: interleave multiple HyperRAM cs_n's to give striped (like RAID)
+memory accesses behind one single Wishbone interface.
+TODO: investigate whether HyperBUS can do CSn-striping in hardware
+(it should do, but this will require configuration registers to be written)
"""
class HyperRAMASICPhy(Elaboratable):
def __init__(self, io):
self.io = io
- self.clk = clk = Signal()
- self.cs = cs = Signal()
+ self.ck = ck = Signal()
+ self.cs = cs = Signal(len(self.io.cs_n))
+ self.rst_n = rst_n = Signal()
self.dq_o = dq_o = Signal(8)
self.dq_i = dq_i = Signal(8)
def elaborate(self, platform):
m = Module()
comb = m.d.comb
- clk, cs = self.clk, self.cs
+ ck, cs, rst_n = self.ck, self.cs, self.rst_n
dq_o, dq_i, dq_oe = self.dq_o, self.dq_i, self.dq_oe
rwds_o, rwds_oe = self.rwds_o, self.rwds_oe
comb += [
self.io["rwds_o"].eq(rwds_o),
- self.io["csn_o"].eq(~cs),
+ self.io["cs_n"].eq(~cs),
self.io["csn_oe"].eq(0),
- self.io["clk_o"].eq(clk),
- self.io["clk_oe"].eq(0),
+ self.io["ck_o"].eq(ck),
+ self.io["ck_oe"].eq(0),
self.io["rwds_oe"].eq(~rwds_oe),
+ self.io["rst_n"].eq(rst_n),
]
for i in range(8):
# dut = HyperRAM(io=HyperRamPads(), phy_kls=TestHyperRAMPHY)
class HyperRAMPads:
- def __init__(self, dw=8):
- self.clk = Signal()
- self.cs_n = Signal()
+ def __init__(self, dw=8, n_cs=1):
+ self.rst_n = Signal()
+ self.ck = Signal()
+ self.cs_n = Signal(n_cs)
self.dq = Record([("oe", 1), ("o", dw), ("i", dw)])
self.rwds = Record([("oe", 1), ("o", dw//8), ("i", dw//8)])
+ self.dq.o.name = "dq_o"
+ self.dq.i.name = "dq_i"
+ self.dq.oe.name = "dq_oe"
+ self.rwds.o.name = "rwds_o"
+ self.rwds.i.name = "rwds_i"
+ self.rwds.oe.name = "rwds_oe"
+ def ports(self):
+ return [self.ck, self.cs_n, self.dq.o, self.dq.i, self.dq.oe,
+ self.rwds.o, self.rwds.oe, self.rst_n]
-class TestHyperRAMPHY(Elaboratable):
+
+class HyperRAMPHY(Elaboratable):
def __init__(self, pads):
self.pads = pads
- self.clk = pads.clk
- self.cs = Signal()
+ self.ck = pads.ck
+ self.cs = Signal(len(self.pads.cs_n))
+ self.rst_n = pads.rst_n
self.dq_o = pads.dq.o
self.dq_i = pads.dq.i
self.dq_oe = pads.dq.oe
def elaborate(self, platform):
m = Module()
- m.d.comb += self.pads.cs_n.eq(~self.cs)
+ m.d.comb += self.pads.cs_n.eq(self.cs)
m.d.comb += self.pads.rwds.oe.eq(self.rwds_oe)
return m
+ def ports(self):
+ return self.pads.ports()
+
# HyperRAM --------------------------------------------------------------------
- no setup/chip configuration (use default latency).
This core favors portability and ease of use over performance.
+ Tested: Winbond W956D8MBYA latency=7
+ Cypress S27KL0641DABHI020 requires latency=6
"""
- def __init__(self, *, io, phy_kls, latency=6):
- super().__init__()
+ def __init__(self, *, io, phy_kls,
+ name=None,
+ latency=6,
+ addr_width=23, # 8 MBytes, per IC
+ bus=None, features=frozenset()):
+ super().__init__(name=name)
+ self.n_cs = n_cs = len(io.cs_n)
+ self.cs_bits = cs_bits = n_cs.bit_length()-1
self.io = io
self.phy = phy_kls(io)
self.latency = latency
- self.bus = wishbone.Interface(addr_width=21,
- data_width=32, granularity=8)
- mmap = MemoryMap(addr_width=23, data_width=8)
- mmap.add_resource(object(), name="hyperram", size=2**23)
+ # per IC, times n_cs
+ addr_width += cs_bits
+ self.bus = wishbone.Interface(addr_width=addr_width-2,
+ data_width=32, granularity=8,
+ features=features)
+ self.size = 2**addr_width
+ mmap = MemoryMap(addr_width=addr_width, data_width=8)
+ if name is None:
+ name = "hyperram"
+ mmap.add_resource(object(), name=name, size=self.size)
self.bus.memory_map = mmap
- self.size = 2**23
# # #
def elaborate(self, platform):
m = Module()
m.submodules.phy = self.phy
bus = self.bus
+ cs_bits = self.cs_bits
comb, sync = m.d.comb, m.d.sync
- clk = self.phy.clk
+ ck = self.phy.ck
clk_phase = Signal(2)
+ ck_active = Signal()
cs = self.phy.cs
ca = Signal(48)
ca_active = Signal()
rwds_o = self.phy.rwds_o
rwds_oe = self.phy.rwds_oe
+ # chip&address selection: use the MSBs of the address for chip-select
+ # (bus_adr_hi) by doing "1<<bus_adr_hi". this has to be captured
+ # (cs_latch) and asserted as part of bus_latch. therefore *before*
+ # that happens (SEND-COMMAND-ADDRESS and WAIT-STATE) cs has to be
+ # set to the "unlatched" version.
+ bus_adr_lo = self.bus.adr[:-cs_bits]
+ if cs_bits != 0:
+ bus_adr_hi = self.bus.adr[-cs_bits:]
+ else:
+ bus_adr_hi = 0
+
# Clock Generation (sys_clk/4) -----------------------------------
+ # this is a cheap-and-cheerful way to create phase-offsetted DDR:
+ # simply divide the main clock into 4 phases. it does mean that
+ # the HyperRAM IC is being run at 1/4 rate. sigh.
sync += clk_phase.eq(clk_phase + 1)
with m.Switch(clk_phase):
with m.Case(1):
- sync += clk.eq(cs)
+ sync += ck.eq(ck_active)
with m.Case(3):
- sync += clk.eq(0)
+ sync += ck.eq(0)
# Data Shift Register (for write and read) ------------------------
dqi = Signal(dw)
ashift = {8:1, 16:0}[dw]
la = 3-ashift
comb += [
- ca[47].eq(~self.bus.we), # R/W#
- ca[45].eq(1), # Burst Type (Linear)
- ca[16:45].eq(self.bus.adr[la:]), # Row & Upper Column Address
- ca[1:3].eq(self.bus.adr[0:2]), # Lower Column Address
- ca[ashift:3].eq(bus.adr), # Lower Column Address
+ ca[47].eq(~self.bus.we), # R/W#
+ ca[45].eq(1), # Burst Type (Linear)
+ ca[16:45].eq(bus_adr_lo[la:]), # Row & Upper Column Address
+ ca[ashift:3].eq(bus_adr_lo), # Lower Column Address
]
# Latency count starts from the middle of the command (thus the -4).
bus_we = Signal()
bus_sel = Signal(4)
bus_latch = Signal()
+ cs_latch = Signal.like(cs)
with m.If(bus_latch):
with m.If(bus.we):
sync += sr.eq(Cat(Const(0, 16), bus.dat_w))
sync += [ bus_we.eq(bus.we),
bus_sel.eq(bus.sel),
- bus_adr.eq(bus.adr)
+ bus_adr.eq(bus_adr_lo),
+ cs_latch.eq(cs)
]
-
-
# Sequencer -------------------------------------------------------
cycles = Signal(8)
first = Signal()
+ nfirst = Signal() # not-first
count_inc = Signal()
dbg_cyc = Signal(8)
+ comb += nfirst.eq(~first) # convenience
# when not idle run a cycles counter
with m.If(count_inc):
with m.State("SEND-COMMAND-ADDRESS"):
sync += cycles.eq(cycles+1)
- comb += cs.eq(1) # Set CSn.
+ comb += cs.eq(1<<bus_adr_hi) # Set CSn direct (not via latch)
+ comb += ck_active.eq(1) # Activate clock
comb += ca_active.eq(1) # Send Command on DQ.
comb += dq_oe.eq(1), # Wait for 6*2 cycles...
with m.If(cycles == (6*2 - 1)):
with m.State("WAIT-LATENCY"):
sync += cycles.eq(cycles+1)
- comb += cs.eq(1) # Set CSn.
+ comb += cs.eq(1<<bus_adr_hi) # Set CSn directly (not via latch)
+ comb += ck_active.eq(1) # Activate clock
# Wait for Latency cycles...
with m.If(cycles == (latency_cycles - 1)):
- comb += bus_latch.eq(1) # Latch Bus.
+ comb += bus_latch.eq(1) # Latch Bus (and cs)
# Early Write Ack (to allow bursting).
comb += bus.ack.eq(bus.we)
m.next = "READ-WRITE-DATA0"
sync += cycles.eq(0)
+ # for-loop creates multiple READ-WRITE-DATA states, to send/get
+ # dw bits at a time.
states = {8:4, 16:2}[dw]
for n in range(states):
with m.State("READ-WRITE-DATA%d" % n):
sync += cycles.eq(cycles+1)
- comb += cs.eq(1), # Set CSn.
+ comb += cs.eq(cs_latch), # *now* set CSn from Latch
+ comb += ck_active.eq(1) # Activate clock
# Send Data on DQ/RWDS (for write).
with m.If(bus_we):
comb += dq_oe.eq(1)
sync += cycles.eq(0)
# On last state, see if we can continue the burst
# or if we should end it.
- with m.If(n == (states - 1)):
+ if n == states - 1:
sync += first.eq(0)
# Continue burst when consecutive access ready.
with m.If(bus.stb & bus.cyc &
(bus.we == bus_we) &
- (bus.adr == (bus_adr + 1))):
- comb += bus_latch.eq(1), # Latch Bus.
+ (bus_adr_lo == (bus_adr + 1)) &
+ ((1<<bus_adr_hi) == cs_latch)):
+ comb += bus_latch.eq(1), # Latch Bus (and cs)
# Early Write Ack (to allow bursting).
comb += bus.ack.eq(bus.we)
# Else end the burst.
- with m.Elif(bus_we | ~first):
+ with m.Elif(bus_we | nfirst):
m.next = "IDLE"
- sync += cycles.eq(0)
+ sync += cycles.eq(0) # reset to start
+ sync += cs_latch.eq(0) # helps debugging
# Read Ack (when dat_r ready).
- with m.If((n == 0) & ~first):
- comb += bus.ack.eq(~bus_we)
+ if n == 0:
+ comb += bus.ack.eq(nfirst & ~bus_we)
return m
if __name__ == '__main__':
layout=[('rwds_o', 1), ('rwds_oe', 1),
- ('csn_o', 1), ('csn_oe', 1),
- ('clk_o', 1), ('clk_oe', 1)]
+ ('cs_n', 1), ('csn_oe', 1),
+ ('ck_o', 1), ('ck_oe', 1),
+ ('rst_n', 1)]
for i in range(8):
layout += [('d%d_o' % i, 1), ('d%d_oe' % i, 1), ('d%d_i' % i, 1)]
io = Record(layout=layout)