-# This stage is intended to do most of the work of executing multiply
-# instructions, as well as carry and overflow generation. This module
-# however should not gate the carry or overflow, that's up to the
-# output stage
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
+# This stage is intended to do the main work of an actual multiply
+
+from nmigen import Module
from nmutil.pipemodbase import PipeModBase
-from soc.fu.alu.pipe_data import ALUOutputData
-from soc.fu.mul.pipe_data import MulInputData
+from soc.fu.mul.pipe_data import MulIntermediateData, MulOutputData
from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-from soc.fu.shift_rot.rotator import Rotator
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
-class ShiftRotMainStage(PipeModBase):
+class MulMainStage2(PipeModBase):
+ # Second multiply stage: multiplies the two operands received from the
+ # previous stage and forwards the result-sign flag, XER fields and
+ # context unchanged to the next stage.
def __init__(self, pspec):
- super().__init__(pspec, "main")
- self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
- self.fields.create_specs()
+ super().__init__(pspec, "mul2")
def ispec(self):
- return MulInputData(self.pspec)
+ return MulIntermediateData(self.pspec) # pipeline stage input format
def ospec(self):
- return ALUOutputData(self.pspec)
+ return MulOutputData(self.pspec) # pipeline stage output format
def elaborate(self, platform):
m = Module()
comb = m.d.comb
- # obtain me and mb fields from instruction.
- m_fields = self.fields.instrs['M']
- md_fields = self.fields.instrs['MD']
- mb = Signal(m_fields['MB'][0:-1].shape())
- me = Signal(m_fields['ME'][0:-1].shape())
- mb_extra = Signal(1, reset_less=True)
- comb += mb.eq(m_fields['MB'][0:-1])
- comb += me.eq(m_fields['ME'][0:-1])
- comb += mb_extra.eq(md_fields['mb'][0:-1][0])
-
- # set up microwatt rotator module
- m.submodules.rotator = rotator = Rotator()
- comb += [
- rotator.me.eq(me),
- rotator.mb.eq(mb),
- rotator.mb_extra.eq(mb_extra),
- rotator.rs.eq(self.i.rs),
- rotator.ra.eq(self.i.ra),
- rotator.shift.eq(self.i.rb),
- rotator.is_32bit.eq(self.i.ctx.op.is_32bit),
- rotator.arith.eq(self.i.ctx.op.is_signed),
- ]
+ # convenience variables
+ a, b, o = self.i.a, self.i.b, self.o.o
- # instruction rotate type
- mode = Signal(3, reset_less=True)
- with m.Switch(self.i.ctx.op.insn_type):
- with m.Case(InternalOp.OP_SHL): comb += mode.eq(0b000)
- with m.Case(InternalOp.OP_SHR): comb += mode.eq(0b001) # R-shift
- with m.Case(InternalOp.OP_RLC): comb += mode.eq(0b110) # clear LR
- with m.Case(InternalOp.OP_RLCL): comb += mode.eq(0b010) # clear L
- with m.Case(InternalOp.OP_RLCR): comb += mode.eq(0b100) # clear R
+ # actual multiply, done combinatorially in one step
+ # (TODO: split into stages)
+ comb += o.eq(a * b)
- comb += Cat(rotator.right_shift,
- rotator.clear_left,
- rotator.clear_right).eq(mode)
-
- # outputs from the microwatt rotator module
- # XXX TODO: carry32
- comb += [self.o.o.eq(rotator.result_o),
- self.o.xer_ca[0].eq(rotator.carry_out_o)]
-
- ###### sticky overflow and context, both pass-through #####
+ ###### xer and context, all pass-through #####
+ # The fields of this stage's output are assigned directly, exactly like
+ # self.o.o above: neg_res is declared as a plain Signal, and the next
+ # stage reads xer_ca/xer_so bare, so none of them has a .data member.
+ comb += self.o.xer_ca.eq(self.i.xer_ca)
+ comb += self.o.neg_res.eq(self.i.neg_res)
- comb += self.o.xer_so.data.eq(self.i.xer_so)
+ comb += self.o.xer_so.eq(self.i.xer_so)
comb += self.o.ctx.eq(self.i.ctx)
return m
+
+from nmigen import Signal
from soc.fu.alu.alu_input_record import CompALUOpSubset
from soc.fu.pipe_data import IntegerData, CommonPipeSpec
-from soc.fu.alu.pipe_data import ALUOutputData
-from soc.fu.shift_rot.pipe_data import ShoftRotInputData
+from soc.fu.alu.pipe_data import ALUOutputData, ALUInputData
-# TODO: replace CompALUOpSubset with CompShiftRotOpSubset
-class ShiftRotPipeSpec(CommonPipeSpec):
- regspec = (ShiftRotInputData.regspec, ALUOutputData.regspec)
+class MulIntermediateData(ALUInputData):
+    """Data passed from the first multiply stage to the multiplier.
+
+    Carries everything ALUInputData carries, plus ``neg_res``: a 1-bit
+    flag recording that the final product must be negated.
+    """
+
+    def __init__(self, pspec):
+        super().__init__(pspec)
+
+        # Bind the flag as an attribute: the pipeline stages access it as
+        # self.o.neg_res / self.i.neg_res, so a local variable is not enough.
+        self.neg_res = Signal(reset_less=True)
+        # register it in the base class's field list alongside the others
+        self.data.append(self.neg_res)
+
+
+class MulOutputData(IntegerData):
+    """Data passed from the multiplier to the result-selection stage.
+
+    Holds the wide product ``o``, the XER SO and CA fields, and
+    ``neg_res``: a 1-bit flag recording that the product must be negated.
+    """
+
+    regspec = [('INT', 'o', '0:128'),
+               ('XER', 'xer_so', '32'), # XER bit 32: SO
+               ('XER', 'xer_ca', '34,45')] # XER bit 34/45: CA/CA32
+
+    def __init__(self, pspec):
+        super().__init__(pspec, False)
+
+        # Bind the flag as an attribute: the pipeline stages access it as
+        # self.o.neg_res / self.i.neg_res, so a local variable is not enough.
+        self.neg_res = Signal(reset_less=True)
+        # register it in the base class's field list alongside the others
+        self.data.append(self.neg_res)
+
+
+class MulPipeSpec(CommonPipeSpec):
+ # Pipeline specification for the multiply unit.
+ # regspec is an (input, output) pair: the regspec of ALUInputData
+ # followed by the regspec of ALUOutputData.
+ regspec = (ALUInputData.regspec, ALUOutputData.regspec)
+ # operation-subset record class; the ALU one is reused here
opsubsetkls = CompALUOpSubset
from soc.fu.shift_rot.input_stage import ShiftRotInputStage
from soc.fu.shift_rot.main_stage import ShiftRotMainStage
from soc.fu.alu.output_stage import ALUOutputStage
+from soc.fu.mul.main_stage import MulMainStage1, MulMainStage2, MulMainStage3
+
class MulStages1(PipeModBaseChain):
+ # First pipeline segment: the ALU input stage followed by the multiply
+ # pre-processing stage.
def get_chain(self):
+ # build the two stage objects of this segment from the shared pspec
- inp = ALUInputStage(self.pspec)
- main = MulMainStage1(self.pspec)
+ inp = ALUInputStage(self.pspec) # a-invert, carry etc
+ main = MulMainStage1(self.pspec) # detect signed/32-bit
return [inp, main]
+
class MulStages2(PipeModBaseChain):
+ # Second pipeline segment: contains only the multiplier stage; the ALU
+ # output stage now belongs to MulStages3.
def get_chain(self):
- main2 = MulMainStage2(self.pspec)
- out = ALUOutputStage(self.pspec)
- return [main2, out]
+ main2 = MulMainStage2(self.pspec) # actual multiply
+ return [main2]
+
+
+class MulStages3(PipeModBaseChain):
+    """Third pipeline segment: result selection, then the ALU output stage."""
+
+    def get_chain(self):
+        spec = self.pspec
+        # select output bits, invert, set ov
+        select_stage = MulMainStage3(spec)
+        # do CR, XER and out-invert etc.
+        output_stage = ALUOutputStage(spec)
+        return [select_stage, output_stage]
class ShiftRotBasePipe(ControlBase):
self.pspec = pspec
self.pipe1 = MulStages1(pspec)
self.pipe2 = MulStages2(pspec)
+ # The third segment needs its own attribute: rebinding self.pipe2 would
+ # discard the multiplier segment and leave only two segments connected.
+ # NOTE(review): elaborate() is not fully visible here; if it registers
+ # pipe1/pipe2 as submodules, pipe3 presumably needs the same - confirm.
+ self.pipe3 = MulStages3(pspec)
- self._eqs = self.connect([self.pipe1, self.pipe2])
+ self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])
def elaborate(self, platform):
--- /dev/null
+# This stage is intended to do most of the work of analysing the multiply result
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, signed)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.alu.pipe_data import ALUOutputData
+from soc.fu.mul.pipe_data import MulOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+
+class MulMainStage3(PipeModBase):
+    """Third multiply stage: post-process the raw product.
+
+    Takes the double-width product from the multiplier, negates it when
+    the result-sign flag is set, selects the result bits requested by the
+    instruction type, and computes the overflow flags.
+    """
+
+    def __init__(self, pspec):
+        super().__init__(pspec, "mul3")
+
+    def ispec(self):
+        return MulOutputData(self.pspec) # pipeline stage input format
+
+    def ospec(self):
+        return ALUOutputData(self.pspec) # pipeline stage output format
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # convenience variables
+        o, ov_o = self.o.o, self.o.xer_ov
+        o_i, op = self.i.o, self.i.ctx.op
+
+        # check if op is 32-bit
+        is_32bit = Signal(reset_less=True)
+        comb += is_32bit.eq(op.is_32bit)
+
+        # check negate: the first stage replaces negative operands by their
+        # negation, so the product is negated here when neg_res is set
+        o_s = Signal(signed(o.width * 2), reset_less=True)
+        mul_o = Signal(o.width * 2, reset_less=True)
+        comb += o_s.eq(-o_i)
+        comb += mul_o.eq(Mux(self.i.neg_res, o_s, o_i))
+        comb += o.ok.eq(1)
+
+        with m.Switch(op.insn_type):
+            # hi-32 replicated twice
+            with m.Case(InternalOp.OP_MUL_H32):
+                comb += o.data.eq(Repl(mul_o[32:64], 2))
+            # hi-64
+            with m.Case(InternalOp.OP_MUL_H64):
+                comb += o.data.eq(mul_o[64:128])
+            # lo-64
+            with m.Default():
+                comb += o.data.eq(mul_o[0:64])
+
+        # compute overflow: a signed product fits in N bits only when bit
+        # N-1 (the sign bit) and every bit above it are identical, so the
+        # tested slice has to start at the sign bit itself.  Starting one
+        # bit higher misses e.g. a positive product of exactly 2**31.
+        mul_ov = Signal(reset_less=True)
+        with m.If(is_32bit):
+            m31 = mul_o[31:64]
+            comb += mul_ov.eq(m31.bool() & ~m31.all())
+        with m.Else():
+            m63 = mul_o[63:128]
+            comb += mul_ov.eq(m63.bool() & ~m63.all())
+
+        # 32-bit (ov[1]) and 64-bit (ov[0]) overflow: both get the same flag
+        comb += ov_o.data.eq(Repl(mul_ov, 2))
+        comb += ov_o.ok.eq(1)
+
+        # No carry is generated here: the earlier carry expressions were
+        # taken from an adder and referred to add_o/a/b, none of which
+        # exists in this stage.
+        # NOTE(review): self.o.xer_ca is left undriven; presumably its ok
+        # flag then stays 0 so XER.CA is not written back - confirm against
+        # the output stage.
+
+        ###### sticky overflow and context, both pass-through #####
+
+        comb += self.o.xer_so.data.eq(self.i.xer_so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
+
--- /dev/null
+# This stage is intended to do most of the work of executing multiply
+from nmigen import (Module, Signal, Mux)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.alu.pipe_data import ALUInputData
+from soc.fu.mul.pipe_data import MulIntermediateData
+from ieee754.part.partsig import PartitionedSignal
+
+
+class MulMainStage1(PipeModBase):
+    """First multiply stage: operand pre-processing.
+
+    Works out the sign of each operand (for signed operations), records
+    whether the product will need negating, and hands the operands on as
+    non-negative values, restricted to 32 bits for 32-bit operations.
+    """
+
+    def __init__(self, pspec):
+        super().__init__(pspec, "mul1")
+
+    def ispec(self):
+        return ALUInputData(self.pspec) # defines pipeline stage input format
+
+    def ospec(self):
+        return MulIntermediateData(self.pspec) # pipeline stage output format
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # convenience variables
+        a, b, op = self.i.a, self.i.b, self.i.ctx.op
+        a_o, b_o, neg_res_o = self.o.a, self.o.b, self.o.neg_res
+
+        # check if op is 32-bit, and get sign bits from both operands
+        is_32bit = Signal(reset_less=True)
+        sign_a = Signal(reset_less=True)
+        sign_b = Signal(reset_less=True)
+        comb += is_32bit.eq(op.is_32bit)
+
+        # work out if a/b are negative (check 32-bit / signed)
+        comb += sign_a.eq(Mux(is_32bit, a[31], a[63]) & op.is_signed)
+        comb += sign_b.eq(Mux(is_32bit, b[31], b[63]) & op.is_signed)
+
+        # work out if result is negative sign
+        comb += neg_res_o.eq(sign_a ^ sign_b)
+
+        # negation of a 64-bit value produces the same lower 32-bit
+        # result as negation of just the lower 32-bits, so we don't
+        # need to do anything special before negating
+        abs_a = Signal(len(a), reset_less=True)
+        abs_b = Signal(len(b), reset_less=True)
+        comb += abs_a.eq(Mux(sign_a, -a, a))
+        comb += abs_b.eq(Mux(sign_b, -b, b))
+
+        # a 32-bit operation multiplies only the low words: drop the upper
+        # halves so they cannot leak into the product
+        with m.If(is_32bit):
+            comb += a_o.eq(abs_a[0:32])
+            comb += b_o.eq(abs_b[0:32])
+        with m.Else():
+            comb += a_o.eq(abs_a)
+            comb += b_o.eq(abs_b)
+
+        ###### XER and context, both pass-through #####
+        # output fields are assigned directly, like a_o/b_o above: this
+        # stage's output spec is built the same way as its input spec,
+        # whose fields are read bare
+
+        comb += self.o.xer_ca.eq(self.i.xer_ca)
+        comb += self.o.xer_so.eq(self.i.xer_so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
+