src/soc/fu/alu/main_stage.py

   1 # This stage is intended to do most of the work of executing the Arithmetic
   2 # instructions. This would be like the additions, compares, and sign-extension
   3 # as well as carry and overflow generation. This module
   4 # however should not gate the carry or overflow, that's up to the
   5 # output stage
   6 from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
   7 from nmutil.pipemodbase import PipeModBase
   8 from nmutil.extend import exts
   9 from soc.fu.alu.pipe_data import ALUInputData, ALUOutputData
  10 from ieee754.part.partsig import PartitionedSignal
  11 from soc.decoder.power_enums import InternalOp
  12
  13
  14 class ALUMainStage(PipeModBase):
  15     def __init__(self, pspec):
  16         super().__init__(pspec, "main")
  17
  18     def ispec(self):
  19         return ALUInputData(self.pspec) # defines pipeline stage input format
  20
  21     def ospec(self):
  22         return ALUOutputData(self.pspec) # defines pipeline stage output format
  23
  24     def elaborate(self, platform):
  25         m = Module()
  26         comb = m.d.comb
  27
  28         # convenience variables
  29         cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0
  30         ov_o = self.o.xer_ov
  31         a, b, cry_i, op = self.i.a, self.i.b, self.i.xer_ca, self.i.ctx.op
  32
  33         # check if op is 32-bit, and get sign bit from operand a
  34         is_32bit = Signal(reset_less=True)
  35         sign_bit = Signal(reset_less=True)
  36         comb += is_32bit.eq(op.is_32bit)
  37         comb += sign_bit.eq(Mux(is_32bit, a[31], a[63]))
  38
  39         # little trick: do the add using only one add (not 2)
  40         # LSB: carry-in [0].  op/result: [1:-1].  MSB: carry-out [-1]
  41         add_a = Signal(a.width + 2, reset_less=True)
  42         add_b = Signal(a.width + 2, reset_less=True)
  43         add_o = Signal(a.width + 2, reset_less=True)
  44         with m.If((op.insn_type == InternalOp.OP_ADD) |
  45                   (op.insn_type == InternalOp.OP_CMP)):
  46             # in bit 0, 1+carry_in creates carry into bit 1 and above
  47             comb += add_a.eq(Cat(cry_i[0], a, Const(0, 1)))
  48             comb += add_b.eq(Cat(Const(1, 1), b, Const(0, 1)))
  49             comb += add_o.eq(add_a + add_b)
  50
  51         ##########################
  52         # main switch-statement for handling arithmetic operations
  53
  54         with m.Switch(op.insn_type):
  55             #### CMP, CMPL ####
  56             with m.Case(InternalOp.OP_CMP):
  57                 # this is supposed to be inverted (b-a, not a-b)
  58                 # however we have a trick: instead of adding either 2x 64-bit
  59                 # MUXes to invert a and b, or messing with a 64-bit output,
  60                 # swap +ve and -ve test in the *output* stage using an XOR gate
  61                 comb += o.data.eq(add_o[1:-1])
  62                 comb += o.ok.eq(0) # use o.data but do *not* actually output
  63
  64             #### add ####
  65             with m.Case(InternalOp.OP_ADD):
  66                 # bit 0 is not part of the result, top bit is the carry-out
  67                 comb += o.data.eq(add_o[1:-1])
  68
  69                 # see microwatt OP_ADD code
  70                 # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5
  71                 comb += cry_o.data[0].eq(add_o[-1]) # XER.CO
  72                 comb += cry_o.data[1].eq(add_o[33] ^ (a[32] ^ b[32])) # XER.CO32
  73                 comb += cry_o.ok.eq(1)
  74                 comb += ov_o.data[0].eq((add_o[-2] != a[-1]) & (a[-1] == b[-1]))
  75                 comb += ov_o.data[1].eq((add_o[32] != a[31]) & (a[31] == b[31]))
  76                 comb += ov_o.ok.eq(1)
  77                 comb += o.ok.eq(1) # output register
  78
  79             #### exts (sign-extend) ####
  80             with m.Case(InternalOp.OP_EXTS):
  81                 with m.If(op.data_len == 1):
  82                     comb += o.data.eq(exts(a, 8, 64))
  83                 with m.If(op.data_len == 2):
  84                     comb += o.data.eq(exts(a, 16, 64))
  85                 with m.If(op.data_len == 4):
  86                     comb += o.data.eq(exts(a, 32, 64))
  87                 comb += o.ok.eq(1) # output register
  88
  89             #### cmpeqb ####
  90             with m.Case(InternalOp.OP_CMPEQB):
  91                 eqs = Signal(8, reset_less=True)
  92                 src1 = Signal(8, reset_less=True)
  93                 comb += src1.eq(a[0:8])
  94                 for i in range(8):
  95                     comb += eqs[i].eq(src1 == b[8*i:8*(i+1)])
  96                 comb += o.data[0].eq(eqs.any())
  97                 comb += cr0.data.eq(Cat(Const(0, 2), eqs.any(), Const(0, 1)))
  98                 comb += cr0.ok.eq(1)
  99                 comb += o.ok.eq(0) # use o.data but do *not* actually output
 100
 101         ###### sticky overflow and context, both pass-through #####
 102
 103         comb += self.o.xer_so.data.eq(self.i.xer_so)
 104         comb += self.o.ctx.eq(self.i.ctx)
 105
 106         return m