src/soc/fu/alu/main_stage.py

   1 # This stage is intended to do most of the work of executing the Arithmetic
   2 # instructions. This would be like the additions, compares, and sign-extension
   3 # as well as carry and overflow generation. This module
   4 # however should not gate the carry or overflow, that's up to the
   5 # output stage
   6 from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
   7 from nmutil.pipemodbase import PipeModBase
   8 from nmutil.extend import exts
   9 from soc.fu.alu.pipe_data import ALUInputData, ALUOutputData
  10 from ieee754.part.partsig import PartitionedSignal
  11 from soc.decoder.power_enums import MicrOp
  12
  13 from soc.decoder.power_fields import DecodeFields
  14 from soc.decoder.power_fieldsn import SignalBitRange
  15
  16
  17 # microwatt calc_ov function.
  18 def calc_ov(msb_a, msb_b, ca, msb_r):
  19     return (ca ^ msb_r) & ~(msb_a ^ msb_b)
  20
  21
  22 class ALUMainStage(PipeModBase):
  23     def __init__(self, pspec):
  24         super().__init__(pspec, "main")
  25         self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
  26         self.fields.create_specs()
  27
  28     def ispec(self):
  29         return ALUInputData(self.pspec) # defines pipeline stage input format
  30
  31     def ospec(self):
  32         return ALUOutputData(self.pspec) # defines pipeline stage output format
  33
  34     def elaborate(self, platform):
  35         m = Module()
  36         comb = m.d.comb
  37
  38         # convenience variables
  39         cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0
  40         ov_o = self.o.xer_ov
  41         a, b, cry_i, op = self.i.a, self.i.b, self.i.xer_ca, self.i.ctx.op
  42
  43         # get L-field for OP_CMP
  44         x_fields = self.fields.FormX
  45         L = x_fields.L[0]
  46
  47         # check if op is 32-bit, and get sign bit from operand a
  48         is_32bit = Signal(reset_less=True)
  49
  50         with m.If(op.insn_type == MicrOp.OP_CMP):
  51             comb += is_32bit.eq(~L)
  52
  53         # little trick: do the add using only one add (not 2)
  54         # LSB: carry-in [0].  op/result: [1:-1].  MSB: carry-out [-1]
  55         add_a = Signal(a.width + 2, reset_less=True)
  56         add_b = Signal(a.width + 2, reset_less=True)
  57         add_o = Signal(a.width + 2, reset_less=True)
  58
  59         a_i = Signal.like(a)
  60         b_i = Signal.like(b)
  61         with m.If(is_32bit):
  62             comb += a_i.eq(exts(a, 32, 64))
  63             comb += b_i.eq(exts(b, 32, 64))
  64         with m.Else():
  65             comb += a_i.eq(a)
  66             comb += b_i.eq(b)
  67
  68         with m.If((op.insn_type == MicrOp.OP_ADD) |
  69                   (op.insn_type == MicrOp.OP_CMP)):
  70             # in bit 0, 1+carry_in creates carry into bit 1 and above
  71             comb += add_a.eq(Cat(cry_i[0], a_i, Const(0, 1)))
  72             comb += add_b.eq(Cat(Const(1, 1), b_i, Const(0, 1)))
  73             comb += add_o.eq(add_a + add_b)
  74
  75         ##########################
  76         # main switch-statement for handling arithmetic operations
  77
  78         with m.Switch(op.insn_type):
  79
  80             ###################
  81             #### CMP, CMPL v3.0B p85-86
  82
  83             with m.Case(MicrOp.OP_CMP):
  84                 # this is supposed to be inverted (b-a, not a-b)
  85                 # however we have a trick: instead of adding either 2x 64-bit
  86                 # MUXes to invert a and b, or messing with a 64-bit output,
  87                 # swap +ve and -ve test in the *output* stage using an XOR gate
  88                 comb += o.data.eq(add_o[1:-1])
  89                 comb += o.ok.eq(0) # use o.data but do *not* actually output
  90
  91             ###################
  92             #### add v3.0B p67, p69-72
  93
  94             with m.Case(MicrOp.OP_ADD):
  95                 # bit 0 is not part of the result, top bit is the carry-out
  96                 comb += o.data.eq(add_o[1:-1])
  97                 comb += o.ok.eq(1) # output register
  98
  99                 # see microwatt OP_ADD code
 100                 # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5
 101                 ca = Signal(2, reset_less=True)
 102                 comb += ca[0].eq(add_o[-1])                   # XER.CA
 103                 comb += ca[1].eq(add_o[33] ^ (a_i[32] ^ b_i[32])) # XER.CA32
 104                 comb += cry_o.data.eq(ca)
 105                 comb += cry_o.ok.eq(1)
 106                 # 32-bit (ov[1]) and 64-bit (ov[0]) overflow
 107                 ov = Signal(2, reset_less=True)
 108                 comb += ov[0].eq(calc_ov(a_i[-1], b_i[-1], ca[0], add_o[-2]))
 109                 comb += ov[1].eq(calc_ov(a_i[31], b_i[31], ca[1], add_o[32]))
 110                 comb += ov_o.data.eq(ov)
 111                 comb += ov_o.ok.eq(1)
 112
 113             ###################
 114             #### exts (sign-extend) v3.0B p96, p99
 115
 116             with m.Case(MicrOp.OP_EXTS):
 117                 with m.If(op.data_len == 1):
 118                     comb += o.data.eq(exts(a, 8, 64))
 119                 with m.If(op.data_len == 2):
 120                     comb += o.data.eq(exts(a, 16, 64))
 121                 with m.If(op.data_len == 4):
 122                     comb += o.data.eq(exts(a, 32, 64))
 123                 comb += o.ok.eq(1) # output register
 124
 125             ###################
 126             #### cmpeqb v3.0B p88
 127
 128             with m.Case(MicrOp.OP_CMPEQB):
 129                 eqs = Signal(8, reset_less=True)
 130                 src1 = Signal(8, reset_less=True)
 131                 comb += src1.eq(a[0:8])
 132                 for i in range(8):
 133                     comb += eqs[i].eq(src1 == b[8*i:8*(i+1)])
 134                 comb += o.data[0].eq(eqs.any())
 135                 comb += o.ok.eq(0) # use o.data but do *not* actually output
 136                 comb += cr0.data.eq(Cat(Const(0, 2), eqs.any(), Const(0, 1)))
 137                 comb += cr0.ok.eq(1)
 138
 139         ###### sticky overflow and context, both pass-through #####
 140
 141         comb += self.o.xer_so.data.eq(self.i.xer_so)
 142         comb += self.o.ctx.eq(self.i.ctx)
 143
 144         return m