Merge branch 'master' of git.libre-soc.org:soc
[soc.git] / src / soc / fu / alu / main_stage.py
1 # This stage does most of the work of executing the arithmetic
2 # instructions: the additions, compares and sign-extension, as well as
3 # carry and overflow generation. This module should not, however, gate
4 # the carry or overflow: that is up to the
5 # output stage.
6
7 # Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
8 from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
9 from nmutil.pipemodbase import PipeModBase
10 from nmutil.extend import exts
11 from soc.fu.alu.pipe_data import ALUInputData, ALUOutputData
12 from ieee754.part.partsig import PartitionedSignal
13 from soc.decoder.power_enums import MicrOp
14
15 from soc.decoder.power_fields import DecodeFields
16 from soc.decoder.power_fieldsn import SignalBitRange
17
18
19 # microwatt calc_ov function.
def calc_ov(msb_a, msb_b, ca, msb_r):
    """Return the overflow flag for an addition.

    msb_a, msb_b -- most significant (sign) bits of the two operands
    ca           -- carry out of the addition
    msb_r        -- most significant bit of the result

    The flag is set when the carry differs from the result's top bit
    while the two operand sign bits are equal.
    """
    carry_vs_result = ca ^ msb_r
    operand_signs_differ = msb_a ^ msb_b
    return carry_vs_result & ~operand_signs_differ
22
23
class ALUMainStage(PipeModBase):
    """Main combinatorial stage of the ALU pipeline.

    Implements OP_CMP, OP_ADD, OP_EXTS and OP_CMPEQB.  Carry and overflow
    are generated here; XER.SO and the pipeline context are copied from
    input to output unchanged.
    """

    def __init__(self, pspec):
        super().__init__(pspec, "main")
        # instruction-field decoder, driven by the incoming instruction word
        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
        self.fields.create_specs()

    def ispec(self):
        """Return the input data format of this pipeline stage."""
        return ALUInputData(self.pspec)

    def ospec(self):
        """Return the output data format of this pipeline stage."""
        return ALUOutputData(self.pspec)

    def elaborate(self, platform):
        """Build and return the nmigen Module for this stage."""
        m = Module()
        comb = m.d.comb

        # shorthand for the stage's inputs ...
        op = self.i.ctx.op
        a, b, cry_i = self.i.a, self.i.b, self.i.xer_ca
        # ... and outputs
        o, cr0 = self.o.o, self.o.cr0
        cry_o, ov_o = self.o.xer_ca, self.o.xer_ov

        # L field of the X-Form instruction layout (consulted for OP_CMP)
        L = self.fields.FormX.L[0]

        is_add = op.insn_type == MicrOp.OP_ADD
        is_cmp = op.insn_type == MicrOp.OP_CMP

        # 32-bit mode is only ever selected here by a compare with L clear
        is_32bit = Signal(reset_less=True)
        with m.If(is_cmp):
            comb += is_32bit.eq(~L)

        # One adder instead of two: the operands are widened by a bit at
        # each end.  Bit 0 carries the carry-in, bits [1:-1] hold the
        # operands / result, and the top bit receives the carry-out.
        add_a = Signal(a.width + 2, reset_less=True)
        add_b = Signal(a.width + 2, reset_less=True)
        add_o = Signal(a.width + 2, reset_less=True)

        # operands as fed to the adder: sign-extended from 32 bits when
        # in 32-bit mode, otherwise passed through untouched
        a_i = Signal.like(a)
        b_i = Signal.like(b)
        with m.If(is_32bit):
            comb += [
                a_i.eq(exts(a, 32, 64)),
                b_i.eq(exts(b, 32, 64)),
            ]
        with m.Else():
            comb += [
                a_i.eq(a),
                b_i.eq(b),
            ]

        with m.If(is_add | is_cmp):
            # bit 0 adds 1 + carry_in, which produces a carry into bit 1
            # (and upwards) exactly when carry_in is set
            comb += [
                add_a.eq(Cat(cry_i[0], a_i, Const(0, 1))),
                add_b.eq(Cat(Const(1, 1), b_i, Const(0, 1))),
                add_o.eq(add_a + add_b),
            ]

        ##########################
        # dispatch on the micro-op

        with m.Switch(op.insn_type):

            ###################
            # CMP, CMPL v3.0B p85-86

            with m.Case(MicrOp.OP_CMP):
                # The subtraction ought to be the other way round (b-a
                # rather than a-b).  Rather than spend two 64-bit MUXes
                # swapping a and b, or post-process the 64-bit result,
                # the *output* stage swaps its +ve / -ve test with an XOR.
                comb += [
                    o.data.eq(add_o[1:-1]),
                    o.ok.eq(0),  # o.data is used, but not written back
                ]

            ###################
            # add v3.0B p67, p69-72

            with m.Case(MicrOp.OP_ADD):
                # drop bit 0 (carry-in slot) and the top bit (carry-out)
                comb += [
                    o.data.eq(add_o[1:-1]),
                    o.ok.eq(1),  # write the result register
                ]

                # see microwatt OP_ADD code
                # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5
                ca = Signal(2, reset_less=True)
                comb += [
                    # XER.CA: carry-out of the full-width add
                    ca[0].eq(add_o[-1]),
                    # XER.CA32: result bit 32 XORed with both operand
                    # bits 32 leaves the carry that entered bit 32
                    ca[1].eq(add_o[33] ^ (a_i[32] ^ b_i[32])),
                    cry_o.data.eq(ca),
                    cry_o.ok.eq(1),
                ]

                # ov[0] is the 64-bit overflow, ov[1] the 32-bit overflow
                ov = Signal(2, reset_less=True)
                comb += [
                    ov[0].eq(calc_ov(a_i[-1], b_i[-1], ca[0], add_o[-2])),
                    ov[1].eq(calc_ov(a_i[31], b_i[31], ca[1], add_o[32])),
                    ov_o.data.eq(ov),
                    ov_o.ok.eq(1),
                ]

            ###################
            # exts (sign-extend) v3.0B p96, p99

            with m.Case(MicrOp.OP_EXTS):
                # data_len is in bytes: extend from 8, 16 or 32 bits
                for nbytes in (1, 2, 4):
                    with m.If(op.data_len == nbytes):
                        comb += o.data.eq(exts(a, nbytes * 8, 64))
                comb += o.ok.eq(1)  # write the result register

            ###################
            # cmpeqb v3.0B p88

            with m.Case(MicrOp.OP_CMPEQB):
                eqs = Signal(8, reset_less=True)
                src1 = Signal(8, reset_less=True)
                comb += src1.eq(a[0:8])
                # eqs[i] is set when the low byte of a equals byte i of b
                comb += [
                    eqs[i].eq(src1 == b[8 * i:8 * (i + 1)])
                    for i in range(8)
                ]
                any_match = eqs.any()
                comb += [
                    o.data[0].eq(any_match),
                    o.ok.eq(0),  # o.data is used, but not written back
                    cr0.data.eq(Cat(Const(0, 2), any_match, Const(0, 1))),
                    cr0.ok.eq(1),
                ]

        # sticky overflow and context are both plain pass-through
        comb += self.o.xer_so.data.eq(self.i.xer_so)
        comb += self.o.ctx.eq(self.i.ctx)

        return m
146 return m