src/openpower/decoder/power_svp64_rm.py

   1 # SPDX-License-Identifier: LGPLv3+
   2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   3 # Funded by NLnet http://nlnet.nl
   4
   5 # sigh this entire module is a laborious mess. it really should be
   6 # auto-generated from the power_insn.py database but a technique
   7 # for doing so (similar to python HTML/XML-node-walking) is needed
   8
   9 """SVP64 RM (Remap) Record.
  10
  11 https://libre-soc.org/openpower/sv/svp64/
  12
  13 | Field Name  | Field bits | Description                            |
  14 |-------------|------------|----------------------------------------|
  15 | MASKMODE    | `0`        | Execution (predication) Mask Kind      |
  16 | MASK        | `1:3`      | Execution Mask                         |
  17 | ELWIDTH     | `4:5`      | Element Width                          |
  18 | ELWIDTH_SRC | `6:7`      | Element Width for Source               |
  19 | SUBVL       | `8:9`      | Sub-vector length                      |
  20 | EXTRA       | `10:18`    | context-dependent extra                |
  21 | MODE        | `19:23`    | changes Vector behaviour               |
  22 """
  23
  24 from nmigen import Elaboratable, Module, Signal, Const
  25 from openpower.decoder.power_enums import (SVP64RMMode, Function, SVPType,
  26                                     SVMode,
  27                                     SVP64PredMode, SVP64Sat, SVP64LDSTmode,
  28                                     SVP64BCPredMode, SVP64BCVLSETMode,
  29                                     SVP64BCGate, SVP64BCCTRMode,
  30                                     SVP64Width
  31                                     )
  32 from openpower.consts import EXTRA3, SVP64MODE
  33 from openpower.sv.svp64 import SVP64Rec
  34 from nmutil.util import sel
  35
  36 # a list of fields which need to be added to input records in order
  37 # pass on vital information needed by each pipeline.
  38 # make sure to keep these the same as SVP64RMModeDecode, in fact,
  39 # TODO, make SVP64RMModeDecode *use* this as a Record!
  40 sv_input_record_layout = [
  41         ('sv_pred_sz', 1), # predicate source zeroing
  42         ('sv_pred_dz', 1), # predicate dest zeroing
  43         ('sv_saturate', SVP64Sat),
  44         ('sv_ldstmode', SVP64LDSTmode),
  45         ('SV_Ptype', SVPType),
  46         ('SV_mode', SVMode),
  47         #('sv_RC1', 1),
  48     ]
  49
  50 """RM Mode
  51 there are four Mode variants, two for LD/ST, one for Branch-Conditional,
  52 and one for everything else
  53 https://libre-soc.org/openpower/sv/svp64/
  54 https://libre-soc.org/openpower/sv/ldst/
  55 https://libre-soc.org/openpower/sv/branches/
  56
  57 LD/ST immed:
  58 | 0 | 1 |  2  |  3   4  |  description               |
  59 |---|---| --- |---------|--------------------------- |
  60 | 0 | 0 | 0   |  zz els | simple mode                |
  61 | 0 | 0 | 1   | PI  LF  | post-increment and Fault-First  |
  62 | 1 | 0 |   N | zz  els |  sat mode: N=0/1 u/s       |
  63 |VLi| 1 | inv | CR-bit  | Rc=1: ffirst CR sel        |
  64 |VLi| 1 | inv | els RC1 |  Rc=0: ffirst z/nonz       |
  65
  66 00      0       zz els  normal mode (with element-stride option)
  67 01      inv     CR-bit  Rc=1: ffirst CR sel
  68 01      inv     els RC1 Rc=0: ffirst z/nonz
  69 10      N       zz els  sat mode: N=0/1 u/s
  70 11      inv     CR-bit  Rc=1: pred-result CR sel
  71 11      inv     els RC1 Rc=0: pred-result z/nonz
  72
  73 LD/ST indexed:
  74
  75 | 0 | 1 |  2  |  3   4  |  description               |
  76 |---|---| --- |---------|--------------------------- |
  77 |els| 0 | SEA |  dz  sz | simple mode        |
  78 |VLi| 1 | inv | CR-bit  | Rc=1: ffirst CR sel        |
  79 |VLi| 1 | inv | els RC1 |  Rc=0: ffirst z/nonz       |
  80
  81 00      0       sz dz   normal mode
  82 00      1       rsvd    reserved
  83 01      inv     CR-bit  Rc=1: ffirst CR sel
  84 01      inv     dz RC1  Rc=0: ffirst z/nonz
  85 10      N       sz dz   sat mode: N=0/1 u/s
  86 11      inv     CR-bit  Rc=1: pred-result CR sel
  87 11      inv     zz RC1  Rc=0: pred-result z/nonz
  88
  89 Arithmetic:
  90 | 0-1 |  2  |  3   4  |  description              |
  91 | --- | --- |---------|-------------------------- |
  92 | 00  |   0 |  dz  sz | simple mode                      |
  93 | 00  |   1 | 0  RG   | scalar reduce mode (mapreduce), SUBVL=1 |
  94 | 00  |   1 | SVM 0   | subvector reduce mode, SUBVL>1   |
  95 | 00  |   1 | /   1   | reserved |
  96 | 01  | inv | CR-bit  | Rc=1: ffirst CR sel              |
  97 | 01  | inv | VLi RC1 |  Rc=0: ffirst z/nonz |
  98 | 10  |   N | dz   sz |  sat mode: N=0/1 u/s, SUBVL=1 |
  99 | 10  |   N | zz   0  |  sat mode: N=0/1 u/s, SUBVL>1 |
 100 | 10  |   N | /    1  |  reserved |
 101 | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
 102 | 11  | inv | zz  RC1 |  Rc=0: pred-result z/nonz |
 103
 104 Branch Conditional:
 105 note that additional BC modes are in *other bits*, specifically
 106 the element-width fields: SVP64Rec.ewsrc and SVP64Rec.elwidth
 107
 108 elwidth   ewsrc    mode
 109 4   5     6   7    19 20    21  22  23
 110 ALL LRu   /   /     0  0        /       SNZ sz  normal mode
 111 ALL LRu   /   VSb   0  1        VLI     SNZ sz         VLSET mode
 112 ALL LRu   BRc /     1  0        /       SNZ sz  svstep       mode
 113 ALL LRu   BRc VSb   1  1        VLI     SNZ sz  svstep VLSET mode
 114 """
 115
 116
 117 class SVP64RMModeDecode(Elaboratable):
 118     def __init__(self, name=None):
 119         ##### inputs #####
 120         self.rm_in = SVP64Rec(name=name)
 121         self.fn_in = Signal(Function) # LD/ST and Branch is different
 122         self.sv_mode = Signal(SVMode) # BRANCH/LDST_IMM/CROP etc.
 123         self.svp64_vf_in = Signal()  # Vertical-First Mode
 124         self.ptype_in = Signal(SVPType)
 125         self.rc_in = Signal()
 126         self.cr_5bit_in = Signal()  # if CR field was 5-bit
 127         self.cr_2bit_in = Signal()  # bottom 2 bits of CR field
 128         self.ldst_ra_vec = Signal() # set when RA is vec, indicate Index mode
 129         self.ldst_imz_in = Signal() # set when LD/ST immediate is zero
 130         self.ldst_postinc = Signal() # set when LD/ST immediate post-inc set
 131         self.ldst_ffirst = Signal() # set when LD/ST immediate fail-first set
 132
 133         ##### outputs #####
 134
 135         # main mode (normal, reduce, saturate, ffirst, pred-result, branch)
 136         self.mode = Signal(SVP64RMMode)
 137
 138         # Branch Conditional Modes
 139         self.bc_vlset = Signal(SVP64BCVLSETMode) # Branch-Conditional VLSET
 140         self.bc_ctrtest = Signal(SVP64BCCTRMode) # Branch-Conditional CTR-Test
 141         self.bc_pred = Signal(SVP64BCPredMode) # BC predicate mode
 142         self.bc_vsb = Signal()                 # BC VLSET-branch (like BO[1])
 143         self.bc_gate = Signal(SVP64BCGate)     # BC ALL or ANY gate
 144         self.bc_lru  = Signal()                # BC Link Register Update
 145
 146         # predication
 147         self.predmode = Signal(SVP64PredMode)
 148         self.srcpred = Signal(3) # source predicate
 149         self.dstpred = Signal(3) # destination predicate
 150         self.pred_sz = Signal(1) # predicate source zeroing
 151         self.pred_dz = Signal(1) # predicate dest zeroing
 152
 153         # Modes n stuff
 154         self.ew_src = Signal(SVP64Width) # source elwidth
 155         self.ew_dst = Signal(SVP64Width) # dest elwidth
 156         self.subvl= Signal(2) # subvl
 157         self.saturate = Signal(SVP64Sat)
 158         self.RC1 = Signal()
 159         self.vli = Signal()
 160         self.cr_sel = Signal(2)  # bit of CR to test (index 0-3)
 161         self.inv = Signal(1)     # and whether it's inverted (like branch BO)
 162         self.map_evm = Signal(1)
 163         self.map_crm = Signal(1)
 164         self.reverse_gear = Signal(1)  # elements to go VL-1..0
 165         self.ldstmode = Signal(SVP64LDSTmode) # LD/ST Mode (strided type)
 166
 167     def elaborate(self, platform):
 168         m = Module()
 169         comb = m.d.comb
 170         mode = self.rm_in.mode
 171
 172         # decode pieces of mode
 173         is_ldst = Signal()
 174         is_bc = Signal()
 175         is_cr = Signal()
 176         is_ldstimm = Signal()
 177         comb += is_ldst.eq(self.fn_in == Function.LDST)
 178         comb += is_bc.eq(self.fn_in == Function.BRANCH) # XXX TODO use SV Mode
 179         comb += is_cr.eq(self.sv_mode == SVMode.CROP.value)
 180         comb += is_ldstimm.eq(self.sv_mode == SVMode.LDST_IMM.value)
 181         mode2 = sel(m, mode, SVP64MODE.MOD2)
 182         cr = sel(m, mode, SVP64MODE.CR)
 183
 184         #####################
 185         # Branch-Conditional decoding
 186         #####################
 187         with m.If(is_bc):
 188             # Counter-Test Mode.
 189             with m.If(mode[SVP64MODE.BC_CTRTEST]):
 190                 with m.If(self.rm_in.ewsrc[1]):
 191                     comb += self.bc_ctrtest.eq(SVP64BCCTRMode.TEST_INV)
 192                 with m.Else():
 193                     comb += self.bc_ctrtest.eq(SVP64BCCTRMode.TEST)
 194
 195             # BC Mode ALL or ANY (Great-Big-AND-gate or Great-Big-OR-gate)
 196             comb += self.bc_gate.eq(self.rm_in.elwidth[1])
 197             # Link-Register Update
 198             comb += self.bc_lru.eq(self.rm_in.elwidth[0])
 199             comb += self.bc_vsb.eq(self.rm_in.ewsrc[0])
 200
 201         #####################
 202         # CRops decoding
 203         #####################
 204         with m.Elif(is_cr):
 205             with m.Switch(mode2):
 206                 with m.Case(0, 1): # needs further decoding (LDST no mapreduce)
 207                     with m.If(mode[SVP64MODE.REDUCE]):
 208                         comb += self.mode.eq(SVP64RMMode.MAPREDUCE)
 209                     with m.Else():
 210                         comb += self.mode.eq(SVP64RMMode.NORMAL)
 211                 with m.Case(2,3):
 212                     comb += self.mode.eq(SVP64RMMode.FFIRST) # fail-first
 213
 214             # extract failfirst
 215             with m.If(self.mode == SVP64RMMode.FFIRST): # fail-first
 216                 comb += self.inv.eq(mode[SVP64MODE.INV])
 217                 comb += self.vli.eq(mode[SVP64MODE.BC_VLSET])
 218                 with m.If(self.cr_5bit_in):
 219                     comb += self.cr_sel.eq(0b10) # EQ bit index is implicit
 220                 with m.Else():
 221                     comb += self.cr_sel.eq(cr)
 222
 223         #####################
 224         # LDST decoding (both Idx and Imm - should be separate)
 225         #####################
 226         with m.Elif(is_ldst):
 227             with m.Switch(mode2):
 228                 with m.Case(0): # needs further decoding (LDST no mapreduce)
 229                     with m.If(is_ldstimm & mode[SVP64MODE.LDI_POST]):
 230                         comb += self.mode.eq(SVP64RMMode.NORMAL)
 231                         comb += self.ldst_postinc.eq(mode[SVP64MODE.LDI_PI])
 232                         comb += self.ldst_ffirst.eq(mode[SVP64MODE.LDI_FF])
 233                     with m.Elif(is_ldst):
 234                         comb += self.mode.eq(SVP64RMMode.NORMAL)
 235                 with m.Case(1, 3):
 236                     comb += self.mode.eq(SVP64RMMode.FFIRST) # ffirst
 237                 with m.Case(2):
 238                     with m.If(is_ldstimm):
 239                         comb += self.mode.eq(SVP64RMMode.SATURATE) # saturate
 240                     with m.Else():
 241                         comb += self.mode.eq(SVP64RMMode.NORMAL) # els-bit
 242
 243             # extract zeroing
 244             with m.If(is_ldst & ~is_ldstimm): # LDST-Indexed
 245                 with m.Switch(mode2):
 246                     with m.Case(0,2):
 247                             # sz/dz only when mode2[0] = 0
 248                             comb += self.pred_sz.eq(mode[SVP64MODE.DZ])
 249                             comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
 250
 251             with m.Elif(is_ldstimm): # LDST-Immediate
 252                 with m.Switch(mode2):
 253                     with m.Case(0):  # simple mode
 254                         with m.If(~mode[2]): # (but not PI/LF)
 255                             # [MSB0-numbered] bits 0,1,2 of mode zero, use zz
 256                             comb += self.pred_sz.eq(mode[SVP64MODE.ZZ])
 257                             comb += self.pred_dz.eq(mode[SVP64MODE.ZZ])
 258                     with m.Case(2): # saturated mode
 259                             # saturated-mode also uses zz
 260                             comb += self.pred_sz.eq(mode[SVP64MODE.ZZ])
 261                             comb += self.pred_dz.eq(mode[SVP64MODE.ZZ])
 262
 263             # extract failfirst
 264             with m.If(self.mode == SVP64RMMode.FFIRST): # fail-first
 265                 comb += self.inv.eq(mode[SVP64MODE.INV])
 266                 with m.If(is_ldst):
 267                     comb += self.vli.eq(mode[SVP64MODE.LDST_VLI])
 268                 with m.If(self.rc_in):
 269                     comb += self.cr_sel.eq(cr)
 270                 with m.Else():
 271                     # only when Rc=0
 272                     comb += self.RC1.eq(mode[SVP64MODE.RC1])
 273                     with m.If(~is_ldst):
 274                         comb += self.vli.eq(mode[SVP64MODE.VLI])
 275                     comb += self.cr_sel.eq(0b10) # EQ bit index is implicit
 276
 277             # extract saturate
 278             with m.Switch(mode2):
 279                 with m.Case(2):
 280                     with m.If(mode[SVP64MODE.N]):
 281                         comb += self.saturate.eq(SVP64Sat.UNSIGNED)
 282                     with m.Else():
 283                         comb += self.saturate.eq(SVP64Sat.SIGNED)
 284                 with m.Default():
 285                     comb += self.saturate.eq(SVP64Sat.NONE)
 286
 287             # do elwidth/elwidth_src extract
 288             comb += self.ew_src.eq(self.rm_in.ewsrc)
 289             comb += self.ew_dst.eq(self.rm_in.elwidth)
 290             comb += self.subvl.eq(self.rm_in.subvl)
 291
 292             # extract els (element strided mode bit)
 293             # see https://libre-soc.org/openpower/sv/ldst/
 294             els = Signal()
 295             with m.If(is_ldstimm):    # LD/ST-immediate
 296                 with m.Switch(mode2):
 297                     with m.Case(0):
 298                         with m.If(~mode[2]): # (but not PI/LF)
 299                             comb += els.eq(mode[SVP64MODE.ELS_NORMAL])
 300                     with m.Case(2):
 301                         comb += els.eq(mode[SVP64MODE.ELS_SAT])
 302                     with m.Case(1, 3):
 303                         with m.If(self.rc_in):
 304                             comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])
 305             with m.Else():           # LD/ST-Indexed
 306                 with m.Switch(mode2):
 307                     with m.Case(0, 2):
 308                         comb += els.eq(mode[SVP64MODE.LDIDX_ELS])
 309                     with m.Case(1, 3):
 310                         with m.If(self.rc_in):
 311                             comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])
 312
 313             # RA is vectorised
 314             with m.If(self.ldst_ra_vec):
 315                 comb += self.ldstmode.eq(SVP64LDSTmode.INDEXED)
 316             # not element-strided, therefore unit...
 317             with m.Elif(~els):
 318                 comb += self.ldstmode.eq(SVP64LDSTmode.UNITSTRIDE)
 319             # but if the LD/ST immediate is zero, allow cache-inhibited
 320             # loads from same location, therefore don't do element-striding
 321             with m.Elif(~self.ldst_imz_in):
 322                 comb += self.ldstmode.eq(SVP64LDSTmode.ELSTRIDE)
 323
 324         ######################
 325         # arith decoding
 326         ######################
 327         with m.Else():
 328             with m.Switch(mode2):
 329                 with m.Case(0): # needs further decoding (LDST no mapreduce)
 330                     with m.If(mode[SVP64MODE.REDUCE]):
 331                         comb += self.mode.eq(SVP64RMMode.MAPREDUCE)
 332                     with m.Else():
 333                         comb += self.mode.eq(SVP64RMMode.NORMAL)
 334                 with m.Case(1):
 335                     comb += self.mode.eq(SVP64RMMode.FFIRST) # ffirst
 336                 with m.Case(2):
 337                     comb += self.mode.eq(SVP64RMMode.SATURATE) # saturate
 338                 with m.Case(3):
 339                     # mode = 0b11: arithmetic predicate-result
 340                     comb += self.mode.eq(SVP64RMMode.PREDRES) # pred result
 341
 342             # extract "reverse gear" for mapreduce mode
 343             with m.If((~is_ldst) &                     # not for LD/ST
 344                         (mode2 == 0) &                 # first 2 bits == 0
 345                         mode[SVP64MODE.REDUCE] &       # bit 2 == 1
 346                        (~mode[SVP64MODE.MOD3])):       # bit 3 == 0
 347                 comb += self.reverse_gear.eq(mode[SVP64MODE.RG]) # finally whew
 348
 349             # extract zeroing
 350             with m.Switch(mode2):
 351                 with m.Case(0):
 352                     # normal-mode only active in simple mode
 353                     with m.If(~mode[SVP64MODE.REDUCE]): # bit 2 is zero?
 354                         comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
 355                         comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
 356                 with m.Case(2):
 357                     # sat-mode all good...
 358                     comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
 359                     comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
 360                 with m.Case(3):
 361                     # predicate-result has ZZ
 362                     with m.If(self.rc_in):
 363                         comb += self.pred_sz.eq(mode[SVP64MODE.ZZ])
 364                         comb += self.pred_dz.eq(mode[SVP64MODE.ZZ])
 365
 366             # extract failfirst
 367             with m.If(self.mode == SVP64RMMode.FFIRST): # fail-first
 368                 comb += self.inv.eq(mode[SVP64MODE.INV])
 369                 with m.If(self.rc_in):
 370                     comb += self.cr_sel.eq(cr)
 371                 with m.Else():
 372                     # only when Rc=0
 373                     comb += self.RC1.eq(mode[SVP64MODE.RC1])
 374                     with m.If(~is_ldst):
 375                         comb += self.vli.eq(mode[SVP64MODE.VLI])
 376                     comb += self.cr_sel.eq(0b10) # EQ bit index is implicit
 377
 378             # extract saturate
 379             with m.Switch(mode2):
 380                 with m.Case(2):
 381                     with m.If(mode[SVP64MODE.N]):
 382                         comb += self.saturate.eq(SVP64Sat.UNSIGNED)
 383                     with m.Else():
 384                         comb += self.saturate.eq(SVP64Sat.SIGNED)
 385                 with m.Default():
 386                     comb += self.saturate.eq(SVP64Sat.NONE)
 387
 388             # do elwidth/elwidth_src extract
 389             comb += self.ew_src.eq(self.rm_in.ewsrc)
 390             comb += self.ew_dst.eq(self.rm_in.elwidth)
 391             comb += self.subvl.eq(self.rm_in.subvl)
 392
 393             # extract els (element strided mode bit)
 394             # see https://libre-soc.org/openpower/sv/ldst/
 395             els = Signal()
 396             with m.If(is_ldst):
 397                 with m.If(is_ldstimm):
 398                     with m.Switch(mode2):
 399                         with m.Case(0):
 400                             comb += els.eq(mode[SVP64MODE.ELS_NORMAL])
 401                         with m.Case(2):
 402                             comb += els.eq(mode[SVP64MODE.ELS_SAT])
 403                         with m.Case(1, 3):
 404                             with m.If(self.rc_in):
 405                                 comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])
 406                 with m.Else():
 407                     with m.Switch(mode2):
 408                         with m.Case(0, 2):
 409                             comb += els.eq(mode[SVP64MODE.LDIDX_ELS])
 410                         with m.Case(1, 3):
 411                             with m.If(self.rc_in):
 412                                 comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])
 413
 414                 # RA is vectorised
 415                 with m.If(self.ldst_ra_vec):
 416                     comb += self.ldstmode.eq(SVP64LDSTmode.INDEXED)
 417                 # not element-strided, therefore unit...
 418                 with m.Elif(~els):
 419                     comb += self.ldstmode.eq(SVP64LDSTmode.UNITSTRIDE)
 420                 # but if the LD/ST immediate is zero, allow cache-inhibited
 421                 # loads from same location, therefore don't do element-striding
 422                 with m.Elif(~self.ldst_imz_in):
 423                     comb += self.ldstmode.eq(SVP64LDSTmode.ELSTRIDE)
 424
 425         ######################
 426         # Common fields (not many, sigh)
 427         ######################
 428
 429         # extract src/dest predicate.  use EXTRA3.MASK because EXTRA2.MASK
 430         # is in exactly the same bits
 431         srcmask = sel(m, self.rm_in.extra, EXTRA3.MASK)
 432         dstmask = self.rm_in.mask
 433         with m.If(self.ptype_in == SVPType.P2):
 434             comb += self.srcpred.eq(srcmask)
 435         with m.Else():
 436             comb += self.srcpred.eq(dstmask)
 437         comb += self.dstpred.eq(dstmask)
 438
 439         # identify predicate mode
 440         with m.If(self.rm_in.mmode == 1):
 441             comb += self.predmode.eq(SVP64PredMode.CR) # CR Predicate
 442         with m.Elif((self.srcpred == 0) & (self.dstpred == 0)):
 443             comb += self.predmode.eq(SVP64PredMode.ALWAYS) # No predicate
 444         with m.Else():
 445             comb += self.predmode.eq(SVP64PredMode.INT) # non-zero src: INT
 446
 447         return m
 448