src/openpower/sv/trans/svp64.py

   1 # SPDX-License-Identifier: LGPLv3+
   2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   3 # Funded by NLnet http://nlnet.nl
   4
   5 """SVP64 OpenPOWER v3.0B assembly translator
   6
   7 This class takes raw svp64 assembly mnemonics (aliases excluded) and creates
   8 an EXT001-encoded "svp64 prefix" (as a .long) followed by a v3.0B opcode.
   9
  10 It is very simple and straightforward, the only weirdness being the
  11 extraction of the register information and conversion to v3.0B numbering.
  12
  13 Encoding format of svp64: https://libre-soc.org/openpower/sv/svp64/
  14 Encoding format of arithmetic: https://libre-soc.org/openpower/sv/normal/
  15 Encoding format of LDST: https://libre-soc.org/openpower/sv/ldst/
  16 **TODO format of branches: https://libre-soc.org/openpower/sv/branches/**
  17 **TODO format of CRs: https://libre-soc.org/openpower/sv/cr_ops/**
  18 Bugtracker: https://bugs.libre-soc.org/show_bug.cgi?id=578
  19 """
  20
  21 import functools
  22 import os
  23 import sys
  24 from collections import OrderedDict
  25 import inspect
  26
  27 from openpower.decoder.pseudo.pagereader import ISA
  28 from openpower.decoder.power_svp64 import SVP64RM, get_regtype, decode_extra
  29 from openpower.decoder.selectable_int import SelectableInt
  30 from openpower.consts import SVP64MODE
  31 from openpower.decoder.power_insn import SVP64Instruction
  32 from openpower.decoder.power_insn import Database
  33 from openpower.decoder.power_enums import find_wiki_dir
  34
  35 # for debug logging
  36 from openpower.util import log
  37
  38
  39 def instruction(*fields):
  40     def instruction(insn, desc):
  41         (value, start, end) = desc
  42         bits = ((1,) * ((end + 1) - start))
  43         mask = 0
  44         for bit in bits:
  45             mask = ((mask << 1) | bit)
  46         return (insn | ((value & mask) << (31 - end)))
  47
  48     return functools.reduce(instruction, fields, 0)
  49
  50
  51 CUSTOM_INSNS = {}
  52
  53
  54 def _insn(name, *args, **kwargs):
  55     return name, args, kwargs
  56
  57
  58 def _custom_insns(*insns):
  59     """ a decorator that adds the function to `CUSTOM_INSNS` """
  60
  61     def decorator(fn):
  62         FIELDS_ARG = object()
  63         if len(insns) == 0:
  64             insns_ = (fn.__name__, (), {}),
  65         else:
  66             insns_ = insns
  67         for name, args, kwargs in insns_:
  68             if not isinstance(name, str):
  69                 raise TypeError("instruction name must be a str: {name!r}")
  70             if name in CUSTOM_INSNS:
  71                 raise ValueError(f"duplicate instruction mnemonic: {name!r}")
  72             # use getcallargs to check that arguments work:
  73             inspect.getcallargs(fn, FIELDS_ARG, *args, **kwargs)
  74             CUSTOM_INSNS[name] = functools.partial(fn, *args, **kwargs)
  75         return fn
  76     return decorator
  77
  78
  79 @_custom_insns(
  80     _insn("setvl", Rc=0),
  81     _insn("setvl.", Rc=1),
  82 )
  83 def setvl(fields, Rc):
  84     """
  85     setvl is a *32-bit-only* instruction. It controls SVSTATE.
  86     It is *not* a 64-bit-prefixed Vector instruction (no sv.setvl, yet),
  87     it is a Vector *control* instruction.
  88
  89     * setvl RT,RA,SVi,vf,vs,ms
  90
  91     1.6.28 SVL-FORM - from fields.txt
  92     |0     |6    |11    |16   |23 |24 |25 |26    |31 |
  93     | PO   |  RT |   RA | SVi |ms |vs |vf |   XO |Rc |
  94     """
  95     PO = 22
  96     XO = 0b11011
  97     # ARRRGH these are in a non-obvious order in openpower/isa/simplev.mdwn
  98     # compared to the SVL-Form above. sigh
  99     # setvl RT,RA,SVi,vf,vs,ms
 100     (RT, RA, SVi, vf, vs, ms) = fields
 101     SVi -= 1
 102     return instruction(
 103         (PO, 0, 5),
 104         (RT, 6, 10),
 105         (RA, 11, 15),
 106         (SVi, 16, 22),
 107         (ms, 23, 23),
 108         (vs, 24, 24),
 109         (vf, 25, 25),
 110         (XO, 26, 30),
 111         (Rc, 31, 31),
 112     )
 113
 114
 115 @_custom_insns(
 116     _insn("svstep", Rc=0),
 117     _insn("svstep.", Rc=1),
 118 )
 119 def svstep(fields, Rc):
 120     """
 121     svstep is a 32-bit instruction. It updates SVSTATE.
 122     It *can* be SVP64-prefixed, to indicate that its registers
 123     are Vectorised.
 124
 125     * svstep RT,SVi,vf
 126
 127     # 1.6.28 SVL-FORM - from fields.txt
 128     # |0     |6    |11    |16   |23 |24 |25 |26    |31 |
 129     # | PO   |  RT | /    | SVi |/  |/  |vf |   XO |Rc |
 130
 131     """
 132     PO = 22
 133     XO = 0b10011
 134     (RT, SVi, vf) = fields
 135     SVi -= 1
 136     return instruction(
 137         (PO, 0, 5),
 138         (RT, 6, 10),
 139         (0, 11, 15),
 140         (SVi, 16, 22),
 141         (0, 23, 23),
 142         (0, 24, 24),
 143         (vf, 25, 25),
 144         (XO, 26, 30),
 145         (Rc, 31, 31),
 146     )
 147
 148
 149 @_custom_insns()
 150 def svshape(fields):
 151     """
 152     svshape is a *32-bit-only* instruction. It updates SVSHAPE and SVSTATE.
 153     It is *not* a 64-bit-prefixed Vector instruction (no sv.svshape, yet),
 154     it is a Vector *control* instruction.
 155
 156     * svshape SVxd,SVyd,SVzd,SVrm,vf
 157
 158     # 1.6.33 SVM-FORM from fields.txt
 159     # |0     |6        |11      |16    |21    |25 |26    |31  |
 160     # | PO   |  SVxd   |   SVyd | SVzd | SVrm |vf |   XO      |
 161
 162     """
 163     PO = 22
 164     XO = 0b011001
 165     (SVxd, SVyd, SVzd, SVrm, vf) = fields
 166     SVxd -= 1
 167     SVyd -= 1
 168     SVzd -= 1
 169     return instruction(
 170         (PO, 0, 5),
 171         (SVxd, 6, 10),
 172         (SVyd, 11, 15),
 173         (SVzd, 16, 20),
 174         (SVrm, 21, 24),
 175         (vf, 25, 25),
 176         (XO, 26, 31),
 177     )
 178
 179
 180 @_custom_insns()
 181 def svindex(fields):
 182     """
 183     svindex is a *32-bit-only* instruction. It is a convenience
 184     instruction that reduces instruction count for Indexed REMAP
 185     Mode.
 186     It is *not* a 64-bit-prefixed Vector instruction (no sv.svindex, yet),
 187     it is a Vector *control* instruction.
 188
 189     1.6.28 SVI-FORM
 190       |0     |6    |11    |16   |21 |23|24|25|26    31|
 191       | PO   |  SVG|rmm   | SVd |ew |yx|mm|sk|   XO   |
 192     """
 193     # note that the dimension field one subtracted
 194     PO = 22
 195     XO = 0b101001
 196     (SVG, rmm, SVd, ew, yx, mm, sk) = fields
 197     SVd -= 1
 198     return instruction(
 199         (PO, 0, 5),
 200         (SVG, 6, 10),
 201         (rmm, 11, 15),
 202         (SVd, 16, 20),
 203         (ew, 21, 22),
 204         (yx, 23, 23),
 205         (mm, 24, 24),
 206         (sk, 25, 25),
 207         (XO, 26, 31),
 208     )
 209
 210
 211 @_custom_insns()
 212 def svremap(fields):
 213     """
 214     this is a *32-bit-only* instruction. It updates the SVSHAPE SPR
 215     it is *not* a 64-bit-prefixed Vector instruction (no sv.svremap),
 216     it is a Vector *control* instruction.
 217
 218     * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
 219
 220     # 1.6.34 SVRM-FORM
 221        |0     |6     |11  |13   |15   |17   |19   |21  |22   |26     |31 |
 222        | PO   | SVme |mi0 | mi1 | mi2 | mo0 | mo1 |pst |///  | XO        |
 223
 224     """
 225     PO = 22
 226     XO = 0b111001
 227     (SVme, mi0, mi1, mi2, mo0, mo1, pst) = fields
 228     return instruction(
 229         (PO, 0, 5),
 230         (SVme, 6, 10),
 231         (mi0, 11, 12),
 232         (mi1, 13, 14),
 233         (mi2, 15, 16),
 234         (mo0, 17, 18),
 235         (mo1, 19, 20),
 236         (pst, 21, 21),
 237         (0, 22, 25),
 238         (XO, 26, 31),
 239     )
 240
 241
 242 # ok from here-on down these are added as 32-bit instructions
 243 # and are here only because binutils (at present) doesn't have
 244 # them (that's being fixed!)
 245 # they can - if implementations then choose - be Vectorised
 246 # because they are general-purpose scalar instructions
 247 @_custom_insns()
 248 def bmask(fields):
 249     """
 250     1.6.2.2 BM2-FORM
 251     |0     |6    |11    |16    |21   |26 |27    31|
 252     | PO   |  RT |   RA |   RB |bm   |L  |   XO   |
 253     """
 254     PO = 22
 255     XO = 0b010001
 256     (RT, RA, RB, bm, L) = fields
 257     return instruction(
 258         (PO, 0, 5),
 259         (RT, 6, 10),
 260         (RA, 11, 15),
 261         (RB, 16, 20),
 262         (bm, 21, 25),
 263         (L, 26, 26),
 264         (XO, 27, 31),
 265     )
 266
 267
 268 @_custom_insns(
 269     _insn("fsins", Rc=0),
 270     _insn("fsins.", Rc=1),
 271 )
 272 def fsins(fields, Rc):
 273     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 274     # however we are out of space with opcode 22
 275     # 1.6.7 X-FORM
 276     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 277     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 278     PO = 59
 279     XO = 0b1000001110
 280     (FRT, FRB) = fields
 281     return instruction(
 282         (PO, 0, 5),
 283         (FRT, 6, 10),
 284         (0, 11, 15),
 285         (FRB, 16, 20),
 286         (XO, 21, 30),
 287         (Rc, 31, 31),
 288     )
 289
 290
 291 @_custom_insns(
 292     _insn("fcoss", Rc=0),
 293     _insn("fcoss.", Rc=1),
 294 )
 295 def fcoss(fields, Rc):
 296     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 297     # however we are out of space with opcode 22
 298     # 1.6.7 X-FORM
 299     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 300     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 301     PO = 59
 302     XO = 0b1000101110
 303     (FRT, FRB) = fields
 304     return instruction(
 305         (PO, 0, 5),
 306         (FRT, 6, 10),
 307         (0, 11, 15),
 308         (FRB, 16, 20),
 309         (XO, 21, 30),
 310         (Rc, 31, 31),
 311     )
 312
 313
 314 @_custom_insns(
 315     _insn("ternlogi", Rc=0),
 316     _insn("ternlogi.", Rc=1),
 317 )
 318 def ternlogi(fields, Rc):
 319     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 320     # however we are out of space with opcode 22
 321     # 1.6.34 TLI-FORM
 322     #   |0   |6   |11   |16   |21   |29  |31 |
 323     #   | PO | RT |  RA |  RB | TLI | XO |Rc |
 324     PO = 5
 325     XO = 0
 326     (RT, RA, RB, TLI) = fields
 327     return instruction(
 328         (PO, 0, 5),
 329         (RT, 6, 10),
 330         (RA, 11, 15),
 331         (RB, 16, 20),
 332         (TLI, 21, 28),
 333         (XO, 29, 30),
 334         (Rc, 31, 31),
 335     )
 336
 337
 338 @_custom_insns(
 339     _insn("grev", Rc=0, imm=0, word=0),
 340     _insn("grevw", Rc=0, imm=0, word=1),
 341     _insn("grevi", Rc=0, imm=1, word=0),
 342     _insn("grevwi", Rc=0, imm=1, word=1),
 343     _insn("grev.", Rc=1, imm=0, word=0),
 344     _insn("grevw.", Rc=1, imm=0, word=1),
 345     _insn("grevi.", Rc=1, imm=1, word=0),
 346     _insn("grevwi.", Rc=1, imm=1, word=1),
 347 )
 348 def grev(fields, Rc, imm, word):
 349     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 350     # however we are out of space with opcode 22
 351     insn = PO = 5
 352     # _ matches fields in table at:
 353     # https://libre-soc.org/openpower/sv/bitmanip/
 354     XO = 0b1_0010_110
 355     if word:
 356         XO |= 0b100_000
 357     if imm:
 358         XO |= 0b1000_000
 359     (RT, RA, XBI) = fields
 360     insn = (insn << 5) | RT
 361     insn = (insn << 5) | RA
 362     if imm and not word:
 363         assert 0 <= XBI < 64
 364         insn = (insn << 6) | XBI
 365         insn = (insn << 9) | XO
 366     else:
 367         assert 0 <= XBI < 32
 368         insn = (insn << 5) | XBI
 369         insn = (insn << 10) | XO
 370     insn = (insn << 1) | Rc
 371     return insn
 372
 373
 374 @_custom_insns(
 375     _insn("maxs", XO=0b0111001110, Rc=0),
 376     _insn("maxs.", XO=0b0111001110, Rc=1),
 377     _insn("maxu", XO=0b0011001110, Rc=0),
 378     _insn("maxu.", XO=0b0011001110, Rc=1),
 379     _insn("minu", XO=0b0001001110, Rc=0),
 380     _insn("minu.", XO=0b0001001110, Rc=1),
 381     _insn("mins", XO=0b0101001110, Rc=0),
 382     _insn("mins.", XO=0b0101001110, Rc=1),
 383     _insn("absdu", XO=0b1011110110, Rc=0),
 384     _insn("absdu.", XO=0b1011110110, Rc=1),
 385     _insn("absds", XO=0b1001110110, Rc=0),
 386     _insn("absds.", XO=0b1001110110, Rc=1),
 387     _insn("avgadd", XO=0b1101001110, Rc=0),
 388     _insn("avgadd.", XO=0b1101001110, Rc=1),
 389     _insn("absdacu", XO=0b1111110110, Rc=0),
 390     _insn("absdacu.", XO=0b1111110110, Rc=1),
 391     _insn("absdacs", XO=0b0111110110, Rc=0),
 392     _insn("absdacs.", XO=0b0111110110, Rc=1),
 393     _insn("cprop", XO=0b0110001110, Rc=0),
 394     _insn("cprop.", XO=0b0110001110, Rc=1),
 395 )
 396 def av(fields, XO, Rc):
 397     # 1.6.7 X-FORM
 398     #   |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 399     #   | PO   |       RT      |    RA       |    RB       |   XO |Rc  |
 400     PO = 22
 401     (RT, RA, RB) = fields
 402     return instruction(
 403         (PO, 0, 5),
 404         (RT, 6, 10),
 405         (RA, 11, 15),
 406         (RB, 16, 20),
 407         (XO, 21, 30),
 408         (Rc, 31, 31),
 409     )
 410
 411
 412 @_custom_insns()
 413 def fmvis(fields):
 414     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 415     # V3.0B 1.6.6 DX-FORM
 416     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27    |31  |
 417     # | PO   |   FRS         |     d1      |      d0  |      XO |d2  |
 418     PO = 22
 419     XO = 0b00011
 420     (FRS, imm) = fields
 421     # first split imm into d1, d0 and d2. sigh
 422     d2 = (imm & 1)  # LSB (0)
 423     d1 = (imm >> 1) & 0b11111  # bits 1-5
 424     d0 = (imm >> 6)  # MSBs 6-15
 425     return instruction(
 426         (PO, 0, 5),
 427         (FRS, 6, 10),
 428         (d1,  11, 15),
 429         (d0,  16, 25),
 430         (XO, 26, 30),
 431         (d2, 31, 31),
 432     )
 433
 434
 435 @_custom_insns()
 436 def fishmv(fields):
 437     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 438     # V3.0B 1.6.6 DX-FORM
 439     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27   |31  |
 440     # | PO   |   FRS         |     d1      |      d0  |     XO |d2  |
 441     PO = 22
 442     XO = 0b01011
 443     (FRS, imm) = fields
 444     # first split imm into d1, d0 and d2. sigh
 445     d2 = (imm & 1)  # LSB (0)
 446     d1 = (imm >> 1) & 0b11111  # bits 1-5
 447     d0 = (imm >> 6)  # MSBs 6-15
 448     return instruction(
 449         (PO, 0, 5),
 450         (FRS, 6, 10),
 451         (d1,  11, 15),
 452         (d0,  16, 25),
 453         (XO, 26, 30),
 454         (d2, 31, 31),
 455     )
 456
 457
 458 # decode GPR into sv extra
 459 def get_extra_gpr(etype, regmode, field):
 460     if regmode == 'scalar':
 461         # cut into 2-bits 5-bits SS FFFFF
 462         sv_extra = field >> 5
 463         field = field & 0b11111
 464     else:
 465         # cut into 5-bits 2-bits FFFFF SS
 466         sv_extra = field & 0b11
 467         field = field >> 2
 468     return sv_extra, field
 469
 470
 471 # decode 3-bit CR into sv extra
 472 def get_extra_cr_3bit(etype, regmode, field):
 473     if regmode == 'scalar':
 474         # cut into 2-bits 3-bits SS FFF
 475         sv_extra = field >> 3
 476         field = field & 0b111
 477     else:
 478         # cut into 3-bits 4-bits FFF SSSS but will cut 2 zeros off later
 479         sv_extra = field & 0b1111
 480         field = field >> 4
 481     return sv_extra, field
 482
 483
 484 # decodes SUBVL
 485 def decode_subvl(encoding):
 486     pmap = {'2': 0b01, '3': 0b10, '4': 0b11}
 487     assert encoding in pmap, \
 488         "encoding %s for SUBVL not recognised" % encoding
 489     return pmap[encoding]
 490
 491
 492 # decodes elwidth
 493 def decode_elwidth(encoding):
 494     pmap = {'8': 0b11, '16': 0b10, '32': 0b01}
 495     assert encoding in pmap, \
 496         "encoding %s for elwidth not recognised" % encoding
 497     return pmap[encoding]
 498
 499
 500 # decodes predicate register encoding
 501 def decode_predicate(encoding):
 502     pmap = {  # integer
 503         '1<<r3': (0, 0b001),
 504         'r3': (0, 0b010),
 505         '~r3': (0, 0b011),
 506         'r10': (0, 0b100),
 507         '~r10': (0, 0b101),
 508         'r30': (0, 0b110),
 509         '~r30': (0, 0b111),
 510         # CR
 511         'lt': (1, 0b000),
 512         'nl': (1, 0b001), 'ge': (1, 0b001),  # same value
 513         'gt': (1, 0b010),
 514         'ng': (1, 0b011), 'le': (1, 0b011),  # same value
 515         'eq': (1, 0b100),
 516         'ne': (1, 0b101),
 517         'so': (1, 0b110), 'un': (1, 0b110),  # same value
 518         'ns': (1, 0b111), 'nu': (1, 0b111),  # same value
 519     }
 520     assert encoding in pmap, \
 521         "encoding %s for predicate not recognised" % encoding
 522     return pmap[encoding]
 523
 524
 525 # decodes "Mode" in similar way to BO field (supposed to, anyway)
 526 def decode_bo(encoding):
 527     pmap = {  # TODO: double-check that these are the same as Branch BO
 528         'lt': 0b000,
 529         'nl': 0b001, 'ge': 0b001,  # same value
 530         'gt': 0b010,
 531         'ng': 0b011, 'le': 0b011,  # same value
 532         'eq': 0b100,
 533         'ne': 0b101,
 534         'so': 0b110, 'un': 0b110,  # same value
 535         'ns': 0b111, 'nu': 0b111,  # same value
 536     }
 537     assert encoding in pmap, \
 538         "encoding %s for BO Mode not recognised" % encoding
 539     return pmap[encoding]
 540
 541
 542 # partial-decode fail-first mode
 543 def decode_ffirst(encoding):
 544     if encoding in ['RC1', '~RC1']:
 545         return encoding
 546     return decode_bo(encoding)
 547
 548
 549 def decode_reg(field, macros=None):
 550     if macros is None:
 551         macros = {}
 552     # decode the field number. "5.v" or "3.s" or "9"
 553     # and now also "*0", and "*%0".  note: *NOT* to add "*%rNNN" etc.
 554     # https://bugs.libre-soc.org/show_bug.cgi?id=884#c0
 555     if field.startswith(("*%", "*")):
 556         if field.startswith("*%"):
 557             field = field[2:]
 558         else:
 559             field = field[1:]
 560         while field in macros:
 561             field = macros[field]
 562         return int(field), "vector"  # actual register number
 563
 564     # try old convention (to be retired)
 565     field = field.split(".")
 566     regmode = 'scalar'  # default
 567     if len(field) == 2:
 568         if field[1] == 's':
 569             regmode = 'scalar'
 570         elif field[1] == 'v':
 571             regmode = 'vector'
 572     field = int(field[0])  # actual register number
 573     return field, regmode
 574
 575
 576 def decode_imm(field):
 577     ldst_imm = "(" in field and field[-1] == ')'
 578     if ldst_imm:
 579         return field[:-1].split("(")
 580     else:
 581         return None, field
 582
 583
 584 def crf_extra(etype, regmode, field, extras):
 585     """takes a CR Field number (CR0-CR127), splits into EXTRA2/3 and v3.0
 586     the scalar/vector mode (crNN.v or crNN.s) changes both the format
 587     of the EXTRA2/3 encoding as well as what range of registers is possible.
 588     this function can be used for both BF/BFA and BA/BB/BT by first removing
 589     the bottom 2 bits of BA/BB/BT then re-instating them after encoding.
 590     see https://libre-soc.org/openpower/sv/svp64/appendix/#cr_extra
 591     for specification
 592     """
 593     sv_extra, field = get_extra_cr_3bit(etype, regmode, field)
 594     # now sanity-check (and shrink afterwards)
 595     if etype == 'EXTRA2':
 596         # 3-bit CR Field (BF, BFA) EXTRA2 encoding
 597         if regmode == 'scalar':
 598             # range is CR0-CR15 in increments of 1
 599             assert (sv_extra >> 1) == 0, \
 600                 "scalar CR %s cannot fit into EXTRA2 %s" % \
 601                 (rname, str(extras[extra_idx]))
 602             # all good: encode as scalar
 603             sv_extra = sv_extra & 0b01
 604         else:  # vector
 605             # range is CR0-CR127 in increments of 16
 606             assert sv_extra & 0b111 == 0, \
 607                 "vector CR %s cannot fit into EXTRA2 %s" % \
 608                 (rname, str(extras[extra_idx]))
 609             # all good: encode as vector (bit 2 set)
 610             sv_extra = 0b10 | (sv_extra >> 3)
 611     else:
 612         # 3-bit CR Field (BF, BFA) EXTRA3 encoding
 613         if regmode == 'scalar':
 614             # range is CR0-CR31 in increments of 1
 615             assert (sv_extra >> 2) == 0, \
 616                 "scalar CR %s cannot fit into EXTRA3 %s" % \
 617                 (rname, str(extras[extra_idx]))
 618             # all good: encode as scalar
 619             sv_extra = sv_extra & 0b11
 620         else:  # vector
 621             # range is CR0-CR127 in increments of 8
 622             assert sv_extra & 0b11 == 0, \
 623                 "vector CR %s cannot fit into EXTRA3 %s" % \
 624                 (rname, str(extras[extra_idx]))
 625             # all good: encode as vector (bit 3 set)
 626             sv_extra = 0b100 | (sv_extra >> 2)
 627     return sv_extra, field
 628
 629
 630 def to_number(field):
 631     if field.startswith("0x"):
 632         return eval(field)
 633     if field.startswith("0b"):
 634         return eval(field)
 635     return int(field)
 636
 637
 638 # decodes svp64 assembly listings and creates EXT001 svp64 prefixes
 639 class SVP64Asm:
 640     def __init__(self, lst, bigendian=False, macros=None):
 641         if macros is None:
 642             macros = {}
 643         self.macros = macros
 644         self.lst = lst
 645         self.trans = self.translate(lst)
 646         self.isa = ISA()  # reads the v3.0B pseudo-code markdown files
 647         self.svp64 = SVP64RM()  # reads the svp64 Remap entries for registers
 648         assert bigendian == False, "error, bigendian not supported yet"
 649
 650     def __iter__(self):
 651         yield from self.trans
 652
 653     def translate_one(self, insn, macros=None):
 654         if macros is None:
 655             macros = {}
 656         macros.update(self.macros)
 657         isa = self.isa
 658         svp64 = self.svp64
 659         insn_no_comments = insn.partition('#')[0]
 660         # find first space, to get opcode
 661         ls = insn_no_comments.split(' ')
 662         opcode = ls[0]
 663         # now find opcode fields
 664         fields = ''.join(ls[1:]).split(',')
 665         mfields = list(map(str.strip, fields))
 666         log("opcode, fields", ls, opcode, mfields)
 667         fields = []
 668         # macro substitution
 669         for field in mfields:
 670             fields.append(macro_subst(macros, field))
 671         log("opcode, fields substed", ls, opcode, fields)
 672
 673         # identify if it is a special instruction
 674         custom_insn_hook = CUSTOM_INSNS.get(opcode)
 675         if custom_insn_hook is not None:
 676             fields = tuple(map(to_number, fields))
 677             insn_num = custom_insn_hook(fields)
 678             log(opcode, bin(insn_num))
 679             yield ".long 0x%X # %s" % (insn_num, insn)
 680             return
 681
 682         # identify if is a svp64 mnemonic
 683         if not opcode.startswith('sv.'):
 684             yield insn  # unaltered
 685             return
 686         opcode = opcode[3:]  # strip leading "sv"
 687
 688         # start working on decoding the svp64 op: sv.basev30Bop/vec2/mode
 689         opmodes = opcode.split("/")  # split at "/"
 690         v30b_op_orig = opmodes.pop(0)    # first is the v3.0B
 691         # check instruction ends with dot
 692         rc_mode = v30b_op_orig.endswith('.')
 693         if rc_mode:
 694             v30b_op = v30b_op_orig[:-1]
 695         else:
 696             v30b_op = v30b_op_orig
 697
 698         # look up the 32-bit op (original, with "." if it has it)
 699         if v30b_op_orig in isa.instr:
 700             isa_instr = isa.instr[v30b_op_orig]
 701         else:
 702             raise Exception("opcode %s of '%s' not supported" %
 703                             (v30b_op_orig, insn))
 704
 705         # look up the svp64 op, first the original (with "." if it has it)
 706         if v30b_op_orig in svp64.instrs:
 707             rm = svp64.instrs[v30b_op_orig]  # one row of the svp64 RM CSV
 708         # then without the "." (if there was one)
 709         elif v30b_op in svp64.instrs:
 710             rm = svp64.instrs[v30b_op]  # one row of the svp64 RM CSV
 711         else:
 712             raise Exception(f"opcode {v30b_op_orig!r} of "
 713                             f"{insn!r} not an svp64 instruction")
 714
 715         # get regs info e.g. "RT,RA,RB"
 716         v30b_regs = isa_instr.regs[0]
 717         log("v3.0B op", v30b_op, "Rc=1" if rc_mode else '')
 718         log("v3.0B regs", opcode, v30b_regs)
 719         log("RM", rm)
 720
 721         # right.  the first thing to do is identify the ordering of
 722         # the registers, by name.  the EXTRA2/3 ordering is in
 723         # rm['0']..rm['3'] but those fields contain the names RA, BB
 724         # etc.  we have to read the pseudocode to understand which
 725         # reg is which in our instruction. sigh.
 726
 727         # first turn the svp64 rm into a "by name" dict, recording
 728         # which position in the RM EXTRA it goes into
 729         # also: record if the src or dest was a CR, for sanity-checking
 730         # (elwidth overrides on CRs are banned)
 731         decode = decode_extra(rm)
 732         dest_reg_cr, src_reg_cr, svp64_src, svp64_dest = decode
 733
 734         log("EXTRA field index, src", svp64_src)
 735         log("EXTRA field index, dest", svp64_dest)
 736
 737         # okaaay now we identify the field value (opcode N,N,N) with
 738         # the pseudo-code info (opcode RT, RA, RB)
 739         assert len(fields) == len(v30b_regs), \
 740             "length of fields %s must match insn `%s` fields %s" % \
 741             (str(v30b_regs), insn, str(fields))
 742         opregfields = zip(fields, v30b_regs)  # err that was easy
 743
 744         # now for each of those find its place in the EXTRA encoding
 745         # note there is the possibility (for LD/ST-with-update) of
 746         # RA occurring **TWICE**.  to avoid it getting added to the
 747         # v3.0B suffix twice, we spot it as a duplicate, here
 748         extras = OrderedDict()
 749         for idx, (field, regname) in enumerate(opregfields):
 750             imm, regname = decode_imm(regname)
 751             rtype = get_regtype(regname)
 752             log("    idx find", rtype, idx, field, regname, imm)
 753             if rtype is None:
 754                 # probably an immediate field, append it straight
 755                 extras[('imm', idx, False)] = (idx, field, None, None, None)
 756                 continue
 757             extra = svp64_src.get(regname, None)
 758             if extra is not None:
 759                 extra = ('s', extra, False)  # not a duplicate
 760                 extras[extra] = (idx, field, regname, rtype, imm)
 761                 log("    idx src", idx, extra, extras[extra])
 762             dextra = svp64_dest.get(regname, None)
 763             log("regname in", regname, dextra)
 764             if dextra is not None:
 765                 is_a_duplicate = extra is not None  # duplicate spotted
 766                 dextra = ('d', dextra, is_a_duplicate)
 767                 extras[dextra] = (idx, field, regname, rtype, imm)
 768                 log("    idx dst", idx, extra, extras[dextra])
 769
 770         # great! got the extra fields in their associated positions:
 771         # also we know the register type. now to create the EXTRA encodings
 772         etype = rm['Etype']  # Extra type: EXTRA3/EXTRA2
 773         ptype = rm['Ptype']  # Predication type: Twin / Single
 774         extra_bits = 0
 775         v30b_newfields = []
 776         for extra_idx, (idx, field, rname, rtype, iname) in extras.items():
 777             # is it a field we don't alter/examine?  if so just put it
 778             # into newfields
 779             if rtype is None:
 780                 v30b_newfields.append(field)
 781                 continue
 782
 783             # identify if this is a ld/st immediate(reg) thing
 784             ldst_imm = "(" in field and field[-1] == ')'
 785             if ldst_imm:
 786                 immed, field = field[:-1].split("(")
 787
 788             field, regmode = decode_reg(field, macros=macros)
 789             log("    ", extra_idx, rname, rtype,
 790                 regmode, iname, field, end=" ")
 791
 792             # see Mode field https://libre-soc.org/openpower/sv/svp64/
 793             # XXX TODO: the following is a bit of a laborious repeated
 794             # mess, which could (and should) easily be parameterised.
 795             # XXX also TODO: the LD/ST modes which are different
 796             # https://libre-soc.org/openpower/sv/ldst/
 797
 798             # rright.  SVP64 register numbering is from 0 to 127
 799             # for GPRs, FPRs *and* CR Fields, where for v3.0 the GPRs and RPFs
 800             # are 0-31 and CR Fields are only 0-7.  the SVP64 RM "Extra"
 801             # area is used to extend the numbering from the 32-bit
 802             # instruction, and also to record whether the register
 803             # is scalar or vector. on a per-operand basis.  this
 804             # results in a slightly finnicky encoding: here we go...
 805
 806             # encode SV-GPR and SV-FPR field into extra, v3.0field
 807             if rtype in ['GPR', 'FPR']:
 808                 sv_extra, field = get_extra_gpr(etype, regmode, field)
 809                 # now sanity-check. EXTRA3 is ok, EXTRA2 has limits
 810                 # (and shrink to a single bit if ok)
 811                 if etype == 'EXTRA2':
 812                     if regmode == 'scalar':
 813                         # range is r0-r63 in increments of 1
 814                         assert (sv_extra >> 1) == 0, \
 815                             "scalar GPR %s cannot fit into EXTRA2 %s" % \
 816                             (rname, str(extras[extra_idx]))
 817                         # all good: encode as scalar
 818                         sv_extra = sv_extra & 0b01
 819                     else:
 820                         # range is r0-r127 in increments of 2 (r0 r2 ... r126)
 821                         assert sv_extra & 0b01 == 0, \
 822                             "%s: vector field %s cannot fit " \
 823                             "into EXTRA2 %s" % \
 824                             (insn, rname, str(extras[extra_idx]))
 825                         # all good: encode as vector (bit 2 set)
 826                         sv_extra = 0b10 | (sv_extra >> 1)
 827                 elif regmode == 'vector':
 828                     # EXTRA3 vector bit needs marking
 829                     sv_extra |= 0b100
 830
 831             # encode SV-CR 3-bit field into extra, v3.0field.
 832             # 3-bit is for things like BF and BFA
 833             elif rtype == 'CR_3bit':
 834                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 835
 836             # encode SV-CR 5-bit field into extra, v3.0field
 837             # 5-bit is for things like BA BB BC BT etc.
 838             # *sigh* this is the same as 3-bit except the 2 LSBs of the
 839             # 5-bit field are passed through unaltered.
 840             elif rtype == 'CR_5bit':
 841                 cr_subfield = field & 0b11  # record bottom 2 bits for later
 842                 field = field >> 2         # strip bottom 2 bits
 843                 # use the exact same 3-bit function for the top 3 bits
 844                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 845                 # reconstruct the actual 5-bit CR field (preserving the
 846                 # bottom 2 bits, unaltered)
 847                 field = (field << 2) | cr_subfield
 848
 849             else:
 850                 raise Exception("no type match: %s" % rtype)
 851
 852             # capture the extra field info
 853             log("=>", "%5s" % bin(sv_extra), field)
 854             extras[extra_idx] = sv_extra
 855
 856             # append altered field value to v3.0b, differs for LDST
 857             # note that duplicates are skipped e.g. EXTRA2 contains
 858             # *BOTH* s:RA *AND* d:RA which happens on LD/ST-with-update
 859             srcdest, idx, duplicate = extra_idx
 860             if duplicate:  # skip adding to v3.0b fields, already added
 861                 continue
 862             if ldst_imm:
 863                 v30b_newfields.append(("%s(%s)" % (immed, str(field))))
 864             else:
 865                 v30b_newfields.append(str(field))
 866
 867         log("new v3.0B fields", v30b_op, v30b_newfields)
 868         log("extras", extras)
 869
 870         # rright. now we have all the info. start creating SVP64 instruction.
 871         db = Database(find_wiki_dir())
 872         svp64_insn = SVP64Instruction.pair(prefix=0, suffix=0)
 873         svp64_prefix = svp64_insn.prefix
 874         svp64_rm = svp64_insn.prefix.rm
 875
 876         # begin with EXTRA fields
 877         for idx, sv_extra in extras.items():
 878             log(idx)
 879             if idx is None:
 880                 continue
 881             if idx[0] == 'imm':
 882                 continue
 883             srcdest, idx, duplicate = idx
 884             if etype == 'EXTRA2':
 885                 svp64_rm.extra2[idx] = sv_extra
 886             else:
 887                 svp64_rm.extra3[idx] = sv_extra
 888
 889         # identify if the op is a LD/ST. the "blegh" way. copied
 890         # from power_enums.  TODO, split the list _insns down.
 891         is_ld = v30b_op in [
 892             "lbarx", "lbz", "lbzu", "lbzux", "lbzx",            # load byte
 893             "ld", "ldarx", "ldbrx", "ldu", "ldux", "ldx",       # load double
 894             "lfs", "lfsx", "lfsu", "lfsux",                     # FP load single
 895             "lfd", "lfdx", "lfdu", "lfdux", "lfiwzx", "lfiwax",  # FP load dbl
 896             "lha", "lharx", "lhau", "lhaux", "lhax",            # load half
 897             "lhbrx", "lhz", "lhzu", "lhzux", "lhzx",            # more load half
 898             "lwa", "lwarx", "lwaux", "lwax", "lwbrx",           # load word
 899             "lwz", "lwzcix", "lwzu", "lwzux", "lwzx",           # more load word
 900         ]
 901         is_st = v30b_op in [
 902             "stb", "stbcix", "stbcx", "stbu", "stbux", "stbx",
 903             "std", "stdbrx", "stdcx", "stdu", "stdux", "stdx",
 904             "stfs", "stfsx", "stfsu", "stfux",                  # FP store sgl
 905             "stfd", "stfdx", "stfdu", "stfdux", "stfiwx",       # FP store dbl
 906             "sth", "sthbrx", "sthcx", "sthu", "sthux", "sthx",
 907             "stw", "stwbrx", "stwcx", "stwu", "stwux", "stwx",
 908         ]
 909         # use this to determine if the SVP64 RM format is different.
 910         # see https://libre-soc.org/openpower/sv/ldst/
 911         is_ldst = is_ld or is_st
 912
 913         # branch-conditional detection
 914         is_bc = v30b_op in [
 915             "bc", "bclr",
 916         ]
 917
 918         # parts of svp64_rm
 919         mmode = 0  # bit 0
 920         pmask = 0  # bits 1-3
 921         destwid = 0  # bits 4-5
 922         srcwid = 0  # bits 6-7
 923         subvl = 0   # bits 8-9
 924         smask = 0  # bits 16-18 but only for twin-predication
 925         mode = 0  # bits 19-23
 926
 927         mask_m_specified = False
 928         has_pmask = False
 929         has_smask = False
 930
 931         saturation = None
 932         src_zero = 0
 933         dst_zero = 0
 934         sv_mode = None
 935
 936         mapreduce = False
 937         reverse_gear = False
 938         mapreduce_crm = False
 939         mapreduce_svm = False
 940
 941         predresult = False
 942         failfirst = False
 943         ldst_elstride = 0
 944
 945         # branch-conditional bits
 946         bc_all = 0
 947         bc_lru = 0
 948         bc_brc = 0
 949         bc_svstep = 0
 950         bc_vsb = 0
 951         bc_vlset = 0
 952         bc_vli = 0
 953         bc_snz = 0
 954
 955         # ok let's start identifying opcode augmentation fields
 956         for encmode in opmodes:
 957             # predicate mask (src and dest)
 958             if encmode.startswith("m="):
 959                 pme = encmode
 960                 pmmode, pmask = decode_predicate(encmode[2:])
 961                 smmode, smask = pmmode, pmask
 962                 mmode = pmmode
 963                 mask_m_specified = True
 964             # predicate mask (dest)
 965             elif encmode.startswith("dm="):
 966                 pme = encmode
 967                 pmmode, pmask = decode_predicate(encmode[3:])
 968                 mmode = pmmode
 969                 has_pmask = True
 970             # predicate mask (src, twin-pred)
 971             elif encmode.startswith("sm="):
 972                 sme = encmode
 973                 smmode, smask = decode_predicate(encmode[3:])
 974                 mmode = smmode
 975                 has_smask = True
 976             # vec2/3/4
 977             elif encmode.startswith("vec"):
 978                 subvl = decode_subvl(encmode[3:])
 979             # elwidth
 980             elif encmode.startswith("ew="):
 981                 destwid = decode_elwidth(encmode[3:])
 982             elif encmode.startswith("sw="):
 983                 srcwid = decode_elwidth(encmode[3:])
 984             # element-strided LD/ST
 985             elif encmode == 'els':
 986                 ldst_elstride = 1
 987             # saturation
 988             elif encmode == 'sats':
 989                 assert sv_mode is None
 990                 saturation = 1
 991                 sv_mode = 0b10
 992             elif encmode == 'satu':
 993                 assert sv_mode is None
 994                 sv_mode = 0b10
 995                 saturation = 0
 996             # predicate zeroing
 997             elif encmode == 'sz':
 998                 src_zero = 1
 999             elif encmode == 'dz':
1000                 dst_zero = 1
1001             # failfirst
1002             elif encmode.startswith("ff="):
1003                 assert sv_mode is None
1004                 sv_mode = 0b01
1005                 failfirst = decode_ffirst(encmode[3:])
1006             # predicate-result, interestingly same as fail-first
1007             elif encmode.startswith("pr="):
1008                 assert sv_mode is None
1009                 sv_mode = 0b11
1010                 predresult = decode_ffirst(encmode[3:])
1011             # map-reduce mode, reverse-gear
1012             elif encmode == 'mrr':
1013                 assert sv_mode is None
1014                 sv_mode = 0b00
1015                 mapreduce = True
1016                 reverse_gear = True
1017             # map-reduce mode
1018             elif encmode == 'mr':
1019                 assert sv_mode is None
1020                 sv_mode = 0b00
1021                 mapreduce = True
1022             elif encmode == 'crm':  # CR on map-reduce
1023                 assert sv_mode is None
1024                 sv_mode = 0b00
1025                 mapreduce_crm = True
1026             elif encmode == 'svm':  # sub-vector mode
1027                 mapreduce_svm = True
1028             elif is_bc:
1029                 if encmode == 'all':
1030                     bc_all = 1
1031                 elif encmode == 'st':  # svstep mode
1032                     bc_step = 1
1033                 elif encmode == 'sr':  # svstep BRc mode
1034                     bc_step = 1
1035                     bc_brc = 1
1036                 elif encmode == 'vs':  # VLSET mode
1037                     bc_vlset = 1
1038                 elif encmode == 'vsi':  # VLSET mode with VLI (VL inclusives)
1039                     bc_vlset = 1
1040                     bc_vli = 1
1041                 elif encmode == 'vsb':  # VLSET mode with VSb
1042                     bc_vlset = 1
1043                     bc_vsb = 1
1044                 elif encmode == 'vsbi':  # VLSET mode with VLI and VSb
1045                     bc_vlset = 1
1046                     bc_vli = 1
1047                     bc_vsb = 1
1048                 elif encmode == 'snz':  # sz (only) already set above
1049                     src_zero = 1
1050                     bc_snz = 1
1051                 elif encmode == 'lu':  # LR update mode
1052                     bc_lru = 1
1053                 else:
1054                     raise AssertionError("unknown encmode %s" % encmode)
1055             else:
1056                 raise AssertionError("unknown encmode %s" % encmode)
1057
1058         if ptype == '2P':
1059             # since m=xx takes precedence (overrides) sm=xx and dm=xx,
1060             # treat them as mutually exclusive
1061             if mask_m_specified:
1062                 assert not has_smask,\
1063                     "cannot have both source-mask and predicate mask"
1064                 assert not has_pmask,\
1065                     "cannot have both dest-mask and predicate mask"
1066             # since the default is INT predication (ALWAYS), if you
1067             # specify one CR mask, you must specify both, to avoid
1068             # mixing INT and CR reg types
1069             if has_pmask and pmmode == 1:
1070                 assert has_smask, \
1071                     "need explicit source-mask in CR twin predication"
1072             if has_smask and smmode == 1:
1073                 assert has_pmask, \
1074                     "need explicit dest-mask in CR twin predication"
1075             # sanity-check that 2Pred mask is same mode
1076             if has_pmask and has_smask:
1077                 assert smmode == pmmode, \
1078                     "predicate masks %s and %s must be same reg type" % \
1079                     (pme, sme)
1080
1081         # sanity-check that twin-predication mask only specified in 2P mode
1082         if ptype == '1P':
1083             assert not has_smask, \
1084                 "source-mask can only be specified on Twin-predicate ops"
1085             assert not has_pmask, \
1086                 "dest-mask can only be specified on Twin-predicate ops"
1087
1088         # construct the mode field, doing sanity-checking along the way
1089         if mapreduce_svm:
1090             assert sv_mode == 0b00, "sub-vector mode in mapreduce only"
1091             assert subvl != 0, "sub-vector mode not possible on SUBVL=1"
1092
1093         if src_zero:
1094             assert has_smask or mask_m_specified, \
1095                 "src zeroing requires a source predicate"
1096         if dst_zero:
1097             assert has_pmask or mask_m_specified, \
1098                 "dest zeroing requires a dest predicate"
1099
1100         # okaaay, so there are 4 different modes, here, which will be
1101         # partly-merged-in: is_ldst is merged in with "normal", but
1102         # is_bc is so different it's done separately.  likewise is_cr
1103         # (when it is done).  here are the maps:
1104
1105         # for "normal" arithmetic: https://libre-soc.org/openpower/sv/normal/
1106         """
1107             | 0-1 |  2  |  3   4  |  description              |
1108             | --- | --- |---------|-------------------------- |
1109             | 00  |   0 |  dz  sz | normal mode                      |
1110             | 00  |   1 | 0  RG   | scalar reduce mode (mapreduce), SUBVL=1 |
1111             | 00  |   1 | 1  /    | parallel reduce mode (mapreduce), SUBVL=1 |
1112             | 00  |   1 | SVM RG  | subvector reduce mode, SUBVL>1   |
1113             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel              |
1114             | 01  | inv | VLi RC1 |  Rc=0: ffirst z/nonz |
1115             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1116             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1117             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1118         """
1119
1120         # https://libre-soc.org/openpower/sv/ldst/
1121         # for LD/ST-immediate:
1122         """
1123             | 0-1 |  2  |  3   4  |  description               |
1124             | --- | --- |---------|--------------------------- |
1125             | 00  | 0   |  dz els | normal mode                |
1126             | 00  | 1   |  dz shf | shift mode                 |
1127             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel        |
1128             | 01  | inv | els RC1 |  Rc=0: ffirst z/nonz       |
1129             | 10  |   N | dz  els |  sat mode: N=0/1 u/s       |
1130             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel  |
1131             | 11  | inv | els RC1 |  Rc=0: pred-result z/nonz  |
1132         """
1133
1134         # for LD/ST-indexed (RA+RB):
1135         """
1136             | 0-1 |  2  |  3   4  |  description              |
1137             | --- | --- |---------|-------------------------- |
1138             | 00  | SEA |  dz  sz | normal mode        |
1139             | 01  | SEA | dz sz  | Strided (scalar only source)   |
1140             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1141             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1142             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1143         """
1144
1145         # and leaving out branches and cr_ops for now because they're
1146         # under development
1147         """ TODO branches and cr_ops
1148         """
1149
1150         # now create mode and (overridden) src/dst widths
1151         # XXX TODO: sanity-check bc modes
1152         if is_bc:
1153             sv_mode = ((bc_svstep << SVP64MODE.MOD2_MSB) |
1154                        (bc_vlset << SVP64MODE.MOD2_LSB) |
1155                        (bc_snz << SVP64MODE.BC_SNZ))
1156             srcwid = (bc_vsb << 1) | bc_lru
1157             destwid = (bc_lru << 1) | bc_all
1158
1159         else:
1160
1161             ######################################
1162             # "normal" mode
1163             if sv_mode is None:
1164                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1165                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1166                 if is_ldst:
1167                     # TODO: for now, LD/ST-indexed is ignored.
1168                     mode |= ldst_elstride << SVP64MODE.ELS_NORMAL  # el-strided
1169                 else:
1170                     # TODO, reduce and subvector mode
1171                     # 00  1   dz CRM  reduce mode (mapreduce), SUBVL=1
1172                     # 00  1   SVM CRM subvector reduce mode, SUBVL>1
1173                     pass
1174                 sv_mode = 0b00
1175
1176             ######################################
1177             # "mapreduce" modes
1178             elif sv_mode == 0b00:
1179                 mode |= (0b1 << SVP64MODE.REDUCE)  # sets mapreduce
1180                 assert dst_zero == 0, "dest-zero not allowed in mapreduce mode"
1181                 if reverse_gear:
1182                     mode |= (0b1 << SVP64MODE.RG)  # sets Reverse-gear mode
1183                 if mapreduce_crm:
1184                     mode |= (0b1 << SVP64MODE.CRM)  # sets CRM mode
1185                     assert rc_mode, "CRM only allowed when Rc=1"
1186                 # bit of weird encoding to jam zero-pred or SVM mode in.
1187                 # SVM mode can be enabled only when SUBVL=2/3/4 (vec2/3/4)
1188                 if subvl == 0:
1189                     mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1190                 elif mapreduce_svm:
1191                     mode |= (0b1 << SVP64MODE.SVM)  # sets SVM mode
1192
1193             ######################################
1194             # "failfirst" modes
1195             elif sv_mode == 0b01:
1196                 assert src_zero == 0, "dest-zero not allowed in failfirst mode"
1197                 if failfirst == 'RC1':
1198                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1199                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1200                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1201                 elif failfirst == '~RC1':
1202                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1203                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1204                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1205                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1206                 else:
1207                     assert dst_zero == 0, "dst-zero not allowed in ffirst BO"
1208                     assert rc_mode, "ffirst BO only possible when Rc=1"
1209                     mode |= (failfirst << SVP64MODE.BO_LSB)  # set BO
1210
1211             ######################################
1212             # "saturation" modes
1213             elif sv_mode == 0b10:
1214                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1215                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1216                 mode |= (saturation << SVP64MODE.N)  # signed/us saturation
1217
1218             ######################################
1219             # "predicate-result" modes.  err... code-duplication from ffirst
1220             elif sv_mode == 0b11:
1221                 assert src_zero == 0, "dest-zero not allowed in predresult mode"
1222                 if predresult == 'RC1':
1223                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1224                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1225                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1226                 elif predresult == '~RC1':
1227                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1228                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1229                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1230                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1231                 else:
1232                     assert dst_zero == 0, "dst-zero not allowed in pr-mode BO"
1233                     assert rc_mode, "pr-mode BO only possible when Rc=1"
1234                     mode |= (predresult << SVP64MODE.BO_LSB)  # set BO
1235
1236         # whewww.... modes all done :)
1237         # now put into svp64_rm
1238         mode |= sv_mode
1239         # mode: bits 19-23
1240         svp64_rm.mode = mode
1241
1242         # put in predicate masks into svp64_rm
1243         if ptype == '2P':
1244             # source pred: bits 16-18
1245             svp64_rm.smask = smask
1246         # mask mode: bit 0
1247         svp64_rm.mmode = mmode
1248         # 1-pred: bits 1-3
1249         svp64_rm.mask = pmask
1250
1251         # and subvl: bits 8-9
1252         svp64_rm.subvl = subvl
1253
1254         # put in elwidths
1255         # srcwid: bits 6-7
1256         svp64_rm.ewsrc = srcwid
1257         # destwid: bits 4-5
1258         svp64_rm.elwidth = destwid
1259
1260         # nice debug printout. (and now for something completely different)
1261         # https://youtu.be/u0WOIwlXE9g?t=146
1262         svp64_rm_value = int(svp64_rm)
1263         log("svp64_rm", hex(svp64_rm_value), bin(svp64_rm_value))
1264         log("    mmode  0    :", bin(mmode))
1265         log("    pmask  1-3  :", bin(pmask))
1266         log("    dstwid 4-5  :", bin(destwid))
1267         log("    srcwid 6-7  :", bin(srcwid))
1268         log("    subvl  8-9  :", bin(subvl))
1269         log("    mode   19-23:", bin(mode))
1270         offs = 2 if etype == 'EXTRA2' else 3  # 2 or 3 bits
1271         for idx, sv_extra in extras.items():
1272             if idx is None:
1273                 continue
1274             if idx[0] == 'imm':
1275                 continue
1276             srcdest, idx, duplicate = idx
1277             start = (10+idx*offs)
1278             end = start + offs-1
1279             log("    extra%d %2d-%2d:" % (idx, start, end),
1280                 bin(sv_extra))
1281         if ptype == '2P':
1282             log("    smask  16-17:", bin(smask))
1283         log()
1284
1285         # update prefix PO and ID (aka PID)
1286         svp64_prefix.po = 0x1
1287         svp64_prefix.id = 0b11
1288
1289         # fiinally yield the svp64 prefix and the thingy.  v3.0b opcode
1290         rc = '.' if rc_mode else ''
1291         yield ".long 0x%08x" % int(svp64_prefix)
1292         log(v30b_op, v30b_newfields)
1293         # argh, sv.fmadds etc. need to be done manually
1294         if v30b_op == 'ffmadds':
1295             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1296             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1297             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1298             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1299             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1300             opcode |= 0b00101 << (32-31)   # bits 26-30
1301             if rc:
1302                 opcode |= 1  # Rc, bit 31.
1303             yield ".long 0x%x" % opcode
1304         # argh, sv.fdmadds need to be done manually
1305         elif v30b_op == 'fdmadds':
1306             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1307             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1308             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1309             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1310             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1311             opcode |= 0b01111 << (32-31)   # bits 26-30
1312             if rc:
1313                 opcode |= 1  # Rc, bit 31.
1314             yield ".long 0x%x" % opcode
1315         # argh, sv.ffadds etc. need to be done manually
1316         elif v30b_op == 'ffadds':
1317             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1318             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1319             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1320             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1321             opcode |= 0b01101 << (32-31)   # bits 26-30
1322             if rc:
1323                 opcode |= 1  # Rc, bit 31.
1324             yield ".long 0x%x" % opcode
1325         # sigh have to do svstep here manually for now...
1326         elif v30b_op in ["svstep", "svstep."]:
1327             insn = 22 << (31-5)          # opcode 22, bits 0-5
1328             insn |= int(v30b_newfields[0]) << (31-10)  # RT       , bits 6-10
1329             insn |= int(v30b_newfields[1]) << (31-22)  # SVi      , bits 16-22
1330             insn |= int(v30b_newfields[2]) << (31-25)  # vf       , bit  25
1331             insn |= 0b10011 << (31-30)  # XO       , bits 26..30
1332             if opcode == 'svstep.':
1333                 insn |= 1 << (31-31)     # Rc=1     , bit 31
1334             log("svstep", bin(insn))
1335             yield ".long 0x%x" % insn
1336         # argh, sv.fcoss etc. need to be done manually
1337         elif v30b_op in ["fcoss", "fcoss."]:
1338             insn = 59 << (31-5)  # opcode 59, bits 0-5
1339             insn |= int(v30b_newfields[0]) << (31-10)  # RT       , bits 6-10
1340             insn |= int(v30b_newfields[1]) << (31-20)  # RB       , bits 16-20
1341             insn |= 0b1000101110 << (31-30)  # XO       , bits 21..30
1342             if opcode == 'fcoss.':
1343                 insn |= 1 << (31-31)     # Rc=1     , bit 31
1344             log("fcoss", bin(insn))
1345             yield ".long 0x%x" % insn
1346         else:
1347             if not v30b_op.endswith('.'):
1348                 v30b_op += rc
1349             yield "%s %s" % (v30b_op, ", ".join(v30b_newfields))
1350         log("new v3.0B fields", v30b_op, v30b_newfields)
1351
1352     def translate(self, lst):
1353         for insn in lst:
1354             yield from self.translate_one(insn)
1355
1356
def macro_subst(macros, txt):
    """Expand macros inside a single piece of operand text.

    For each macro, ``txt`` is rewritten when it is exactly the macro
    name, exactly the name followed by ``.s`` or ``.v`` (the suffix is
    kept), or when it contains the name in brackets, ``(name)`` (the
    brackets are kept).  Passes over ``macros`` are repeated so that a
    macro whose value is itself a macro gets fully expanded.

    :param macros: dict mapping macro name to its replacement text
    :param txt: the text to expand
    :returns: ``txt`` with all macros expanded
    :raises ValueError: if the macros are recursive, i.e. a pass changes
        the text but ends on a text that an earlier pass already ended
        on, so the expansion could never finish (this used to loop
        forever)
    """
    log("subst", txt, macros)
    # text at the start, and at the end of every pass made so far
    seen = {txt}
    while True:
        changed = False
        for macro, value in macros.items():
            # the first matching form wins; at most one per macro per pass
            if macro == txt:
                toreplace, newtext = macro, value
            elif '%s.s' % macro == txt:
                toreplace, newtext = '%s.s' % macro, '%s.s' % value
            elif '%s.v' % macro == txt:
                toreplace, newtext = '%s.v' % macro, '%s.v' % value
            elif '(%s)' % macro in txt:
                toreplace, newtext = '(%s)' % macro, '(%s)' % value
            else:
                continue
            replaced = txt.replace(toreplace, newtext)
            log("macro", txt, "replaced", replaced, toreplace, value)
            if replaced != txt:
                changed = True
            txt = replaced
        if not changed:
            # nothing matched, or every match was a no-op (e.g. a macro
            # defined as itself): another pass would be identical, so
            # this is the final text
            break
        if txt in seen:
            # a pass is a pure function of the text, so coming back to an
            # earlier text means the expansion would repeat indefinitely
            raise ValueError("recursive macro definition while "
                             "expanding %r" % txt)
        seen.add(txt)
    log("    processed", txt)
    return txt
1392
1393
def get_ws(line):
    """Split ``line`` into its leading whitespace and the remainder.

    Returns a ``(whitespace, rest)`` pair; ``whitespace`` is empty when
    ``line`` does not start with whitespace, and ``rest`` is empty when
    ``line`` consists of nothing but whitespace.
    """
    if not line:
        return '', line
    # index of the first non-whitespace character (or the end of the line)
    split_at = next(
        (pos for pos, char in enumerate(line) if not char.isspace()),
        len(line))
    return line[:split_at], line[split_at:]
1403
1404
def asm_process():
    """Command-line filter translating SVP64 assembly line by line.

    Usage is ``pysvp64asm [infile | -] [outfile | -]``.  With no
    arguments stdin is translated to stdout.  With two arguments each is
    a filename, or ``-`` for stdin/stdout (``--`` is still accepted for
    backwards compatibility).  Any other number of arguments prints the
    usage text to stderr and exits with status 1.

    For every input line the text before any ``#`` is inspected:

    * ``.set name, value`` records a macro for later instructions;
    * a line starting with ``sv.`` or with one of ``CUSTOM_INSNS`` is
      replaced by the ``; ``-joined output of ``translate_one``, keeping
      the original indentation and appending the original instruction as
      a comment;
    * every other line is written out unchanged.

    The whole input is read before the output is opened, so the input
    file may safely be overwritten in place.
    """
    args = sys.argv[1:]
    if len(args) not in (0, 2):
        print("pysvp64asm [infile | -] [outfile | -]", file=sys.stderr)
        # a usage error is a failure: report it through the exit status
        sys.exit(1)
    in_name, out_name = args or ('-', '-')
    # '-' is what the usage text documents; '--' is the historical spelling
    stdio_names = ('-', '--')

    # read the whole lot in advance in case of in-place overwrite
    if in_name in stdio_names:
        lines = sys.stdin.readlines()
    else:
        with open(in_name, "r") as infile:
            lines = infile.readlines()

    to_stdout = out_name in stdio_names
    outfile = sys.stdout if to_stdout else open(out_name, "w")
    try:
        # read the line, look for custom insn, process it
        macros = {}  # macros which start ".set"
        isa = SVP64Asm([])
        custom_prefixes = tuple(CUSTOM_INSNS)
        for line in lines:
            op = line.split("#")[0].strip()
            # identify macros
            if op.startswith(".set"):
                macro = op[4:].split(",")
                (macro, value) = map(str.strip, macro)
                macros[macro] = value
            if not op.startswith('sv.') and \
                    not op.startswith(custom_prefixes):
                outfile.write(line)
                continue

            # keep the original indentation in front of the translation
            ws, _ = get_ws(line)
            lst = isa.translate_one(op, macros)
            outfile.write("%s%s # %s\n" % (ws, '; '.join(lst), op))
    finally:
        # make sure a real output file is flushed and closed, even if
        # translation fails part-way; never close sys.stdout
        if not to_stdout:
            outfile.close()
1447
1448
if __name__ == '__main__':
    # Ad-hoc self-test scratchpad.  NOTE: every "lst = [...]" below
    # *replaces* the list built so far, so only the final assignment is
    # actually handed to SVP64Asm; the earlier lists are kept purely as a
    # record of test cases that were tried previously.
    lst = ['slw 3, 1, 4',
           'extsw 5, 3',
           'sv.extsw 5, 3',
           'sv.cmpi 5, 1, 3, 2',
           'sv.setb 5, 31',
           'sv.isel 64.v, 3, 2, 65.v',
           'sv.setb/dm=r3/sm=1<<r3 5, 31',
           'sv.setb/m=r3 5, 31',
           'sv.setb/vec2 5, 31',
           'sv.setb/sw=8/ew=16 5, 31',
           'sv.extsw./ff=eq 5, 31',
           'sv.extsw./satu/sz/dz/sm=r3/dm=r3 5, 31',
           'sv.extsw./pr=eq 5.v, 31',
           'sv.add. 5.v, 2.v, 1.v',
           'sv.add./m=r3 5.v, 2.v, 1.v',
           ]
    lst += [
        'sv.stw 5.v, 4(1.v)',
        'sv.ld 5.v, 4(1.v)',
        'setvl. 2, 3, 4, 0, 1, 1',
        'sv.setvl. 2, 3, 4, 0, 1, 1',
    ]
    # (overrides everything above)
    lst = [
        "sv.stfsu 0.v, 16(4.v)",
    ]
    lst = [
        "sv.stfsu/els 0.v, 16(4)",
    ]
    lst = [
        'sv.add./mr 5.v, 2.v, 1.v',
    ]
    # macros is the one assignment here that is not overridden later:
    # it is passed to SVP64Asm at the bottom of this block
    macros = {'win2': '50', 'win': '60'}
    lst = [
        'sv.addi win2.v, win.v, -1',
        'sv.add./mrr 5.v, 2.v, 1.v',
        #'sv.lhzsh 5.v, 11(9.v), 15',
        #'sv.lwzsh 5.v, 11(9.v), 15',
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',
    ]
    lst = [
        #'sv.fmadds 0.v, 8.v, 16.v, 4.v',
        #'sv.ffadds 0.v, 8.v, 4.v',
        'svremap 11, 0, 1, 2, 3, 2, 1',
        'svshape 8, 1, 1, 1, 0',
        'svshape 8, 1, 1, 1, 1',
    ]
    lst = [
        #'sv.lfssh 4.v, 11(8.v), 15',
        #'sv.lwzsh 4.v, 11(8.v), 15',
        #'sv.svstep. 2.v, 4, 0',
        #'sv.fcfids. 48.v, 64.v',
        'sv.fcoss. 80.v, 0.v',
        'sv.fcoss. 20.v, 0.v',
    ]
    lst = [
        'sv.bc/all 3,12,192',
        'sv.bclr/vsbi 3,81.v,192',
        'sv.ld 5.v, 4(1.v)',
        'sv.svstep. 2.v, 4, 0',
    ]
    lst = [
        'maxs 3,12,5',
        'maxs. 3,12,5',
        'avgadd 3,12,5',
        'absdu 3,12,5',
        'absds 3,12,5',
        'absdacu 3,12,5',
        'absdacs 3,12,5',
        'cprop 3,12,5',
        'svindex 0,0,1,0,0,0,0',
    ]
    lst = [
        'sv.svstep./m=r3 2.v, 4, 0',
        'ternlogi 0,0,0,0x5',
        'fmvis 5,65535',
        'fmvis 5,1',
        'fmvis 5,2',
        'fmvis 5,4',
        'fmvis 5,8',
        'fmvis 5,16',
        'fmvis 5,32',
        'fmvis 5,64',
        'fmvis 5,32768',
    ]
    # this final list is the one that actually gets translated
    lst = [
        'sv.andi. *80, *80, 1',
        'sv.ffmadds. 6.v, 2.v, 4.v, 6.v',  # incorrectly inserted 32-bit op
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',  # correctly converted to .long
    ]
    # build the assembler from the list and macros above, then log every
    # line obtained by iterating it
    isa = SVP64Asm(lst, macros=macros)
    log("list:\n", "\n\t".join(list(isa)))
    # running svp64.py is designed to test hard-coded lists
    # (above) - which strictly speaking should all be unit tests.
    # if you need to actually do assembler translation at the
    # commandline use "pysvp64asm" - see setup.py
    # XXX NO. asm_process()