src/openpower/sv/trans/svp64.py

   1 # SPDX-License-Identifier: LGPLv3+
   2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   3 # Funded by NLnet http://nlnet.nl
   4
   5 """SVP64 OpenPOWER v3.0B assembly translator
   6
   7 This class takes raw svp64 assembly mnemonics (aliases excluded) and creates
   8 an EXT001-encoded "svp64 prefix" (as a .long) followed by a v3.0B opcode.
   9
  10 It is very simple and straightforward, the only weirdness being the
  11 extraction of the register information and conversion to v3.0B numbering.
  12
  13 Encoding format of svp64: https://libre-soc.org/openpower/sv/svp64/
  14 Encoding format of arithmetic: https://libre-soc.org/openpower/sv/normal/
  15 Encoding format of LDST: https://libre-soc.org/openpower/sv/ldst/
  16 **TODO format of branches: https://libre-soc.org/openpower/sv/branches/**
  17 **TODO format of CRs: https://libre-soc.org/openpower/sv/cr_ops/**
  18 Bugtracker: https://bugs.libre-soc.org/show_bug.cgi?id=578
  19 """
  20
  21 import functools
  22 import os
  23 import sys
  24 from collections import OrderedDict
  25 import inspect
  26
  27 from openpower.decoder.pseudo.pagereader import ISA
  28 from openpower.decoder.power_svp64 import SVP64RM, get_regtype, decode_extra
  29 from openpower.decoder.selectable_int import SelectableInt
  30 from openpower.consts import SVP64MODE
  31 from openpower.decoder.power_insn import SVP64Instruction
  32 from openpower.decoder.power_insn import Database
  33 from openpower.decoder.power_enums import find_wiki_dir
  34
  35 # for debug logging
  36 from openpower.util import log
  37
  38
  39 def instruction(*fields):
  40     def instruction(insn, desc):
  41         (value, start, end) = desc
  42         bits = ((1,) * ((end + 1) - start))
  43         mask = 0
  44         for bit in bits:
  45             mask = ((mask << 1) | bit)
  46         return (insn | ((value & mask) << (31 - end)))
  47
  48     return functools.reduce(instruction, fields, 0)
  49
  50
# registry of hand-encoded instructions: maps an assembly mnemonic (str)
# to a callable taking the tuple of numeric operand fields and returning
# the encoded instruction word.  filled in by the `_custom_insns` decorator.
CUSTOM_INSNS = {}
  52
  53
  54 def _insn(name, **kwargs):
  55     return name, kwargs
  56
  57
  58 def _custom_insns(*insns):
  59     """ a decorator that adds the function to `CUSTOM_INSNS` """
  60
  61     def decorator(fn):
  62         FIELDS_ARG = object()
  63         if len(insns) == 0:
  64             insns_ = (fn.__name__, {}),
  65         else:
  66             insns_ = insns
  67         for name, kwargs in insns_:
  68             if not isinstance(name, str):
  69                 raise TypeError("instruction name must be a str: {name!r}")
  70             if name in CUSTOM_INSNS:
  71                 raise ValueError(f"duplicate instruction mnemonic: {name!r}")
  72             # use getcallargs to check that arguments work:
  73             inspect.getcallargs(fn, FIELDS_ARG, **kwargs)
  74             CUSTOM_INSNS[name] = functools.partial(fn, **kwargs)
  75         return fn
  76     return decorator
  77
  78
  79 @_custom_insns(
  80     _insn("setvl", Rc=0),
  81     _insn("setvl.", Rc=1),
  82 )
  83 def setvl(fields, Rc):
  84     """
  85     setvl is a *32-bit-only* instruction. It controls SVSTATE.
  86     It is *not* a 64-bit-prefixed Vector instruction (no sv.setvl, yet),
  87     it is a Vector *control* instruction.
  88
  89     * setvl RT,RA,SVi,vf,vs,ms
  90
  91     1.6.28 SVL-FORM - from fields.txt
  92     |0     |6    |11    |16   |23 |24 |25 |26    |31 |
  93     | PO   |  RT |   RA | SVi |ms |vs |vf |   XO |Rc |
  94     """
  95     PO = 22
  96     XO = 0b11011
  97     # ARRRGH these are in a non-obvious order in openpower/isa/simplev.mdwn
  98     # compared to the SVL-Form above. sigh
  99     # setvl RT,RA,SVi,vf,vs,ms
 100     (RT, RA, SVi, vf, vs, ms) = fields
 101     SVi -= 1
 102     return instruction(
 103         (PO, 0, 5),
 104         (RT, 6, 10),
 105         (RA, 11, 15),
 106         (SVi, 16, 22),
 107         (ms, 23, 23),
 108         (vs, 24, 24),
 109         (vf, 25, 25),
 110         (XO, 26, 30),
 111         (Rc, 31, 31),
 112     )
 113
 114
 115 @_custom_insns(
 116     _insn("svstep", Rc=0),
 117     _insn("svstep.", Rc=1),
 118 )
 119 def svstep(fields, Rc):
 120     """
 121     svstep is a 32-bit instruction. It updates SVSTATE.
 122     It *can* be SVP64-prefixed, to indicate that its registers
 123     are Vectorised.
 124
 125     * svstep RT,SVi,vf
 126
 127     # 1.6.28 SVL-FORM - from fields.txt
 128     # |0     |6    |11    |16   |23 |24 |25 |26    |31 |
 129     # | PO   |  RT | /    | SVi |/  |/  |vf |   XO |Rc |
 130
 131     """
 132     PO = 22
 133     XO = 0b10011
 134     (RT, SVi, vf) = fields
 135     SVi -= 1
 136     return instruction(
 137         (PO, 0, 5),
 138         (RT, 6, 10),
 139         (0, 11, 15),
 140         (SVi, 16, 22),
 141         (0, 23, 23),
 142         (0, 24, 24),
 143         (vf, 25, 25),
 144         (XO, 26, 30),
 145         (Rc, 31, 31),
 146     )
 147
 148
 149 @_custom_insns()
 150 def svshape(fields):
 151     """
 152     svshape is a *32-bit-only* instruction. It updates SVSHAPE and SVSTATE.
 153     It is *not* a 64-bit-prefixed Vector instruction (no sv.svshape, yet),
 154     it is a Vector *control* instruction.
 155
 156     * svshape SVxd,SVyd,SVzd,SVrm,vf
 157
 158     # 1.6.33 SVM-FORM from fields.txt
 159     # |0     |6        |11      |16    |21    |25 |26    |31  |
 160     # | PO   |  SVxd   |   SVyd | SVzd | SVrm |vf |   XO      |
 161
 162     note that SVrm is not permitted to be 0b0111, 0b1000 or 0b1001.
 163     0b0111 is reserved and 0b100- is for svshape2
 164
 165     """
 166     PO = 22
 167     XO = 0b011001
 168     (SVxd, SVyd, SVzd, SVrm, vf) = fields
 169     SVxd -= 1
 170     SVyd -= 1
 171     SVzd -= 1
 172
 173     # check SVrm for reserved (and svshape2) values
 174     assert SVrm not in [0b0111, 0b1000, 0b1001],
 175             "svshape reserved SVrm value %s" % bin(SVrm)
 176
 177     return instruction(
 178         (PO, 0, 5),
 179         (SVxd, 6, 10),
 180         (SVyd, 11, 15),
 181         (SVzd, 16, 20),
 182         (SVrm, 21, 24),
 183         (vf, 25, 25),
 184         (XO, 26, 31),
 185     )
 186
 187
 188 @_custom_insns()
 189 def svindex(fields):
 190     """
 191     svindex is a *32-bit-only* instruction. It is a convenience
 192     instruction that reduces instruction count for Indexed REMAP
 193     Mode.
 194     It is *not* a 64-bit-prefixed Vector instruction (no sv.svindex, yet),
 195     it is a Vector *control* instruction.
 196
 197     1.6.28 SVI-FORM
 198       |0     |6    |11    |16   |21 |23|24|25|26    31|
 199       | PO   |  SVG|rmm   | SVd |ew |yx|mm|sk|   XO   |
 200     """
 201     # note that the dimension field one subtracted
 202     PO = 22
 203     XO = 0b101001
 204     (SVG, rmm, SVd, ew, yx, mm, sk) = fields
 205     SVd -= 1
 206     return instruction(
 207         (PO, 0, 5),
 208         (SVG, 6, 10),
 209         (rmm, 11, 15),
 210         (SVd, 16, 20),
 211         (ew, 21, 22),
 212         (yx, 23, 23),
 213         (mm, 24, 24),
 214         (sk, 25, 25),
 215         (XO, 26, 31),
 216     )
 217
 218
 219 @_custom_insns()
 220 def svremap(fields):
 221     """
 222     this is a *32-bit-only* instruction. It updates the SVSHAPE SPR
 223     it is *not* a 64-bit-prefixed Vector instruction (no sv.svremap),
 224     it is a Vector *control* instruction.
 225
 226     * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
 227
 228     # 1.6.34 SVRM-FORM
 229        |0     |6     |11  |13   |15   |17   |19   |21  |22   |26     |31 |
 230        | PO   | SVme |mi0 | mi1 | mi2 | mo0 | mo1 |pst |///  | XO        |
 231
 232     """
 233     PO = 22
 234     XO = 0b111001
 235     (SVme, mi0, mi1, mi2, mo0, mo1, pst) = fields
 236     return instruction(
 237         (PO, 0, 5),
 238         (SVme, 6, 10),
 239         (mi0, 11, 12),
 240         (mi1, 13, 14),
 241         (mi2, 15, 16),
 242         (mo0, 17, 18),
 243         (mo1, 19, 20),
 244         (pst, 21, 21),
 245         (0, 22, 25),
 246         (XO, 26, 31),
 247     )
 248
 249
 250 # ok from here-on down these are added as 32-bit instructions
 251 # and are here only because binutils (at present) doesn't have
 252 # them (that's being fixed!)
 253 # they can - if implementations then choose - be Vectorised
 254 # because they are general-purpose scalar instructions
 255 @_custom_insns()
 256 def bmask(fields):
 257     """
 258     1.6.2.2 BM2-FORM
 259     |0     |6    |11    |16    |21   |26 |27    31|
 260     | PO   |  RT |   RA |   RB |bm   |L  |   XO   |
 261     """
 262     PO = 22
 263     XO = 0b010001
 264     (RT, RA, RB, bm, L) = fields
 265     return instruction(
 266         (PO, 0, 5),
 267         (RT, 6, 10),
 268         (RA, 11, 15),
 269         (RB, 16, 20),
 270         (bm, 21, 25),
 271         (L, 26, 26),
 272         (XO, 27, 31),
 273     )
 274
 275
 276 @_custom_insns(
 277     _insn("fsins", Rc=0),
 278     _insn("fsins.", Rc=1),
 279 )
 280 def fsins(fields, Rc):
 281     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 282     # however we are out of space with opcode 22
 283     # 1.6.7 X-FORM
 284     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 285     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 286     PO = 59
 287     XO = 0b1000001110
 288     (FRT, FRB) = fields
 289     return instruction(
 290         (PO, 0, 5),
 291         (FRT, 6, 10),
 292         (0, 11, 15),
 293         (FRB, 16, 20),
 294         (XO, 21, 30),
 295         (Rc, 31, 31),
 296     )
 297
 298
 299 @_custom_insns(
 300     _insn("fcoss", Rc=0),
 301     _insn("fcoss.", Rc=1),
 302 )
 303 def fcoss(fields, Rc):
 304     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 305     # however we are out of space with opcode 22
 306     # 1.6.7 X-FORM
 307     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 308     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 309     PO = 59
 310     XO = 0b1000101110
 311     (FRT, FRB) = fields
 312     return instruction(
 313         (PO, 0, 5),
 314         (FRT, 6, 10),
 315         (0, 11, 15),
 316         (FRB, 16, 20),
 317         (XO, 21, 30),
 318         (Rc, 31, 31),
 319     )
 320
 321
 322 @_custom_insns(
 323     _insn("ternlogi", Rc=0),
 324     _insn("ternlogi.", Rc=1),
 325 )
 326 def ternlogi(fields, Rc):
 327     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 328     # however we are out of space with opcode 22
 329     # 1.6.34 TLI-FORM
 330     #   |0   |6   |11   |16   |21   |29  |31 |
 331     #   | PO | RT |  RA |  RB | TLI | XO |Rc |
 332     PO = 5
 333     XO = 0
 334     (RT, RA, RB, TLI) = fields
 335     return instruction(
 336         (PO, 0, 5),
 337         (RT, 6, 10),
 338         (RA, 11, 15),
 339         (RB, 16, 20),
 340         (TLI, 21, 28),
 341         (XO, 29, 30),
 342         (Rc, 31, 31),
 343     )
 344
 345
 346 @_custom_insns(
 347     _insn("grev", Rc=0, imm=0, word=0),
 348     _insn("grevw", Rc=0, imm=0, word=1),
 349     _insn("grevi", Rc=0, imm=1, word=0),
 350     _insn("grevwi", Rc=0, imm=1, word=1),
 351     _insn("grev.", Rc=1, imm=0, word=0),
 352     _insn("grevw.", Rc=1, imm=0, word=1),
 353     _insn("grevi.", Rc=1, imm=1, word=0),
 354     _insn("grevwi.", Rc=1, imm=1, word=1),
 355 )
 356 def grev(fields, Rc, imm, word):
 357     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 358     # however we are out of space with opcode 22
 359     insn = PO = 5
 360     # _ matches fields in table at:
 361     # https://libre-soc.org/openpower/sv/bitmanip/
 362     XO = 0b1_0010_110
 363     if word:
 364         XO |= 0b100_000
 365     if imm:
 366         XO |= 0b1000_000
 367     (RT, RA, XBI) = fields
 368     insn = (insn << 5) | RT
 369     insn = (insn << 5) | RA
 370     if imm and not word:
 371         assert 0 <= XBI < 64
 372         insn = (insn << 6) | XBI
 373         insn = (insn << 9) | XO
 374     else:
 375         assert 0 <= XBI < 32
 376         insn = (insn << 5) | XBI
 377         insn = (insn << 10) | XO
 378     insn = (insn << 1) | Rc
 379     return insn
 380
 381
 382 @_custom_insns(
 383     _insn("maxs", XO=0b0111001110, Rc=0),
 384     _insn("maxs.", XO=0b0111001110, Rc=1),
 385     _insn("maxu", XO=0b0011001110, Rc=0),
 386     _insn("maxu.", XO=0b0011001110, Rc=1),
 387     _insn("minu", XO=0b0001001110, Rc=0),
 388     _insn("minu.", XO=0b0001001110, Rc=1),
 389     _insn("mins", XO=0b0101001110, Rc=0),
 390     _insn("mins.", XO=0b0101001110, Rc=1),
 391     _insn("absdu", XO=0b1011110110, Rc=0),
 392     _insn("absdu.", XO=0b1011110110, Rc=1),
 393     _insn("absds", XO=0b1001110110, Rc=0),
 394     _insn("absds.", XO=0b1001110110, Rc=1),
 395     _insn("avgadd", XO=0b1101001110, Rc=0),
 396     _insn("avgadd.", XO=0b1101001110, Rc=1),
 397     _insn("absdacu", XO=0b1111110110, Rc=0),
 398     _insn("absdacu.", XO=0b1111110110, Rc=1),
 399     _insn("absdacs", XO=0b0111110110, Rc=0),
 400     _insn("absdacs.", XO=0b0111110110, Rc=1),
 401     _insn("cprop", XO=0b0110001110, Rc=0),
 402     _insn("cprop.", XO=0b0110001110, Rc=1),
 403 )
 404 def av(fields, XO, Rc):
 405     # 1.6.7 X-FORM
 406     #   |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 407     #   | PO   |       RT      |    RA       |    RB       |   XO |Rc  |
 408     PO = 22
 409     (RT, RA, RB) = fields
 410     return instruction(
 411         (PO, 0, 5),
 412         (RT, 6, 10),
 413         (RA, 11, 15),
 414         (RB, 16, 20),
 415         (XO, 21, 30),
 416         (Rc, 31, 31),
 417     )
 418
 419
 420 @_custom_insns()
 421 def fmvis(fields):
 422     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 423     # V3.0B 1.6.6 DX-FORM
 424     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27    |31  |
 425     # | PO   |   FRS         |     d1      |      d0  |      XO |d2  |
 426     PO = 22
 427     XO = 0b00011
 428     (FRS, imm) = fields
 429     # first split imm into d1, d0 and d2. sigh
 430     d2 = (imm & 1)  # LSB (0)
 431     d1 = (imm >> 1) & 0b11111  # bits 1-5
 432     d0 = (imm >> 6)  # MSBs 6-15
 433     return instruction(
 434         (PO, 0, 5),
 435         (FRS, 6, 10),
 436         (d1,  11, 15),
 437         (d0,  16, 25),
 438         (XO, 26, 30),
 439         (d2, 31, 31),
 440     )
 441
 442
 443 @_custom_insns()
 444 def fishmv(fields):
 445     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 446     # V3.0B 1.6.6 DX-FORM
 447     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27   |31  |
 448     # | PO   |   FRS         |     d1      |      d0  |     XO |d2  |
 449     PO = 22
 450     XO = 0b01011
 451     (FRS, imm) = fields
 452     # first split imm into d1, d0 and d2. sigh
 453     d2 = (imm & 1)  # LSB (0)
 454     d1 = (imm >> 1) & 0b11111  # bits 1-5
 455     d0 = (imm >> 6)  # MSBs 6-15
 456     return instruction(
 457         (PO, 0, 5),
 458         (FRS, 6, 10),
 459         (d1,  11, 15),
 460         (d0,  16, 25),
 461         (XO, 26, 30),
 462         (d2, 31, 31),
 463     )
 464
 465
 466 # decode GPR into sv extra
 467 def get_extra_gpr(etype, regmode, field):
 468     if regmode == 'scalar':
 469         # cut into 2-bits 5-bits SS FFFFF
 470         sv_extra = field >> 5
 471         field = field & 0b11111
 472     else:
 473         # cut into 5-bits 2-bits FFFFF SS
 474         sv_extra = field & 0b11
 475         field = field >> 2
 476     return sv_extra, field
 477
 478
 479 # decode 3-bit CR into sv extra
 480 def get_extra_cr_3bit(etype, regmode, field):
 481     if regmode == 'scalar':
 482         # cut into 2-bits 3-bits SS FFF
 483         sv_extra = field >> 3
 484         field = field & 0b111
 485     else:
 486         # cut into 3-bits 4-bits FFF SSSS but will cut 2 zeros off later
 487         sv_extra = field & 0b1111
 488         field = field >> 4
 489     return sv_extra, field
 490
 491
 492 # decodes SUBVL
 493 def decode_subvl(encoding):
 494     pmap = {'2': 0b01, '3': 0b10, '4': 0b11}
 495     assert encoding in pmap, \
 496         "encoding %s for SUBVL not recognised" % encoding
 497     return pmap[encoding]
 498
 499
 500 # decodes elwidth
 501 def decode_elwidth(encoding):
 502     pmap = {'8': 0b11, '16': 0b10, '32': 0b01}
 503     assert encoding in pmap, \
 504         "encoding %s for elwidth not recognised" % encoding
 505     return pmap[encoding]
 506
 507
 508 # decodes predicate register encoding
 509 def decode_predicate(encoding):
 510     pmap = {  # integer
 511         '1<<r3': (0, 0b001),
 512         'r3': (0, 0b010),
 513         '~r3': (0, 0b011),
 514         'r10': (0, 0b100),
 515         '~r10': (0, 0b101),
 516         'r30': (0, 0b110),
 517         '~r30': (0, 0b111),
 518         # CR
 519         'lt': (1, 0b000),
 520         'nl': (1, 0b001), 'ge': (1, 0b001),  # same value
 521         'gt': (1, 0b010),
 522         'ng': (1, 0b011), 'le': (1, 0b011),  # same value
 523         'eq': (1, 0b100),
 524         'ne': (1, 0b101),
 525         'so': (1, 0b110), 'un': (1, 0b110),  # same value
 526         'ns': (1, 0b111), 'nu': (1, 0b111),  # same value
 527     }
 528     assert encoding in pmap, \
 529         "encoding %s for predicate not recognised" % encoding
 530     return pmap[encoding]
 531
 532
 533 # decodes "Mode" in similar way to BO field (supposed to, anyway)
 534 def decode_bo(encoding):
 535     pmap = {  # TODO: double-check that these are the same as Branch BO
 536         'lt': 0b000,
 537         'nl': 0b001, 'ge': 0b001,  # same value
 538         'gt': 0b010,
 539         'ng': 0b011, 'le': 0b011,  # same value
 540         'eq': 0b100,
 541         'ne': 0b101,
 542         'so': 0b110, 'un': 0b110,  # same value
 543         'ns': 0b111, 'nu': 0b111,  # same value
 544     }
 545     assert encoding in pmap, \
 546         "encoding %s for BO Mode not recognised" % encoding
 547     return pmap[encoding]
 548
 549
 550 # partial-decode fail-first mode
 551 def decode_ffirst(encoding):
 552     if encoding in ['RC1', '~RC1']:
 553         return encoding
 554     return decode_bo(encoding)
 555
 556
 557 def decode_reg(field, macros=None):
 558     if macros is None:
 559         macros = {}
 560     # decode the field number. "5.v" or "3.s" or "9"
 561     # and now also "*0", and "*%0".  note: *NOT* to add "*%rNNN" etc.
 562     # https://bugs.libre-soc.org/show_bug.cgi?id=884#c0
 563     if field.startswith(("*%", "*")):
 564         if field.startswith("*%"):
 565             field = field[2:]
 566         else:
 567             field = field[1:]
 568         while field in macros:
 569             field = macros[field]
 570         return int(field), "vector"  # actual register number
 571
 572     # try old convention (to be retired)
 573     field = field.split(".")
 574     regmode = 'scalar'  # default
 575     if len(field) == 2:
 576         if field[1] == 's':
 577             regmode = 'scalar'
 578         elif field[1] == 'v':
 579             regmode = 'vector'
 580     field = int(field[0])  # actual register number
 581     return field, regmode
 582
 583
 584 def decode_imm(field):
 585     ldst_imm = "(" in field and field[-1] == ')'
 586     if ldst_imm:
 587         return field[:-1].split("(")
 588     else:
 589         return None, field
 590
 591
 592 def crf_extra(etype, regmode, field, extras):
 593     """takes a CR Field number (CR0-CR127), splits into EXTRA2/3 and v3.0
 594     the scalar/vector mode (crNN.v or crNN.s) changes both the format
 595     of the EXTRA2/3 encoding as well as what range of registers is possible.
 596     this function can be used for both BF/BFA and BA/BB/BT by first removing
 597     the bottom 2 bits of BA/BB/BT then re-instating them after encoding.
 598     see https://libre-soc.org/openpower/sv/svp64/appendix/#cr_extra
 599     for specification
 600     """
 601     sv_extra, field = get_extra_cr_3bit(etype, regmode, field)
 602     # now sanity-check (and shrink afterwards)
 603     if etype == 'EXTRA2':
 604         # 3-bit CR Field (BF, BFA) EXTRA2 encoding
 605         if regmode == 'scalar':
 606             # range is CR0-CR15 in increments of 1
 607             assert (sv_extra >> 1) == 0, \
 608                 "scalar CR %s cannot fit into EXTRA2 %s" % \
 609                 (rname, str(extras[extra_idx]))
 610             # all good: encode as scalar
 611             sv_extra = sv_extra & 0b01
 612         else:  # vector
 613             # range is CR0-CR127 in increments of 16
 614             assert sv_extra & 0b111 == 0, \
 615                 "vector CR %s cannot fit into EXTRA2 %s" % \
 616                 (rname, str(extras[extra_idx]))
 617             # all good: encode as vector (bit 2 set)
 618             sv_extra = 0b10 | (sv_extra >> 3)
 619     else:
 620         # 3-bit CR Field (BF, BFA) EXTRA3 encoding
 621         if regmode == 'scalar':
 622             # range is CR0-CR31 in increments of 1
 623             assert (sv_extra >> 2) == 0, \
 624                 "scalar CR %s cannot fit into EXTRA3 %s" % \
 625                 (rname, str(extras[extra_idx]))
 626             # all good: encode as scalar
 627             sv_extra = sv_extra & 0b11
 628         else:  # vector
 629             # range is CR0-CR127 in increments of 8
 630             assert sv_extra & 0b11 == 0, \
 631                 "vector CR %s cannot fit into EXTRA3 %s" % \
 632                 (rname, str(extras[extra_idx]))
 633             # all good: encode as vector (bit 3 set)
 634             sv_extra = 0b100 | (sv_extra >> 2)
 635     return sv_extra, field
 636
 637
 638 def to_number(field):
 639     if field.startswith("0x"):
 640         return eval(field)
 641     if field.startswith("0b"):
 642         return eval(field)
 643     return int(field)
 644
 645
 646 # decodes svp64 assembly listings and creates EXT001 svp64 prefixes
 647 class SVP64Asm:
 648     def __init__(self, lst, bigendian=False, macros=None):
 649         if macros is None:
 650             macros = {}
 651         self.macros = macros
 652         self.lst = lst
 653         self.trans = self.translate(lst)
 654         self.isa = ISA()  # reads the v3.0B pseudo-code markdown files
 655         self.svp64 = SVP64RM()  # reads the svp64 Remap entries for registers
 656         assert bigendian == False, "error, bigendian not supported yet"
 657
    def __iter__(self):
        """Iterate over self.trans, i.e. whatever self.translate(lst)
        returned when the object was constructed."""
        yield from self.trans
 660
 661     def translate_one(self, insn, macros=None):
 662         if macros is None:
 663             macros = {}
 664         macros.update(self.macros)
 665         isa = self.isa
 666         svp64 = self.svp64
 667         insn_no_comments = insn.partition('#')[0]
 668         # find first space, to get opcode
 669         ls = insn_no_comments.split(' ')
 670         opcode = ls[0]
 671         # now find opcode fields
 672         fields = ''.join(ls[1:]).split(',')
 673         mfields = list(map(str.strip, fields))
 674         log("opcode, fields", ls, opcode, mfields)
 675         fields = []
 676         # macro substitution
 677         for field in mfields:
 678             fields.append(macro_subst(macros, field))
 679         log("opcode, fields substed", ls, opcode, fields)
 680
 681         # identify if it is a special instruction
 682         custom_insn_hook = CUSTOM_INSNS.get(opcode)
 683         if custom_insn_hook is not None:
 684             fields = tuple(map(to_number, fields))
 685             insn_num = custom_insn_hook(fields)
 686             log(opcode, bin(insn_num))
 687             yield ".long 0x%X # %s" % (insn_num, insn)
 688             return
 689
 690         # identify if is a svp64 mnemonic
 691         if not opcode.startswith('sv.'):
 692             yield insn  # unaltered
 693             return
 694         opcode = opcode[3:]  # strip leading "sv"
 695
 696         # start working on decoding the svp64 op: sv.basev30Bop/vec2/mode
 697         opmodes = opcode.split("/")  # split at "/"
 698         v30b_op_orig = opmodes.pop(0)    # first is the v3.0B
 699         # check instruction ends with dot
 700         rc_mode = v30b_op_orig.endswith('.')
 701         if rc_mode:
 702             v30b_op = v30b_op_orig[:-1]
 703         else:
 704             v30b_op = v30b_op_orig
 705
 706         # look up the 32-bit op (original, with "." if it has it)
 707         if v30b_op_orig in isa.instr:
 708             isa_instr = isa.instr[v30b_op_orig]
 709         else:
 710             raise Exception("opcode %s of '%s' not supported" %
 711                             (v30b_op_orig, insn))
 712
 713         # look up the svp64 op, first the original (with "." if it has it)
 714         if v30b_op_orig in svp64.instrs:
 715             rm = svp64.instrs[v30b_op_orig]  # one row of the svp64 RM CSV
 716         # then without the "." (if there was one)
 717         elif v30b_op in svp64.instrs:
 718             rm = svp64.instrs[v30b_op]  # one row of the svp64 RM CSV
 719         else:
 720             raise Exception(f"opcode {v30b_op_orig!r} of "
 721                             f"{insn!r} not an svp64 instruction")
 722
 723         # get regs info e.g. "RT,RA,RB"
 724         v30b_regs = isa_instr.regs[0]
 725         log("v3.0B op", v30b_op, "Rc=1" if rc_mode else '')
 726         log("v3.0B regs", opcode, v30b_regs)
 727         log("RM", rm)
 728
 729         # right.  the first thing to do is identify the ordering of
 730         # the registers, by name.  the EXTRA2/3 ordering is in
 731         # rm['0']..rm['3'] but those fields contain the names RA, BB
 732         # etc.  we have to read the pseudocode to understand which
 733         # reg is which in our instruction. sigh.
 734
 735         # first turn the svp64 rm into a "by name" dict, recording
 736         # which position in the RM EXTRA it goes into
 737         # also: record if the src or dest was a CR, for sanity-checking
 738         # (elwidth overrides on CRs are banned)
 739         decode = decode_extra(rm)
 740         dest_reg_cr, src_reg_cr, svp64_src, svp64_dest = decode
 741
 742         log("EXTRA field index, src", svp64_src)
 743         log("EXTRA field index, dest", svp64_dest)
 744
 745         # okaaay now we identify the field value (opcode N,N,N) with
 746         # the pseudo-code info (opcode RT, RA, RB)
 747         assert len(fields) == len(v30b_regs), \
 748             "length of fields %s must match insn `%s` fields %s" % \
 749             (str(v30b_regs), insn, str(fields))
 750         opregfields = zip(fields, v30b_regs)  # err that was easy
 751
 752         # now for each of those find its place in the EXTRA encoding
 753         # note there is the possibility (for LD/ST-with-update) of
 754         # RA occurring **TWICE**.  to avoid it getting added to the
 755         # v3.0B suffix twice, we spot it as a duplicate, here
 756         extras = OrderedDict()
 757         for idx, (field, regname) in enumerate(opregfields):
 758             imm, regname = decode_imm(regname)
 759             rtype = get_regtype(regname)
 760             log("    idx find", rtype, idx, field, regname, imm)
 761             if rtype is None:
 762                 # probably an immediate field, append it straight
 763                 extras[('imm', idx, False)] = (idx, field, None, None, None)
 764                 continue
 765             extra = svp64_src.get(regname, None)
 766             if extra is not None:
 767                 extra = ('s', extra, False)  # not a duplicate
 768                 extras[extra] = (idx, field, regname, rtype, imm)
 769                 log("    idx src", idx, extra, extras[extra])
 770             dextra = svp64_dest.get(regname, None)
 771             log("regname in", regname, dextra)
 772             if dextra is not None:
 773                 is_a_duplicate = extra is not None  # duplicate spotted
 774                 dextra = ('d', dextra, is_a_duplicate)
 775                 extras[dextra] = (idx, field, regname, rtype, imm)
 776                 log("    idx dst", idx, extra, extras[dextra])
 777
 778         # great! got the extra fields in their associated positions:
 779         # also we know the register type. now to create the EXTRA encodings
 780         etype = rm['Etype']  # Extra type: EXTRA3/EXTRA2
 781         ptype = rm['Ptype']  # Predication type: Twin / Single
 782         extra_bits = 0
 783         v30b_newfields = []
 784         for extra_idx, (idx, field, rname, rtype, iname) in extras.items():
 785             # is it a field we don't alter/examine?  if so just put it
 786             # into newfields
 787             if rtype is None:
 788                 v30b_newfields.append(field)
 789                 continue
 790
 791             # identify if this is a ld/st immediate(reg) thing
 792             ldst_imm = "(" in field and field[-1] == ')'
 793             if ldst_imm:
 794                 immed, field = field[:-1].split("(")
 795
 796             field, regmode = decode_reg(field, macros=macros)
 797             log("    ", extra_idx, rname, rtype,
 798                 regmode, iname, field, end=" ")
 799
 800             # see Mode field https://libre-soc.org/openpower/sv/svp64/
 801             # XXX TODO: the following is a bit of a laborious repeated
 802             # mess, which could (and should) easily be parameterised.
 803             # XXX also TODO: the LD/ST modes which are different
 804             # https://libre-soc.org/openpower/sv/ldst/
 805
 806             # rright.  SVP64 register numbering is from 0 to 127
 807             # for GPRs, FPRs *and* CR Fields, where for v3.0 the GPRs and RPFs
 808             # are 0-31 and CR Fields are only 0-7.  the SVP64 RM "Extra"
 809             # area is used to extend the numbering from the 32-bit
 810             # instruction, and also to record whether the register
 811             # is scalar or vector. on a per-operand basis.  this
 812             # results in a slightly finnicky encoding: here we go...
 813
 814             # encode SV-GPR and SV-FPR field into extra, v3.0field
 815             if rtype in ['GPR', 'FPR']:
 816                 sv_extra, field = get_extra_gpr(etype, regmode, field)
 817                 # now sanity-check. EXTRA3 is ok, EXTRA2 has limits
 818                 # (and shrink to a single bit if ok)
 819                 if etype == 'EXTRA2':
 820                     if regmode == 'scalar':
 821                         # range is r0-r63 in increments of 1
 822                         assert (sv_extra >> 1) == 0, \
 823                             "scalar GPR %s cannot fit into EXTRA2 %s" % \
 824                             (rname, str(extras[extra_idx]))
 825                         # all good: encode as scalar
 826                         sv_extra = sv_extra & 0b01
 827                     else:
 828                         # range is r0-r127 in increments of 2 (r0 r2 ... r126)
 829                         assert sv_extra & 0b01 == 0, \
 830                             "%s: vector field %s cannot fit " \
 831                             "into EXTRA2 %s" % \
 832                             (insn, rname, str(extras[extra_idx]))
 833                         # all good: encode as vector (bit 2 set)
 834                         sv_extra = 0b10 | (sv_extra >> 1)
 835                 elif regmode == 'vector':
 836                     # EXTRA3 vector bit needs marking
 837                     sv_extra |= 0b100
 838
 839             # encode SV-CR 3-bit field into extra, v3.0field.
 840             # 3-bit is for things like BF and BFA
 841             elif rtype == 'CR_3bit':
 842                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 843
 844             # encode SV-CR 5-bit field into extra, v3.0field
 845             # 5-bit is for things like BA BB BC BT etc.
 846             # *sigh* this is the same as 3-bit except the 2 LSBs of the
 847             # 5-bit field are passed through unaltered.
 848             elif rtype == 'CR_5bit':
 849                 cr_subfield = field & 0b11  # record bottom 2 bits for later
 850                 field = field >> 2         # strip bottom 2 bits
 851                 # use the exact same 3-bit function for the top 3 bits
 852                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 853                 # reconstruct the actual 5-bit CR field (preserving the
 854                 # bottom 2 bits, unaltered)
 855                 field = (field << 2) | cr_subfield
 856
 857             else:
 858                 raise Exception("no type match: %s" % rtype)
 859
 860             # capture the extra field info
 861             log("=>", "%5s" % bin(sv_extra), field)
 862             extras[extra_idx] = sv_extra
 863
 864             # append altered field value to v3.0b, differs for LDST
 865             # note that duplicates are skipped e.g. EXTRA2 contains
 866             # *BOTH* s:RA *AND* d:RA which happens on LD/ST-with-update
 867             srcdest, idx, duplicate = extra_idx
 868             if duplicate:  # skip adding to v3.0b fields, already added
 869                 continue
 870             if ldst_imm:
 871                 v30b_newfields.append(("%s(%s)" % (immed, str(field))))
 872             else:
 873                 v30b_newfields.append(str(field))
 874
 875         log("new v3.0B fields", v30b_op, v30b_newfields)
 876         log("extras", extras)
 877
 878         # rright. now we have all the info. start creating SVP64 instruction.
 879         db = Database(find_wiki_dir())
 880         svp64_insn = SVP64Instruction.pair(prefix=0, suffix=0)
 881         svp64_prefix = svp64_insn.prefix
 882         svp64_rm = svp64_insn.prefix.rm
 883
 884         # begin with EXTRA fields
 885         for idx, sv_extra in extras.items():
 886             log(idx)
 887             if idx is None:
 888                 continue
 889             if idx[0] == 'imm':
 890                 continue
 891             srcdest, idx, duplicate = idx
 892             if etype == 'EXTRA2':
 893                 svp64_rm.extra2[idx] = sv_extra
 894             else:
 895                 svp64_rm.extra3[idx] = sv_extra
 896
 897         # identify if the op is a LD/ST. the "blegh" way. copied
 898         # from power_enums.  TODO, split the list _insns down.
 899         is_ld = v30b_op in [
 900             "lbarx", "lbz", "lbzu", "lbzux", "lbzx",            # load byte
 901             "ld", "ldarx", "ldbrx", "ldu", "ldux", "ldx",       # load double
 902             "lfs", "lfsx", "lfsu", "lfsux",                     # FP load single
 903             "lfd", "lfdx", "lfdu", "lfdux", "lfiwzx", "lfiwax",  # FP load dbl
 904             "lha", "lharx", "lhau", "lhaux", "lhax",            # load half
 905             "lhbrx", "lhz", "lhzu", "lhzux", "lhzx",            # more load half
 906             "lwa", "lwarx", "lwaux", "lwax", "lwbrx",           # load word
 907             "lwz", "lwzcix", "lwzu", "lwzux", "lwzx",           # more load word
 908         ]
 909         is_st = v30b_op in [
 910             "stb", "stbcix", "stbcx", "stbu", "stbux", "stbx",
 911             "std", "stdbrx", "stdcx", "stdu", "stdux", "stdx",
 912             "stfs", "stfsx", "stfsu", "stfux",                  # FP store sgl
 913             "stfd", "stfdx", "stfdu", "stfdux", "stfiwx",       # FP store dbl
 914             "sth", "sthbrx", "sthcx", "sthu", "sthux", "sthx",
 915             "stw", "stwbrx", "stwcx", "stwu", "stwux", "stwx",
 916         ]
 917         # use this to determine if the SVP64 RM format is different.
 918         # see https://libre-soc.org/openpower/sv/ldst/
 919         is_ldst = is_ld or is_st
 920
 921         # branch-conditional detection
 922         is_bc = v30b_op in [
 923             "bc", "bclr",
 924         ]
 925
 926         # parts of svp64_rm
 927         mmode = 0  # bit 0
 928         pmask = 0  # bits 1-3
 929         destwid = 0  # bits 4-5
 930         srcwid = 0  # bits 6-7
 931         subvl = 0   # bits 8-9
 932         smask = 0  # bits 16-18 but only for twin-predication
 933         mode = 0  # bits 19-23
 934
 935         mask_m_specified = False
 936         has_pmask = False
 937         has_smask = False
 938
 939         saturation = None
 940         src_zero = 0
 941         dst_zero = 0
 942         sv_mode = None
 943
 944         mapreduce = False
 945         reverse_gear = False
 946         mapreduce_crm = False
 947         mapreduce_svm = False
 948
 949         predresult = False
 950         failfirst = False
 951         ldst_elstride = 0
 952
 953         # branch-conditional bits
 954         bc_all = 0
 955         bc_lru = 0
 956         bc_brc = 0
 957         bc_svstep = 0
 958         bc_vsb = 0
 959         bc_vlset = 0
 960         bc_vli = 0
 961         bc_snz = 0
 962
 963         # ok let's start identifying opcode augmentation fields
 964         for encmode in opmodes:
 965             # predicate mask (src and dest)
 966             if encmode.startswith("m="):
 967                 pme = encmode
 968                 pmmode, pmask = decode_predicate(encmode[2:])
 969                 smmode, smask = pmmode, pmask
 970                 mmode = pmmode
 971                 mask_m_specified = True
 972             # predicate mask (dest)
 973             elif encmode.startswith("dm="):
 974                 pme = encmode
 975                 pmmode, pmask = decode_predicate(encmode[3:])
 976                 mmode = pmmode
 977                 has_pmask = True
 978             # predicate mask (src, twin-pred)
 979             elif encmode.startswith("sm="):
 980                 sme = encmode
 981                 smmode, smask = decode_predicate(encmode[3:])
 982                 mmode = smmode
 983                 has_smask = True
 984             # vec2/3/4
 985             elif encmode.startswith("vec"):
 986                 subvl = decode_subvl(encmode[3:])
 987             # elwidth
 988             elif encmode.startswith("ew="):
 989                 destwid = decode_elwidth(encmode[3:])
 990             elif encmode.startswith("sw="):
 991                 srcwid = decode_elwidth(encmode[3:])
 992             # element-strided LD/ST
 993             elif encmode == 'els':
 994                 ldst_elstride = 1
 995             # saturation
 996             elif encmode == 'sats':
 997                 assert sv_mode is None
 998                 saturation = 1
 999                 sv_mode = 0b10
1000             elif encmode == 'satu':
1001                 assert sv_mode is None
1002                 sv_mode = 0b10
1003                 saturation = 0
1004             # predicate zeroing
1005             elif encmode == 'sz':
1006                 src_zero = 1
1007             elif encmode == 'dz':
1008                 dst_zero = 1
1009             # failfirst
1010             elif encmode.startswith("ff="):
1011                 assert sv_mode is None
1012                 sv_mode = 0b01
1013                 failfirst = decode_ffirst(encmode[3:])
1014             # predicate-result, interestingly same as fail-first
1015             elif encmode.startswith("pr="):
1016                 assert sv_mode is None
1017                 sv_mode = 0b11
1018                 predresult = decode_ffirst(encmode[3:])
1019             # map-reduce mode, reverse-gear
1020             elif encmode == 'mrr':
1021                 assert sv_mode is None
1022                 sv_mode = 0b00
1023                 mapreduce = True
1024                 reverse_gear = True
1025             # map-reduce mode
1026             elif encmode == 'mr':
1027                 assert sv_mode is None
1028                 sv_mode = 0b00
1029                 mapreduce = True
1030             elif encmode == 'crm':  # CR on map-reduce
1031                 assert sv_mode is None
1032                 sv_mode = 0b00
1033                 mapreduce_crm = True
1034             elif encmode == 'svm':  # sub-vector mode
1035                 mapreduce_svm = True
1036             elif is_bc:
1037                 if encmode == 'all':
1038                     bc_all = 1
1039                 elif encmode == 'st':  # svstep mode
1040                     bc_step = 1
1041                 elif encmode == 'sr':  # svstep BRc mode
1042                     bc_step = 1
1043                     bc_brc = 1
1044                 elif encmode == 'vs':  # VLSET mode
1045                     bc_vlset = 1
1046                 elif encmode == 'vsi':  # VLSET mode with VLI (VL inclusives)
1047                     bc_vlset = 1
1048                     bc_vli = 1
1049                 elif encmode == 'vsb':  # VLSET mode with VSb
1050                     bc_vlset = 1
1051                     bc_vsb = 1
1052                 elif encmode == 'vsbi':  # VLSET mode with VLI and VSb
1053                     bc_vlset = 1
1054                     bc_vli = 1
1055                     bc_vsb = 1
1056                 elif encmode == 'snz':  # sz (only) already set above
1057                     src_zero = 1
1058                     bc_snz = 1
1059                 elif encmode == 'lu':  # LR update mode
1060                     bc_lru = 1
1061                 else:
1062                     raise AssertionError("unknown encmode %s" % encmode)
1063             else:
1064                 raise AssertionError("unknown encmode %s" % encmode)
1065
1066         if ptype == '2P':
1067             # since m=xx takes precedence (overrides) sm=xx and dm=xx,
1068             # treat them as mutually exclusive
1069             if mask_m_specified:
1070                 assert not has_smask,\
1071                     "cannot have both source-mask and predicate mask"
1072                 assert not has_pmask,\
1073                     "cannot have both dest-mask and predicate mask"
1074             # since the default is INT predication (ALWAYS), if you
1075             # specify one CR mask, you must specify both, to avoid
1076             # mixing INT and CR reg types
1077             if has_pmask and pmmode == 1:
1078                 assert has_smask, \
1079                     "need explicit source-mask in CR twin predication"
1080             if has_smask and smmode == 1:
1081                 assert has_pmask, \
1082                     "need explicit dest-mask in CR twin predication"
1083             # sanity-check that 2Pred mask is same mode
1084             if has_pmask and has_smask:
1085                 assert smmode == pmmode, \
1086                     "predicate masks %s and %s must be same reg type" % \
1087                     (pme, sme)
1088
1089         # sanity-check that twin-predication mask only specified in 2P mode
1090         if ptype == '1P':
1091             assert not has_smask, \
1092                 "source-mask can only be specified on Twin-predicate ops"
1093             assert not has_pmask, \
1094                 "dest-mask can only be specified on Twin-predicate ops"
1095
1096         # construct the mode field, doing sanity-checking along the way
1097         if mapreduce_svm:
1098             assert sv_mode == 0b00, "sub-vector mode in mapreduce only"
1099             assert subvl != 0, "sub-vector mode not possible on SUBVL=1"
1100
1101         if src_zero:
1102             assert has_smask or mask_m_specified, \
1103                 "src zeroing requires a source predicate"
1104         if dst_zero:
1105             assert has_pmask or mask_m_specified, \
1106                 "dest zeroing requires a dest predicate"
1107
1108         # okaaay, so there are 4 different modes, here, which will be
1109         # partly-merged-in: is_ldst is merged in with "normal", but
1110         # is_bc is so different it's done separately.  likewise is_cr
1111         # (when it is done).  here are the maps:
1112
1113         # for "normal" arithmetic: https://libre-soc.org/openpower/sv/normal/
1114         """
1115             | 0-1 |  2  |  3   4  |  description              |
1116             | --- | --- |---------|-------------------------- |
1117             | 00  |   0 |  dz  sz | normal mode                      |
1118             | 00  |   1 | 0  RG   | scalar reduce mode (mapreduce), SUBVL=1 |
1119             | 00  |   1 | 1  /    | parallel reduce mode (mapreduce), SUBVL=1 |
1120             | 00  |   1 | SVM RG  | subvector reduce mode, SUBVL>1   |
1121             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel              |
1122             | 01  | inv | VLi RC1 |  Rc=0: ffirst z/nonz |
1123             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1124             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1125             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1126         """
1127
1128         # https://libre-soc.org/openpower/sv/ldst/
1129         # for LD/ST-immediate:
1130         """
1131             | 0-1 |  2  |  3   4  |  description               |
1132             | --- | --- |---------|--------------------------- |
1133             | 00  | 0   |  dz els | normal mode                |
1134             | 00  | 1   |  dz shf | shift mode                 |
1135             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel        |
1136             | 01  | inv | els RC1 |  Rc=0: ffirst z/nonz       |
1137             | 10  |   N | dz  els |  sat mode: N=0/1 u/s       |
1138             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel  |
1139             | 11  | inv | els RC1 |  Rc=0: pred-result z/nonz  |
1140         """
1141
1142         # for LD/ST-indexed (RA+RB):
1143         """
1144             | 0-1 |  2  |  3   4  |  description              |
1145             | --- | --- |---------|-------------------------- |
1146             | 00  | SEA |  dz  sz | normal mode        |
1147             | 01  | SEA | dz sz  | Strided (scalar only source)   |
1148             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1149             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1150             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1151         """
1152
1153         # and leaving out branches and cr_ops for now because they're
1154         # under development
1155         """ TODO branches and cr_ops
1156         """
1157
1158         # now create mode and (overridden) src/dst widths
1159         # XXX TODO: sanity-check bc modes
1160         if is_bc:
1161             sv_mode = ((bc_svstep << SVP64MODE.MOD2_MSB) |
1162                        (bc_vlset << SVP64MODE.MOD2_LSB) |
1163                        (bc_snz << SVP64MODE.BC_SNZ))
1164             srcwid = (bc_vsb << 1) | bc_lru
1165             destwid = (bc_lru << 1) | bc_all
1166
1167         else:
1168
1169             ######################################
1170             # "normal" mode
1171             if sv_mode is None:
1172                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1173                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1174                 if is_ldst:
1175                     # TODO: for now, LD/ST-indexed is ignored.
1176                     mode |= ldst_elstride << SVP64MODE.ELS_NORMAL  # el-strided
1177                 else:
1178                     # TODO, reduce and subvector mode
1179                     # 00  1   dz CRM  reduce mode (mapreduce), SUBVL=1
1180                     # 00  1   SVM CRM subvector reduce mode, SUBVL>1
1181                     pass
1182                 sv_mode = 0b00
1183
1184             ######################################
1185             # "mapreduce" modes
1186             elif sv_mode == 0b00:
1187                 mode |= (0b1 << SVP64MODE.REDUCE)  # sets mapreduce
1188                 assert dst_zero == 0, "dest-zero not allowed in mapreduce mode"
1189                 if reverse_gear:
1190                     mode |= (0b1 << SVP64MODE.RG)  # sets Reverse-gear mode
1191                 if mapreduce_crm:
1192                     mode |= (0b1 << SVP64MODE.CRM)  # sets CRM mode
1193                     assert rc_mode, "CRM only allowed when Rc=1"
1194                 # bit of weird encoding to jam zero-pred or SVM mode in.
1195                 # SVM mode can be enabled only when SUBVL=2/3/4 (vec2/3/4)
1196                 if subvl == 0:
1197                     mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1198                 elif mapreduce_svm:
1199                     mode |= (0b1 << SVP64MODE.SVM)  # sets SVM mode
1200
1201             ######################################
1202             # "failfirst" modes
1203             elif sv_mode == 0b01:
1204                 assert src_zero == 0, "dest-zero not allowed in failfirst mode"
1205                 if failfirst == 'RC1':
1206                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1207                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1208                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1209                 elif failfirst == '~RC1':
1210                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1211                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1212                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1213                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1214                 else:
1215                     assert dst_zero == 0, "dst-zero not allowed in ffirst BO"
1216                     assert rc_mode, "ffirst BO only possible when Rc=1"
1217                     mode |= (failfirst << SVP64MODE.BO_LSB)  # set BO
1218
1219             ######################################
1220             # "saturation" modes
1221             elif sv_mode == 0b10:
1222                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1223                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1224                 mode |= (saturation << SVP64MODE.N)  # signed/us saturation
1225
1226             ######################################
1227             # "predicate-result" modes.  err... code-duplication from ffirst
1228             elif sv_mode == 0b11:
1229                 assert src_zero == 0, "dest-zero not allowed in predresult mode"
1230                 if predresult == 'RC1':
1231                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1232                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1233                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1234                 elif predresult == '~RC1':
1235                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1236                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1237                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1238                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1239                 else:
1240                     assert dst_zero == 0, "dst-zero not allowed in pr-mode BO"
1241                     assert rc_mode, "pr-mode BO only possible when Rc=1"
1242                     mode |= (predresult << SVP64MODE.BO_LSB)  # set BO
1243
1244         # whewww.... modes all done :)
1245         # now put into svp64_rm
1246         mode |= sv_mode
1247         # mode: bits 19-23
1248         svp64_rm.mode = mode
1249
1250         # put in predicate masks into svp64_rm
1251         if ptype == '2P':
1252             # source pred: bits 16-18
1253             svp64_rm.smask = smask
1254         # mask mode: bit 0
1255         svp64_rm.mmode = mmode
1256         # 1-pred: bits 1-3
1257         svp64_rm.mask = pmask
1258
1259         # and subvl: bits 8-9
1260         svp64_rm.subvl = subvl
1261
1262         # put in elwidths
1263         # srcwid: bits 6-7
1264         svp64_rm.ewsrc = srcwid
1265         # destwid: bits 4-5
1266         svp64_rm.elwidth = destwid
1267
1268         # nice debug printout. (and now for something completely different)
1269         # https://youtu.be/u0WOIwlXE9g?t=146
1270         svp64_rm_value = int(svp64_rm)
1271         log("svp64_rm", hex(svp64_rm_value), bin(svp64_rm_value))
1272         log("    mmode  0    :", bin(mmode))
1273         log("    pmask  1-3  :", bin(pmask))
1274         log("    dstwid 4-5  :", bin(destwid))
1275         log("    srcwid 6-7  :", bin(srcwid))
1276         log("    subvl  8-9  :", bin(subvl))
1277         log("    mode   19-23:", bin(mode))
1278         offs = 2 if etype == 'EXTRA2' else 3  # 2 or 3 bits
1279         for idx, sv_extra in extras.items():
1280             if idx is None:
1281                 continue
1282             if idx[0] == 'imm':
1283                 continue
1284             srcdest, idx, duplicate = idx
1285             start = (10+idx*offs)
1286             end = start + offs-1
1287             log("    extra%d %2d-%2d:" % (idx, start, end),
1288                 bin(sv_extra))
1289         if ptype == '2P':
1290             log("    smask  16-17:", bin(smask))
1291         log()
1292
1293         # update prefix PO and ID (aka PID)
1294         svp64_prefix.po = 0x1
1295         svp64_prefix.id = 0b11
1296
1297         # fiinally yield the svp64 prefix and the thingy.  v3.0b opcode
1298         rc = '.' if rc_mode else ''
1299         yield ".long 0x%08x" % int(svp64_prefix)
1300         log(v30b_op, v30b_newfields)
1301
1302         v30b_op_rc = v30b_op
1303         if not v30b_op.endswith('.'):
1304             v30b_op_rc += rc
1305
1306         # svstep is weird
1307         # FIXME(lkcl): should sv.svstep be like svstep?
1308         if v30b_op_rc in ("svstep", "svstep."):
1309             # compensate for `SVi -= 1` in svstep()
1310             v30b_newfields[1] = str(int(v30b_newfields[1]) + 1)
1311
1312         custom_insn_hook = CUSTOM_INSNS.get(v30b_op_rc)
1313         if custom_insn_hook is not None:
1314             fields = tuple(map(to_number, v30b_newfields))
1315             insn_num = custom_insn_hook(fields)
1316             log(opcode, bin(insn_num))
1317             yield ".long 0x%X # %s" % (insn_num, insn)
1318             return
1319         # argh, sv.fmadds etc. need to be done manually
1320         elif v30b_op == 'ffmadds':
1321             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1322             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1323             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1324             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1325             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1326             opcode |= 0b00101 << (32-31)   # bits 26-30
1327             if rc:
1328                 opcode |= 1  # Rc, bit 31.
1329             yield ".long 0x%x" % opcode
1330         # argh, sv.fdmadds need to be done manually
1331         elif v30b_op == 'fdmadds':
1332             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1333             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1334             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1335             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1336             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1337             opcode |= 0b01111 << (32-31)   # bits 26-30
1338             if rc:
1339                 opcode |= 1  # Rc, bit 31.
1340             yield ".long 0x%x" % opcode
1341         # argh, sv.ffadds etc. need to be done manually
1342         elif v30b_op == 'ffadds':
1343             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1344             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1345             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1346             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1347             opcode |= 0b01101 << (32-31)   # bits 26-30
1348             if rc:
1349                 opcode |= 1  # Rc, bit 31.
1350             yield ".long 0x%x" % opcode
1351         else:
1352             if not v30b_op.endswith('.'):
1353                 v30b_op += rc
1354             yield "%s %s" % (v30b_op, ", ".join(v30b_newfields))
1355         log("new v3.0B fields", v30b_op, v30b_newfields)
1356
1357     def translate(self, lst):
1358         for insn in lst:
1359             yield from self.translate_one(insn)
1360
1361
def macro_subst(macros, txt):
    """Expand macros found in one piece of operand text.

    ``macros`` maps macro names to their replacement text.  A macro is
    expanded when ``txt`` is exactly the macro name, exactly the name
    followed by ``.s`` or ``.v`` (the suffix is carried over onto the
    replacement), or when the name appears in parentheses anywhere inside
    ``txt``.  Passes over the macro table repeat until one completes
    without substituting anything, so a replacement that is itself a
    macro is expanded too.

    Returns the substituted text.
    """
    log("subst", txt, macros)
    changed = True
    while changed:
        changed = False
        for macro, value in macros.items():
            # pick the first form that applies; at most one substitution
            # is made per macro on each pass
            if macro == txt:
                old, new = macro, value
            elif ('%s.s' % macro) == txt:
                old, new = '%s.s' % macro, "%s.s" % value
            elif ('%s.v' % macro) == txt:
                old, new = '%s.v' % macro, "%s.v" % value
            elif ('(%s)' % macro) in txt:
                old, new = '(%s)' % macro, '(%s)' % value
            else:
                continue
            replaced = txt.replace(old, new)
            log("macro", txt, "replaced", replaced, old, value)
            txt = replaced
            changed = True  # something moved: schedule another pass
    log("    processed", txt)
    return txt
1397
1398
def get_ws(line):
    """Split ``line`` into its leading whitespace and the remainder.

    Returns a ``(ws, rest)`` tuple: ``ws`` is the (possibly empty) run of
    leading whitespace characters and ``rest`` is everything after it.
    A falsy ``line`` (e.g. the empty string) is handed back unchanged as
    ``rest``.
    """
    if not line:
        return '', line
    # str.lstrip() removes exactly the characters str.isspace() accepts,
    # and does so in one pass instead of re-slicing the line per character
    rest = line.lstrip()
    return line[:len(line) - len(rest)], rest
1408
1409
def asm_process():
    """Command-line filter: translate SVP64 assembly lines in a file.

    Arguments are taken from ``sys.argv``: either none (stdin to stdout)
    or exactly two, an input and an output name, where ``-`` (or the
    historical ``--``) selects stdin/stdout.  Any other argument count
    prints a usage line to stderr and exits with status 1.

    Lines whose instruction text starts with ``sv.`` or with one of the
    ``CUSTOM_INSNS`` names are passed through ``SVP64Asm.translate_one``
    and written out as the translated pieces joined by ``; `` followed by
    the original text as a comment; every other line is copied verbatim.
    ``.set name, value`` lines are additionally recorded as macros and
    handed to the translator.
    """
    args = sys.argv[1:]
    if len(args) not in (0, 2):
        print("pysvp64asm [infile | -] [outfile | -]", file=sys.stderr)
        # a usage error must not look like success to the calling script
        sys.exit(1)
    in_name, out_name = args if args else ('-', '-')
    stdio_names = ('-', '--')

    # read the whole lot in advance in case of in-place overwrite
    if in_name in stdio_names:
        lines = sys.stdin.readlines()
    else:
        with open(in_name, "r") as infile:
            lines = infile.readlines()

    if out_name in stdio_names:
        outfile = sys.stdout
    else:
        outfile = open(out_name, "w")

    try:
        # read the line, look for custom insn, process it
        macros = {}  # macros which start ".set"
        isa = SVP64Asm([])
        custom_prefixes = tuple(CUSTOM_INSNS)  # loop-invariant
        for line in lines:
            op = line.split("#")[0].strip()
            # identify macros
            if op.startswith(".set"):
                (macro, value) = map(str.strip, op[4:].split(","))
                macros[macro] = value
            if not op.startswith('sv.') and \
                    not op.startswith(custom_prefixes):
                outfile.write(line)
                continue

            # keep the original indentation on the translated line
            ws, _ = get_ws(line)
            translated = '; '.join(isa.translate_one(op, macros))
            outfile.write("%s%s # %s\n" % (ws, translated, op))
    finally:
        # only close what was opened here, never the process's stdout
        if outfile is not sys.stdout:
            outfile.close()
1452
1453
if __name__ == '__main__':
    # Scratchpad of hard-coded instruction lists.  `lst` is reassigned
    # several times below, so only the LAST assignment is what actually
    # gets handed to SVP64Asm; the earlier lists are kept purely as a
    # record of previously-tried inputs.
    lst = ['slw 3, 1, 4',
           'extsw 5, 3',
           'sv.extsw 5, 3',
           'sv.cmpi 5, 1, 3, 2',
           'sv.setb 5, 31',
           'sv.isel 64.v, 3, 2, 65.v',
           'sv.setb/dm=r3/sm=1<<r3 5, 31',
           'sv.setb/m=r3 5, 31',
           'sv.setb/vec2 5, 31',
           'sv.setb/sw=8/ew=16 5, 31',
           'sv.extsw./ff=eq 5, 31',
           'sv.extsw./satu/sz/dz/sm=r3/dm=r3 5, 31',
           'sv.extsw./pr=eq 5.v, 31',
           'sv.add. 5.v, 2.v, 1.v',
           'sv.add./m=r3 5.v, 2.v, 1.v',
           ]
    lst += [
        'sv.stw 5.v, 4(1.v)',
        'sv.ld 5.v, 4(1.v)',
        'setvl. 2, 3, 4, 0, 1, 1',
        'sv.setvl. 2, 3, 4, 0, 1, 1',
    ]
    # (overwrites everything accumulated above)
    lst = [
        "sv.stfsu 0.v, 16(4.v)",
    ]
    lst = [
        "sv.stfsu/els 0.v, 16(4)",
    ]
    lst = [
        'sv.add./mr 5.v, 2.v, 1.v',
    ]
    # macro table passed to SVP64Asm at the bottom of this block
    macros = {'win2': '50', 'win': '60'}
    lst = [
        'sv.addi win2.v, win.v, -1',
        'sv.add./mrr 5.v, 2.v, 1.v',
        #'sv.lhzsh 5.v, 11(9.v), 15',
        #'sv.lwzsh 5.v, 11(9.v), 15',
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',
    ]
    lst = [
        #'sv.fmadds 0.v, 8.v, 16.v, 4.v',
        #'sv.ffadds 0.v, 8.v, 4.v',
        'svremap 11, 0, 1, 2, 3, 2, 1',
        'svshape 8, 1, 1, 1, 0',
        'svshape 8, 1, 1, 1, 1',
    ]
    lst = [
        #'sv.lfssh 4.v, 11(8.v), 15',
        #'sv.lwzsh 4.v, 11(8.v), 15',
        #'sv.svstep. 2.v, 4, 0',
        #'sv.fcfids. 48.v, 64.v',
        'sv.fcoss. 80.v, 0.v',
        'sv.fcoss. 20.v, 0.v',
    ]
    lst = [
        'sv.bc/all 3,12,192',
        'sv.bclr/vsbi 3,81.v,192',
        'sv.ld 5.v, 4(1.v)',
        'sv.svstep. 2.v, 4, 0',
    ]
    lst = [
        'maxs 3,12,5',
        'maxs. 3,12,5',
        'avgadd 3,12,5',
        'absdu 3,12,5',
        'absds 3,12,5',
        'absdacu 3,12,5',
        'absdacs 3,12,5',
        'cprop 3,12,5',
        'svindex 0,0,1,0,0,0,0',
    ]
    lst = [
        'sv.svstep./m=r3 2.v, 4, 0',
        'ternlogi 0,0,0,0x5',
        'fmvis 5,65535',
        'fmvis 5,1',
        'fmvis 5,2',
        'fmvis 5,4',
        'fmvis 5,8',
        'fmvis 5,16',
        'fmvis 5,32',
        'fmvis 5,64',
        'fmvis 5,32768',
    ]
    # this final list is the one that is actually translated
    lst = [
        'sv.andi. *80, *80, 1',
        'sv.ffmadds. 6.v, 2.v, 4.v, 6.v',  # incorrectly inserted 32-bit op
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',  # correctly converted to .long
    ]
    # iterating the SVP64Asm object supplies the lines that are logged
    isa = SVP64Asm(lst, macros=macros)
    log("list:\n", "\n\t".join(list(isa)))
    # running svp64.py is designed to test hard-coded lists
    # (above) - which strictly speaking should all be unit tests.
    # if you need to actually do assembler translation at the
    # commandline use "pysvp64asm" - see setup.py
    # XXX NO. asm_process()