src/openpower/sv/trans/svp64.py

   1 # SPDX-License-Identifier: LGPLv3+
   2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   3 # Funded by NLnet http://nlnet.nl
   4
   5 """SVP64 OpenPOWER v3.0B assembly translator
   6
   7 This class takes raw svp64 assembly mnemonics (aliases excluded) and creates
   8 an EXT001-encoded "svp64 prefix" (as a .long) followed by a v3.0B opcode.
   9
  10 It is very simple and straightforward, the only weirdness being the
  11 extraction of the register information and conversion to v3.0B numbering.
  12
  13 Encoding format of svp64: https://libre-soc.org/openpower/sv/svp64/
  14 Encoding format of arithmetic: https://libre-soc.org/openpower/sv/normal/
  15 Encoding format of LDST: https://libre-soc.org/openpower/sv/ldst/
  16 **TODO format of branches: https://libre-soc.org/openpower/sv/branches/**
  17 **TODO format of CRs: https://libre-soc.org/openpower/sv/cr_ops/**
  18 Bugtracker: https://bugs.libre-soc.org/show_bug.cgi?id=578
  19 """
  20
  21 import functools
  22 import os
  23 import sys
  24 from collections import OrderedDict
  25
  26 from openpower.decoder.isa.caller import (SVP64PrefixFields, SV64P_MAJOR_SIZE,
  27                                           SV64P_PID_SIZE, SVP64RMFields,
  28                                           SVP64RM_EXTRA2_SPEC_SIZE,
  29                                           SVP64RM_EXTRA3_SPEC_SIZE,
  30                                           SVP64RM_MODE_SIZE,
  31                                           SVP64RM_SMASK_SIZE,
  32                                           SVP64RM_MMODE_SIZE,
  33                                           SVP64RM_MASK_SIZE,
  34                                           SVP64RM_SUBVL_SIZE,
  35                                           SVP64RM_EWSRC_SIZE,
  36                                           SVP64RM_ELWIDTH_SIZE)
  37 from openpower.decoder.pseudo.pagereader import ISA
  38 from openpower.decoder.power_svp64 import SVP64RM, get_regtype, decode_extra
  39 from openpower.decoder.selectable_int import SelectableInt
  40 from openpower.consts import SVP64MODE
  41
  42 # for debug logging
  43 from openpower.util import log
  44
  45
  46 def instruction(*fields):
  47     def instruction(insn, desc):
  48         (value, start, end) = desc
  49         bits = ((1,) * ((end + 1) - start))
  50         mask = 0
  51         for bit in bits:
  52             mask = ((mask << 1) | bit)
  53         return (insn | ((value & mask) << (31 - end)))
  54
  55     return functools.reduce(instruction, fields, 0)
  56
  57
  58 def setvl(fields, Rc):
  59     """
  60     setvl is a *32-bit-only* instruction. It controls SVSTATE.
  61     It is *not* a 64-bit-prefixed Vector instruction (no sv.setvl, yet),
  62     it is a Vector *control* instruction.
  63
  64     * setvl RT,RA,SVi,vf,vs,ms
  65
  66     1.6.28 SVL-FORM - from fields.txt
  67     |0     |6    |11    |16   |23 |24 |25 |26    |31 |
  68     | PO   |  RT |   RA | SVi |ms |vs |vf |   XO |Rc |
  69     """
  70     PO = 22
  71     XO = 0b11011
  72     # ARRRGH these are in a non-obvious order in openpower/isa/simplev.mdwn
  73     # compared to the SVL-Form above. sigh
  74     # setvl RT,RA,SVi,vf,vs,ms
  75     (RT, RA, SVi, vf, vs, ms) = fields
  76     SVi -= 1
  77     return instruction(
  78         (PO, 0, 5),
  79         (RT, 6, 10),
  80         (RA, 11, 15),
  81         (SVi, 16, 22),
  82         (ms, 23, 23),
  83         (vs, 24, 24),
  84         (vf, 25, 25),
  85         (XO, 26, 30),
  86         (Rc, 31, 31),
  87     )
  88
  89
  90 def svstep(fields, Rc):
  91     """
  92     svstep is a 32-bit instruction. It updates SVSTATE.
  93     It *can* be SVP64-prefixed, to indicate that its registers
  94     are Vectorised.
  95
  96     * svstep RT,SVi,vf
  97
  98     # 1.6.28 SVL-FORM - from fields.txt
  99     # |0     |6    |11    |16   |23 |24 |25 |26    |31 |
 100     # | PO   |  RT | /    | SVi |/  |/  |vf |   XO |Rc |
 101
 102     """
 103     PO = 22
 104     XO = 0b10011
 105     (RT, SVi, vf) = fields
 106     SVi -= 1
 107     return instruction(
 108         (PO, 0, 5),
 109         (RT, 6, 10),
 110         (0, 11, 15),
 111         (SVi, 16, 22),
 112         (0, 23, 23),
 113         (0, 24, 24),
 114         (vf, 25, 25),
 115         (XO, 26, 30),
 116         (Rc, 31, 31),
 117     )
 118
 119
 120 def svshape(fields):
 121     """
 122     svshape is a *32-bit-only* instruction. It updates SVSHAPE and SVSTATE.
 123     It is *not* a 64-bit-prefixed Vector instruction (no sv.svshape, yet),
 124     it is a Vector *control* instruction.
 125
 126     * svshape SVxd,SVyd,SVzd,SVrm,vf
 127
 128     # 1.6.33 SVM-FORM from fields.txt
 129     # |0     |6        |11      |16    |21    |25 |26    |31  |
 130     # | PO   |  SVxd   |   SVyd | SVzd | SVrm |vf |   XO      |
 131
 132     """
 133     PO = 22
 134     XO = 0b011001
 135     (SVxd, SVyd, SVzd, SVrm, vf) = fields
 136     SVxd -= 1
 137     SVyd -= 1
 138     SVzd -= 1
 139     return instruction(
 140         (PO, 0, 5),
 141         (SVxd, 6, 10),
 142         (SVyd, 11, 15),
 143         (SVzd, 16, 20),
 144         (SVrm, 21, 24),
 145         (vf, 25, 25),
 146         (XO, 26, 31),
 147     )
 148
 149
 150 def svindex(fields):
 151     """
 152     svindex is a *32-bit-only* instruction. It is a convenience
 153     instruction that reduces instruction count for Indexed REMAP
 154     Mode.
 155     It is *not* a 64-bit-prefixed Vector instruction (no sv.svindex, yet),
 156     it is a Vector *control* instruction.
 157
 158     1.6.28 SVI-FORM
 159       |0     |6    |11    |16   |21 |23|24|25|26    31|
 160       | PO   |  SVG|rmm   | SVd |ew |yx|mm|sk|   XO   |
 161     """
 162     # note that the dimension field one subtracted
 163     PO = 22
 164     XO = 0b101001
 165     (SVG, rmm, SVd, ew, yx, mm, sk) = fields
 166     SVd -= 1
 167     return instruction(
 168         (PO, 0, 5),
 169         (SVG, 6, 10),
 170         (rmm, 11, 15),
 171         (SVd, 16, 20),
 172         (ew, 21, 22),
 173         (yx, 23, 23),
 174         (mm, 24, 24),
 175         (sk, 25, 25),
 176         (XO, 26, 31),
 177     )
 178
 179
 180 def svremap(fields):
 181     """
 182     this is a *32-bit-only* instruction. It updates the SVSHAPE SPR
 183     it is *not* a 64-bit-prefixed Vector instruction (no sv.svremap),
 184     it is a Vector *control* instruction.
 185
 186     * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
 187
 188     # 1.6.34 SVRM-FORM
 189        |0     |6     |11  |13   |15   |17   |19   |21  |22   |26     |31 |
 190        | PO   | SVme |mi0 | mi1 | mi2 | mo0 | mo1 |pst |///  | XO        |
 191
 192     """
 193     PO = 22
 194     XO = 0b111001
 195     (SVme, mi0, mi1, mi2, mo0, mo1, pst) = fields
 196     return instruction(
 197         (PO, 0, 5),
 198         (SVme, 6, 10),
 199         (mi0, 11, 12),
 200         (mi1, 13, 14),
 201         (mi2, 15, 16),
 202         (mo0, 17, 18),
 203         (mo1, 19, 20),
 204         (pst, 21, 21),
 205         (0, 22, 25),
 206         (XO, 26, 31),
 207     )
 208
 209
 210 # ok from here-on down these are added as 32-bit instructions
 211 # and are here only because binutils (at present) doesn't have
 212 # them (that's being fixed!)
 213 # they can - if implementations then choose - be Vectorised
 214 # because they are general-purpose scalar instructions
 215 def bmask(fields):
 216     """
 217     1.6.2.2 BM2-FORM
 218     |0     |6    |11    |16    |21   |26 |27    31|
 219     | PO   |  RT |   RA |   RB |bm   |L  |   XO   |
 220     """
 221     PO = 22
 222     XO = 0b010001
 223     (RT, RA, RB, bm, L) = fields
 224     return instruction(
 225         (PO, 0, 5),
 226         (RT, 6, 10),
 227         (RA, 11, 15),
 228         (RB, 16, 20),
 229         (bm, 21, 25),
 230         (L, 26, 26),
 231         (XO, 27, 31),
 232     )
 233
 234
 235 def fsins(fields, Rc):
 236     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 237     # however we are out of space with opcode 22
 238     # 1.6.7 X-FORM
 239     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 240     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 241     PO = 59
 242     XO = 0b1000001110
 243     (FRT, FRB) = fields
 244     return instruction(
 245         (PO, 0, 5),
 246         (FRT, 6, 10),
 247         (0, 11, 15),
 248         (FRB, 16, 20),
 249         (XO, 21, 30),
 250         (Rc, 31, 31),
 251     )
 252
 253
 254 def fcoss(fields, Rc):
 255     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 256     # however we are out of space with opcode 22
 257     # 1.6.7 X-FORM
 258     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 259     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 260     PO = 59
 261     XO = 0b1000101110
 262     (FRT, FRB) = fields
 263     return instruction(
 264         (PO, 0, 5),
 265         (FRT, 6, 10),
 266         (0, 11, 15),
 267         (FRB, 16, 20),
 268         (XO, 21, 30),
 269         (Rc, 31, 31),
 270     )
 271
 272
 273 def ternlogi(fields, Rc):
 274     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 275     # however we are out of space with opcode 22
 276     # 1.6.34 TLI-FORM
 277     #   |0   |6   |11   |16   |21   |29  |31 |
 278     #   | PO | RT |  RA |  RB | TLI | XO |Rc |
 279     PO = 5
 280     XO = 0
 281     (RT, RA, RB, TLI) = fields
 282     return instruction(
 283         (PO, 0, 5),
 284         (RT, 6, 10),
 285         (RA, 11, 15),
 286         (RB, 16, 20),
 287         (TLI, 21, 28),
 288         (XO, 29, 30),
 289         (Rc, 31, 31),
 290     )
 291
 292
 293 def grev(fields, Rc, imm, wide):
 294     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 295     # however we are out of space with opcode 22
 296     insn = PO = 5
 297     # _ matches fields in table at:
 298     # https://libre-soc.org/openPOwer/sv/bitmanip/
 299     XO = 0b1_0010_110
 300     if wide:
 301         XO |= 0b100_000
 302     if imm:
 303         XO |= 0b1000_000
 304     (RT, RA, XBI) = fields
 305     insn = (insn << 5) | RT
 306     insn = (insn << 5) | RA
 307     if imm and not wide:
 308         assert 0 <= XBI < 64
 309         insn = (insn << 6) | XBI
 310         insn = (insn << 9) | XO
 311     else:
 312         assert 0 <= XBI < 32
 313         insn = (insn << 5) | XBI
 314         insn = (insn << 10) | XO
 315     insn = (insn << 1) | Rc
 316     return insn
 317
 318
 319 def av(fields, XO, Rc):
 320     # 1.6.7 X-FORM
 321     #   |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 322     #   | PO   |       RT      |    RA       |    RB       |   XO |Rc  |
 323     PO = 22
 324     (RT, RA, RB) = fields
 325     return instruction(
 326         (PO, 0, 5),
 327         (RT, 6, 10),
 328         (RA, 11, 15),
 329         (RB, 16, 20),
 330         (XO, 21, 30),
 331         (Rc, 31, 31),
 332     )
 333
 334
 335 def fmvis(fields):
 336     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 337     # V3.0B 1.6.6 DX-FORM
 338     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27    |31  |
 339     # | PO   |   FRS         |     d1      |      d0  |      XO |d2  |
 340     PO = 22
 341     XO = 0b00011
 342     (FRS, imm) = fields
 343     # first split imm into d1, d0 and d2. sigh
 344     d2 = (imm & 1)  # LSB (0)
 345     d1 = (imm >> 1) & 0b11111  # bits 1-5
 346     d0 = (imm >> 6)  # MSBs 6-15
 347     return instruction(
 348         (PO, 0, 5),
 349         (FRS, 6, 10),
 350         (d1,  11, 15),
 351         (d0,  16, 25),
 352         (XO, 26, 30),
 353         (d2, 31, 31),
 354     )
 355
 356
 357 def fishmv(fields):
 358     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 359     # V3.0B 1.6.6 DX-FORM
 360     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27   |31  |
 361     # | PO   |   FRS         |     d1      |      d0  |     XO |d2  |
 362     PO = 22
 363     XO = 0b01011
 364     (FRS, imm) = fields
 365     # first split imm into d1, d0 and d2. sigh
 366     d2 = (imm & 1)  # LSB (0)
 367     d1 = (imm >> 1) & 0b11111  # bits 1-5
 368     d0 = (imm >> 6)  # MSBs 6-15
 369     return instruction(
 370         (PO, 0, 5),
 371         (FRS, 6, 10),
 372         (d1,  11, 15),
 373         (d0,  16, 25),
 374         (XO, 26, 30),
 375         (d2, 31, 31),
 376     )
 377
 378
 379 CUSTOM_INSNS = {}
 380 for (name, hook) in (
 381     ("setvl", setvl),
 382     ("svstep", svstep),
 383     ("fsins", fsins),
 384     ("fcoss", fcoss),
 385     ("ternlogi", ternlogi),
 386 ):
 387     CUSTOM_INSNS[name] = functools.partial(hook, Rc=False)
 388     CUSTOM_INSNS[f"{name}."] = functools.partial(hook, Rc=True)
 389 CUSTOM_INSNS["bmask"] = bmask
 390 CUSTOM_INSNS["svshape"] = svshape
 391 CUSTOM_INSNS["svindex"] = svindex
 392 CUSTOM_INSNS["svremap"] = svremap
 393 CUSTOM_INSNS["fmvis"] = fmvis
 394 CUSTOM_INSNS["fishmv"] = fishmv
 395
 396 for (name, imm, wide) in (
 397     ("grev", False, False),
 398     ("grevi", True, False),
 399     ("grevw", False, True),
 400     ("grevwi", True, True),
 401 ):
 402     CUSTOM_INSNS[name] = functools.partial(grev,
 403                                            imm=("i" in name), wide=("w" in name), Rc=False)
 404     CUSTOM_INSNS[f"{name}."] = functools.partial(grev,
 405                                                  imm=("i" in name), wide=("w" in name), Rc=True)
 406
 407 for (name, XO) in (
 408     ("maxs", 0b0111001110),
 409     ("maxu", 0b0011001110),
 410     ("minu", 0b0001001110),
 411     ("mins", 0b0101001110),
 412     ("absdu", 0b1011110110),
 413     ("absds", 0b1001110110),
 414     ("avgadd", 0b1101001110),
 415     ("absdacu", 0b1111110110),
 416     ("absdacs", 0b0111110110),
 417     ("cprop", 0b0110001110),
 418 ):
 419     CUSTOM_INSNS[name] = functools.partial(av, XO=XO, Rc=False)
 420     CUSTOM_INSNS[f"{name}."] = functools.partial(av, XO=XO, Rc=True)
 421
 422
 423 # decode GPR into sv extra
 424 def get_extra_gpr(etype, regmode, field):
 425     if regmode == 'scalar':
 426         # cut into 2-bits 5-bits SS FFFFF
 427         sv_extra = field >> 5
 428         field = field & 0b11111
 429     else:
 430         # cut into 5-bits 2-bits FFFFF SS
 431         sv_extra = field & 0b11
 432         field = field >> 2
 433     return sv_extra, field
 434
 435
 436 # decode 3-bit CR into sv extra
 437 def get_extra_cr_3bit(etype, regmode, field):
 438     if regmode == 'scalar':
 439         # cut into 2-bits 3-bits SS FFF
 440         sv_extra = field >> 3
 441         field = field & 0b111
 442     else:
 443         # cut into 3-bits 4-bits FFF SSSS but will cut 2 zeros off later
 444         sv_extra = field & 0b1111
 445         field = field >> 4
 446     return sv_extra, field
 447
 448
 449 # decodes SUBVL
 450 def decode_subvl(encoding):
 451     pmap = {'2': 0b01, '3': 0b10, '4': 0b11}
 452     assert encoding in pmap, \
 453         "encoding %s for SUBVL not recognised" % encoding
 454     return pmap[encoding]
 455
 456
 457 # decodes elwidth
 458 def decode_elwidth(encoding):
 459     pmap = {'8': 0b11, '16': 0b10, '32': 0b01}
 460     assert encoding in pmap, \
 461         "encoding %s for elwidth not recognised" % encoding
 462     return pmap[encoding]
 463
 464
 465 # decodes predicate register encoding
 466 def decode_predicate(encoding):
 467     pmap = {  # integer
 468         '1<<r3': (0, 0b001),
 469         'r3': (0, 0b010),
 470         '~r3': (0, 0b011),
 471         'r10': (0, 0b100),
 472         '~r10': (0, 0b101),
 473         'r30': (0, 0b110),
 474         '~r30': (0, 0b111),
 475         # CR
 476         'lt': (1, 0b000),
 477         'nl': (1, 0b001), 'ge': (1, 0b001),  # same value
 478         'gt': (1, 0b010),
 479         'ng': (1, 0b011), 'le': (1, 0b011),  # same value
 480         'eq': (1, 0b100),
 481         'ne': (1, 0b101),
 482         'so': (1, 0b110), 'un': (1, 0b110),  # same value
 483         'ns': (1, 0b111), 'nu': (1, 0b111),  # same value
 484     }
 485     assert encoding in pmap, \
 486         "encoding %s for predicate not recognised" % encoding
 487     return pmap[encoding]
 488
 489
 490 # decodes "Mode" in similar way to BO field (supposed to, anyway)
 491 def decode_bo(encoding):
 492     pmap = {  # TODO: double-check that these are the same as Branch BO
 493         'lt': 0b000,
 494         'nl': 0b001, 'ge': 0b001,  # same value
 495         'gt': 0b010,
 496         'ng': 0b011, 'le': 0b011,  # same value
 497         'eq': 0b100,
 498         'ne': 0b101,
 499         'so': 0b110, 'un': 0b110,  # same value
 500         'ns': 0b111, 'nu': 0b111,  # same value
 501     }
 502     assert encoding in pmap, \
 503         "encoding %s for BO Mode not recognised" % encoding
 504     return pmap[encoding]
 505
 506 # partial-decode fail-first mode
 507
 508
 509 def decode_ffirst(encoding):
 510     if encoding in ['RC1', '~RC1']:
 511         return encoding
 512     return decode_bo(encoding)
 513
 514
 515 def decode_reg(field, macros=None):
 516     if macros is None:
 517         macros = {}
 518     # decode the field number. "5.v" or "3.s" or "9"
 519     # and now also "*0", and "*%0".  note: *NOT* to add "*%rNNN" etc.
 520     # https://bugs.libre-soc.org/show_bug.cgi?id=884#c0
 521     if field.startswith(("*%", "*")):
 522         if field.startswith("*%"):
 523             field = field[2:]
 524         else:
 525             field = field[1:]
 526         while field in macros:
 527             field = macros[field]
 528         return int(field), "vector"  # actual register number
 529
 530     # try old convention (to be retired)
 531     field = field.split(".")
 532     regmode = 'scalar'  # default
 533     if len(field) == 2:
 534         if field[1] == 's':
 535             regmode = 'scalar'
 536         elif field[1] == 'v':
 537             regmode = 'vector'
 538     field = int(field[0])  # actual register number
 539     return field, regmode
 540
 541
 542 def decode_imm(field):
 543     ldst_imm = "(" in field and field[-1] == ')'
 544     if ldst_imm:
 545         return field[:-1].split("(")
 546     else:
 547         return None, field
 548
 549
 550 def crf_extra(etype, regmode, field, extras):
 551     """takes a CR Field number (CR0-CR127), splits into EXTRA2/3 and v3.0
 552     the scalar/vector mode (crNN.v or crNN.s) changes both the format
 553     of the EXTRA2/3 encoding as well as what range of registers is possible.
 554     this function can be used for both BF/BFA and BA/BB/BT by first removing
 555     the bottom 2 bits of BA/BB/BT then re-instating them after encoding.
 556     see https://libre-soc.org/openpower/sv/svp64/appendix/#cr_extra
 557     for specification
 558     """
 559     sv_extra, field = get_extra_cr_3bit(etype, regmode, field)
 560     # now sanity-check (and shrink afterwards)
 561     if etype == 'EXTRA2':
 562         # 3-bit CR Field (BF, BFA) EXTRA2 encoding
 563         if regmode == 'scalar':
 564             # range is CR0-CR15 in increments of 1
 565             assert (sv_extra >> 1) == 0, \
 566                 "scalar CR %s cannot fit into EXTRA2 %s" % \
 567                 (rname, str(extras[extra_idx]))
 568             # all good: encode as scalar
 569             sv_extra = sv_extra & 0b01
 570         else:  # vector
 571             # range is CR0-CR127 in increments of 16
 572             assert sv_extra & 0b111 == 0, \
 573                 "vector CR %s cannot fit into EXTRA2 %s" % \
 574                 (rname, str(extras[extra_idx]))
 575             # all good: encode as vector (bit 2 set)
 576             sv_extra = 0b10 | (sv_extra >> 3)
 577     else:
 578         # 3-bit CR Field (BF, BFA) EXTRA3 encoding
 579         if regmode == 'scalar':
 580             # range is CR0-CR31 in increments of 1
 581             assert (sv_extra >> 2) == 0, \
 582                 "scalar CR %s cannot fit into EXTRA3 %s" % \
 583                 (rname, str(extras[extra_idx]))
 584             # all good: encode as scalar
 585             sv_extra = sv_extra & 0b11
 586         else:  # vector
 587             # range is CR0-CR127 in increments of 8
 588             assert sv_extra & 0b11 == 0, \
 589                 "vector CR %s cannot fit into EXTRA3 %s" % \
 590                 (rname, str(extras[extra_idx]))
 591             # all good: encode as vector (bit 3 set)
 592             sv_extra = 0b100 | (sv_extra >> 2)
 593     return sv_extra, field
 594
 595
 596 def to_number(field):
 597     if field.startswith("0x"):
 598         return eval(field)
 599     if field.startswith("0b"):
 600         return eval(field)
 601     return int(field)
 602
 603
 604 # decodes svp64 assembly listings and creates EXT001 svp64 prefixes
 605 class SVP64Asm:
    def __init__(self, lst, bigendian=False, macros=None):
        """Set up a translator for the assembly listing lst.

        lst is stored and handed to self.translate(); macros (a fresh
        empty dict when None) is stored as self.macros.  bigendian must
        compare equal to False: anything else fails the assertion at the
        end of construction.
        """
        if macros is None:
            macros = {}
        self.macros = macros
        self.lst = lst
        # NOTE(review): translate() is called before self.isa and
        # self.svp64 are assigned - presumably it is a lazy generator
        # that only reads them once iterated; confirm before reordering
        self.trans = self.translate(lst)
        self.isa = ISA()  # reads the v3.0B pseudo-code markdown files
        self.svp64 = SVP64RM()  # reads the svp64 Remap entries for registers
        # checked last, i.e. only after ISA() and SVP64RM() have been built
        assert bigendian == False, "error, bigendian not supported yet"
 615
    def __iter__(self):
        # iterating the assembler delegates to self.trans, the object
        # that __init__ obtained from self.translate(lst)
        yield from self.trans
 618
 619     def translate_one(self, insn, macros=None):
 620         if macros is None:
 621             macros = {}
 622         macros.update(self.macros)
 623         isa = self.isa
 624         svp64 = self.svp64
 625         insn_no_comments = insn.partition('#')[0]
 626         # find first space, to get opcode
 627         ls = insn_no_comments.split(' ')
 628         opcode = ls[0]
 629         # now find opcode fields
 630         fields = ''.join(ls[1:]).split(',')
 631         mfields = list(map(str.strip, fields))
 632         log("opcode, fields", ls, opcode, mfields)
 633         fields = []
 634         # macro substitution
 635         for field in mfields:
 636             fields.append(macro_subst(macros, field))
 637         log("opcode, fields substed", ls, opcode, fields)
 638
 639         # identify if it is a special instruction
 640         custom_insn_hook = CUSTOM_INSNS.get(opcode)
 641         if custom_insn_hook is not None:
 642             fields = tuple(map(to_number, fields))
 643             insn_num = custom_insn_hook(fields)
 644             log(opcode, bin(insn_num))
 645             yield ".long 0x%X # %s" % (insn_num, insn)
 646             return
 647
 648         # identify if is a svp64 mnemonic
 649         if not opcode.startswith('sv.'):
 650             yield insn  # unaltered
 651             return
 652         opcode = opcode[3:]  # strip leading "sv"
 653
 654         # start working on decoding the svp64 op: sv.basev30Bop/vec2/mode
 655         opmodes = opcode.split("/")  # split at "/"
 656         v30b_op_orig = opmodes.pop(0)    # first is the v3.0B
 657         # check instruction ends with dot
 658         rc_mode = v30b_op_orig.endswith('.')
 659         if rc_mode:
 660             v30b_op = v30b_op_orig[:-1]
 661         else:
 662             v30b_op = v30b_op_orig
 663
 664         # look up the 32-bit op (original, with "." if it has it)
 665         if v30b_op_orig in isa.instr:
 666             isa_instr = isa.instr[v30b_op_orig]
 667         else:
 668             raise Exception("opcode %s of '%s' not supported" %
 669                             (v30b_op_orig, insn))
 670
 671         # look up the svp64 op, first the original (with "." if it has it)
 672         if v30b_op_orig in svp64.instrs:
 673             rm = svp64.instrs[v30b_op_orig]  # one row of the svp64 RM CSV
 674         # then without the "." (if there was one)
 675         elif v30b_op in svp64.instrs:
 676             rm = svp64.instrs[v30b_op]  # one row of the svp64 RM CSV
 677         else:
 678             raise Exception(f"opcode {v30b_op_orig!r} of "
 679                                 f"{insn!r} not an svp64 instruction")
 680
 681         # get regs info e.g. "RT,RA,RB"
 682         v30b_regs = isa_instr.regs[0]
 683         log("v3.0B op", v30b_op, "Rc=1" if rc_mode else '')
 684         log("v3.0B regs", opcode, v30b_regs)
 685         log("RM", rm)
 686
 687         # right.  the first thing to do is identify the ordering of
 688         # the registers, by name.  the EXTRA2/3 ordering is in
 689         # rm['0']..rm['3'] but those fields contain the names RA, BB
 690         # etc.  we have to read the pseudocode to understand which
 691         # reg is which in our instruction. sigh.
 692
 693         # first turn the svp64 rm into a "by name" dict, recording
 694         # which position in the RM EXTRA it goes into
 695         # also: record if the src or dest was a CR, for sanity-checking
 696         # (elwidth overrides on CRs are banned)
 697         decode = decode_extra(rm)
 698         dest_reg_cr, src_reg_cr, svp64_src, svp64_dest = decode
 699
 700         log("EXTRA field index, src", svp64_src)
 701         log("EXTRA field index, dest", svp64_dest)
 702
 703         # okaaay now we identify the field value (opcode N,N,N) with
 704         # the pseudo-code info (opcode RT, RA, RB)
 705         assert len(fields) == len(v30b_regs), \
 706             "length of fields %s must match insn `%s` fields %s" % \
 707             (str(v30b_regs), insn, str(fields))
 708         opregfields = zip(fields, v30b_regs)  # err that was easy
 709
 710         # now for each of those find its place in the EXTRA encoding
 711         # note there is the possibility (for LD/ST-with-update) of
 712         # RA occurring **TWICE**.  to avoid it getting added to the
 713         # v3.0B suffix twice, we spot it as a duplicate, here
 714         extras = OrderedDict()
 715         for idx, (field, regname) in enumerate(opregfields):
 716             imm, regname = decode_imm(regname)
 717             rtype = get_regtype(regname)
 718             log("    idx find", rtype, idx, field, regname, imm)
 719             if rtype is None:
 720                 # probably an immediate field, append it straight
 721                 extras[('imm', idx, False)] = (idx, field, None, None, None)
 722                 continue
 723             extra = svp64_src.get(regname, None)
 724             if extra is not None:
 725                 extra = ('s', extra, False)  # not a duplicate
 726                 extras[extra] = (idx, field, regname, rtype, imm)
 727                 log("    idx src", idx, extra, extras[extra])
 728             dextra = svp64_dest.get(regname, None)
 729             log("regname in", regname, dextra)
 730             if dextra is not None:
 731                 is_a_duplicate = extra is not None  # duplicate spotted
 732                 dextra = ('d', dextra, is_a_duplicate)
 733                 extras[dextra] = (idx, field, regname, rtype, imm)
 734                 log("    idx dst", idx, extra, extras[dextra])
 735
 736         # great! got the extra fields in their associated positions:
 737         # also we know the register type. now to create the EXTRA encodings
 738         etype = rm['Etype']  # Extra type: EXTRA3/EXTRA2
 739         ptype = rm['Ptype']  # Predication type: Twin / Single
 740         extra_bits = 0
 741         v30b_newfields = []
 742         for extra_idx, (idx, field, rname, rtype, iname) in extras.items():
 743             # is it a field we don't alter/examine?  if so just put it
 744             # into newfields
 745             if rtype is None:
 746                 v30b_newfields.append(field)
 747                 continue
 748
 749             # identify if this is a ld/st immediate(reg) thing
 750             ldst_imm = "(" in field and field[-1] == ')'
 751             if ldst_imm:
 752                 immed, field = field[:-1].split("(")
 753
 754             field, regmode = decode_reg(field, macros=macros)
 755             log("    ", extra_idx, rname, rtype,
 756                 regmode, iname, field, end=" ")
 757
 758             # see Mode field https://libre-soc.org/openpower/sv/svp64/
 759             # XXX TODO: the following is a bit of a laborious repeated
 760             # mess, which could (and should) easily be parameterised.
 761             # XXX also TODO: the LD/ST modes which are different
 762             # https://libre-soc.org/openpower/sv/ldst/
 763
            # rright.  SVP64 register numbering is from 0 to 127
            # for GPRs, FPRs *and* CR Fields, where for v3.0 the GPRs and FPRs
            # are 0-31 and CR Fields are only 0-7.  the SVP64 RM "Extra"
            # area is used to extend the numbering from the 32-bit
            # instruction, and also to record whether the register
            # is scalar or vector, on a per-operand basis.  this
            # results in a slightly finicky encoding: here we go...
 771
 772             # encode SV-GPR and SV-FPR field into extra, v3.0field
 773             if rtype in ['GPR', 'FPR']:
 774                 sv_extra, field = get_extra_gpr(etype, regmode, field)
 775                 # now sanity-check. EXTRA3 is ok, EXTRA2 has limits
 776                 # (and shrink to a single bit if ok)
 777                 if etype == 'EXTRA2':
 778                     if regmode == 'scalar':
 779                         # range is r0-r63 in increments of 1
 780                         assert (sv_extra >> 1) == 0, \
 781                             "scalar GPR %s cannot fit into EXTRA2 %s" % \
 782                             (rname, str(extras[extra_idx]))
 783                         # all good: encode as scalar
 784                         sv_extra = sv_extra & 0b01
 785                     else:
 786                         # range is r0-r127 in increments of 2 (r0 r2 ... r126)
 787                         assert sv_extra & 0b01 == 0, \
 788                             "%s: vector field %s cannot fit " \
 789                             "into EXTRA2 %s" % \
 790                             (insn, rname, str(extras[extra_idx]))
 791                         # all good: encode as vector (bit 2 set)
 792                         sv_extra = 0b10 | (sv_extra >> 1)
 793                 elif regmode == 'vector':
 794                     # EXTRA3 vector bit needs marking
 795                     sv_extra |= 0b100
 796
 797             # encode SV-CR 3-bit field into extra, v3.0field.
 798             # 3-bit is for things like BF and BFA
 799             elif rtype == 'CR_3bit':
 800                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 801
 802             # encode SV-CR 5-bit field into extra, v3.0field
 803             # 5-bit is for things like BA BB BC BT etc.
 804             # *sigh* this is the same as 3-bit except the 2 LSBs of the
 805             # 5-bit field are passed through unaltered.
 806             elif rtype == 'CR_5bit':
 807                 cr_subfield = field & 0b11  # record bottom 2 bits for later
 808                 field = field >> 2         # strip bottom 2 bits
 809                 # use the exact same 3-bit function for the top 3 bits
 810                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 811                 # reconstruct the actual 5-bit CR field (preserving the
 812                 # bottom 2 bits, unaltered)
 813                 field = (field << 2) | cr_subfield
 814
 815             else:
 816                 raise Exception("no type match: %s" % rtype)
 817
 818             # capture the extra field info
 819             log("=>", "%5s" % bin(sv_extra), field)
 820             extras[extra_idx] = sv_extra
 821
 822             # append altered field value to v3.0b, differs for LDST
 823             # note that duplicates are skipped e.g. EXTRA2 contains
 824             # *BOTH* s:RA *AND* d:RA which happens on LD/ST-with-update
 825             srcdest, idx, duplicate = extra_idx
 826             if duplicate:  # skip adding to v3.0b fields, already added
 827                 continue
 828             if ldst_imm:
 829                 v30b_newfields.append(("%s(%s)" % (immed, str(field))))
 830             else:
 831                 v30b_newfields.append(str(field))
 832
 833         log("new v3.0B fields", v30b_op, v30b_newfields)
 834         log("extras", extras)
 835
 836         # rright.  now we have all the info. start creating SVP64 RM
 837         svp64_rm = SVP64RMFields()
 838
 839         # begin with EXTRA fields
 840         for idx, sv_extra in extras.items():
 841             log(idx)
 842             if idx is None:
 843                 continue
 844             if idx[0] == 'imm':
 845                 continue
 846             srcdest, idx, duplicate = idx
 847             if etype == 'EXTRA2':
 848                 svp64_rm.extra2[idx].eq(
 849                     SelectableInt(sv_extra, SVP64RM_EXTRA2_SPEC_SIZE))
 850             else:
 851                 svp64_rm.extra3[idx].eq(
 852                     SelectableInt(sv_extra, SVP64RM_EXTRA3_SPEC_SIZE))
 853
 854         # identify if the op is a LD/ST. the "blegh" way. copied
 855         # from power_enums.  TODO, split the list _insns down.
 856         is_ld = v30b_op in [
 857             "lbarx", "lbz", "lbzu", "lbzux", "lbzx",            # load byte
 858             "ld", "ldarx", "ldbrx", "ldu", "ldux", "ldx",       # load double
 859             "lfs", "lfsx", "lfsu", "lfsux",                     # FP load single
 860             "lfd", "lfdx", "lfdu", "lfdux", "lfiwzx", "lfiwax",  # FP load dbl
 861             "lha", "lharx", "lhau", "lhaux", "lhax",            # load half
 862             "lhbrx", "lhz", "lhzu", "lhzux", "lhzx",            # more load half
 863             "lwa", "lwarx", "lwaux", "lwax", "lwbrx",           # load word
 864             "lwz", "lwzcix", "lwzu", "lwzux", "lwzx",           # more load word
 865         ]
 866         is_st = v30b_op in [
 867             "stb", "stbcix", "stbcx", "stbu", "stbux", "stbx",
 868             "std", "stdbrx", "stdcx", "stdu", "stdux", "stdx",
 869             "stfs", "stfsx", "stfsu", "stfux",                  # FP store sgl
 870             "stfd", "stfdx", "stfdu", "stfdux", "stfiwx",       # FP store dbl
 871             "sth", "sthbrx", "sthcx", "sthu", "sthux", "sthx",
 872             "stw", "stwbrx", "stwcx", "stwu", "stwux", "stwx",
 873         ]
 874         # use this to determine if the SVP64 RM format is different.
 875         # see https://libre-soc.org/openpower/sv/ldst/
 876         is_ldst = is_ld or is_st
 877
 878         # branch-conditional detection
 879         is_bc = v30b_op in [
 880             "bc", "bclr",
 881         ]
 882
 883         # parts of svp64_rm
 884         mmode = 0  # bit 0
 885         pmask = 0  # bits 1-3
 886         destwid = 0  # bits 4-5
 887         srcwid = 0  # bits 6-7
 888         subvl = 0   # bits 8-9
 889         smask = 0  # bits 16-18 but only for twin-predication
 890         mode = 0  # bits 19-23
 891
 892         mask_m_specified = False
 893         has_pmask = False
 894         has_smask = False
 895
 896         saturation = None
 897         src_zero = 0
 898         dst_zero = 0
 899         sv_mode = None
 900
 901         mapreduce = False
 902         reverse_gear = False
 903         mapreduce_crm = False
 904         mapreduce_svm = False
 905
 906         predresult = False
 907         failfirst = False
 908         ldst_elstride = 0
 909
 910         # branch-conditional bits
 911         bc_all = 0
 912         bc_lru = 0
 913         bc_brc = 0
 914         bc_svstep = 0
 915         bc_vsb = 0
 916         bc_vlset = 0
 917         bc_vli = 0
 918         bc_snz = 0
 919
 920         # ok let's start identifying opcode augmentation fields
 921         for encmode in opmodes:
 922             # predicate mask (src and dest)
 923             if encmode.startswith("m="):
 924                 pme = encmode
 925                 pmmode, pmask = decode_predicate(encmode[2:])
 926                 smmode, smask = pmmode, pmask
 927                 mmode = pmmode
 928                 mask_m_specified = True
 929             # predicate mask (dest)
 930             elif encmode.startswith("dm="):
 931                 pme = encmode
 932                 pmmode, pmask = decode_predicate(encmode[3:])
 933                 mmode = pmmode
 934                 has_pmask = True
 935             # predicate mask (src, twin-pred)
 936             elif encmode.startswith("sm="):
 937                 sme = encmode
 938                 smmode, smask = decode_predicate(encmode[3:])
 939                 mmode = smmode
 940                 has_smask = True
 941             # vec2/3/4
 942             elif encmode.startswith("vec"):
 943                 subvl = decode_subvl(encmode[3:])
 944             # elwidth
 945             elif encmode.startswith("ew="):
 946                 destwid = decode_elwidth(encmode[3:])
 947             elif encmode.startswith("sw="):
 948                 srcwid = decode_elwidth(encmode[3:])
 949             # element-strided LD/ST
 950             elif encmode == 'els':
 951                 ldst_elstride = 1
 952             # saturation
 953             elif encmode == 'sats':
 954                 assert sv_mode is None
 955                 saturation = 1
 956                 sv_mode = 0b10
 957             elif encmode == 'satu':
 958                 assert sv_mode is None
 959                 sv_mode = 0b10
 960                 saturation = 0
 961             # predicate zeroing
 962             elif encmode == 'sz':
 963                 src_zero = 1
 964             elif encmode == 'dz':
 965                 dst_zero = 1
 966             # failfirst
 967             elif encmode.startswith("ff="):
 968                 assert sv_mode is None
 969                 sv_mode = 0b01
 970                 failfirst = decode_ffirst(encmode[3:])
 971             # predicate-result, interestingly same as fail-first
 972             elif encmode.startswith("pr="):
 973                 assert sv_mode is None
 974                 sv_mode = 0b11
 975                 predresult = decode_ffirst(encmode[3:])
 976             # map-reduce mode, reverse-gear
 977             elif encmode == 'mrr':
 978                 assert sv_mode is None
 979                 sv_mode = 0b00
 980                 mapreduce = True
 981                 reverse_gear = True
 982             # map-reduce mode
 983             elif encmode == 'mr':
 984                 assert sv_mode is None
 985                 sv_mode = 0b00
 986                 mapreduce = True
 987             elif encmode == 'crm':  # CR on map-reduce
 988                 assert sv_mode is None
 989                 sv_mode = 0b00
 990                 mapreduce_crm = True
 991             elif encmode == 'svm':  # sub-vector mode
 992                 mapreduce_svm = True
 993             elif is_bc:
 994                 if encmode == 'all':
 995                     bc_all = 1
 996                 elif encmode == 'st':  # svstep mode
 997                     bc_step = 1
 998                 elif encmode == 'sr':  # svstep BRc mode
 999                     bc_step = 1
1000                     bc_brc = 1
1001                 elif encmode == 'vs':  # VLSET mode
1002                     bc_vlset = 1
1003                 elif encmode == 'vsi':  # VLSET mode with VLI (VL inclusives)
1004                     bc_vlset = 1
1005                     bc_vli = 1
1006                 elif encmode == 'vsb':  # VLSET mode with VSb
1007                     bc_vlset = 1
1008                     bc_vsb = 1
1009                 elif encmode == 'vsbi':  # VLSET mode with VLI and VSb
1010                     bc_vlset = 1
1011                     bc_vli = 1
1012                     bc_vsb = 1
1013                 elif encmode == 'snz':  # sz (only) already set above
1014                     src_zero = 1
1015                     bc_snz = 1
1016                 elif encmode == 'lu':  # LR update mode
1017                     bc_lru = 1
1018                 else:
1019                     raise AssertionError("unknown encmode %s" % encmode)
1020             else:
1021                 raise AssertionError("unknown encmode %s" % encmode)
1022
1023         if ptype == '2P':
1024             # since m=xx takes precedence (overrides) sm=xx and dm=xx,
1025             # treat them as mutually exclusive
1026             if mask_m_specified:
1027                 assert not has_smask,\
1028                     "cannot have both source-mask and predicate mask"
1029                 assert not has_pmask,\
1030                     "cannot have both dest-mask and predicate mask"
1031             # since the default is INT predication (ALWAYS), if you
1032             # specify one CR mask, you must specify both, to avoid
1033             # mixing INT and CR reg types
1034             if has_pmask and pmmode == 1:
1035                 assert has_smask, \
1036                     "need explicit source-mask in CR twin predication"
1037             if has_smask and smmode == 1:
1038                 assert has_pmask, \
1039                     "need explicit dest-mask in CR twin predication"
1040             # sanity-check that 2Pred mask is same mode
1041             if has_pmask and has_smask:
1042                 assert smmode == pmmode, \
1043                     "predicate masks %s and %s must be same reg type" % \
1044                     (pme, sme)
1045
1046         # sanity-check that twin-predication mask only specified in 2P mode
1047         if ptype == '1P':
1048             assert not has_smask, \
1049                 "source-mask can only be specified on Twin-predicate ops"
1050             assert not has_pmask, \
1051                 "dest-mask can only be specified on Twin-predicate ops"
1052
1053         # construct the mode field, doing sanity-checking along the way
1054         if mapreduce_svm:
1055             assert sv_mode == 0b00, "sub-vector mode in mapreduce only"
1056             assert subvl != 0, "sub-vector mode not possible on SUBVL=1"
1057
1058         if src_zero:
1059             assert has_smask or mask_m_specified, \
1060                 "src zeroing requires a source predicate"
1061         if dst_zero:
1062             assert has_pmask or mask_m_specified, \
1063                 "dest zeroing requires a dest predicate"
1064
1065         # okaaay, so there are 4 different modes, here, which will be
1066         # partly-merged-in: is_ldst is merged in with "normal", but
1067         # is_bc is so different it's done separately.  likewise is_cr
1068         # (when it is done).  here are the maps:
1069
1070         # for "normal" arithmetic: https://libre-soc.org/openpower/sv/normal/
1071         """
1072             | 0-1 |  2  |  3   4  |  description              |
1073             | --- | --- |---------|-------------------------- |
1074             | 00  |   0 |  dz  sz | normal mode                      |
1075             | 00  |   1 | 0  RG   | scalar reduce mode (mapreduce), SUBVL=1 |
1076             | 00  |   1 | 1  /    | parallel reduce mode (mapreduce), SUBVL=1 |
1077             | 00  |   1 | SVM RG  | subvector reduce mode, SUBVL>1   |
1078             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel              |
1079             | 01  | inv | VLi RC1 |  Rc=0: ffirst z/nonz |
1080             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1081             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1082             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1083         """
1084
1085         # https://libre-soc.org/openpower/sv/ldst/
1086         # for LD/ST-immediate:
1087         """
1088             | 0-1 |  2  |  3   4  |  description               |
1089             | --- | --- |---------|--------------------------- |
1090             | 00  | 0   |  dz els | normal mode                |
1091             | 00  | 1   |  dz shf | shift mode                 |
1092             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel        |
1093             | 01  | inv | els RC1 |  Rc=0: ffirst z/nonz       |
1094             | 10  |   N | dz  els |  sat mode: N=0/1 u/s       |
1095             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel  |
1096             | 11  | inv | els RC1 |  Rc=0: pred-result z/nonz  |
1097         """
1098
1099         # for LD/ST-indexed (RA+RB):
1100         """
1101             | 0-1 |  2  |  3   4  |  description              |
1102             | --- | --- |---------|-------------------------- |
1103             | 00  | SEA |  dz  sz | normal mode        |
1104             | 01  | SEA | dz sz  | Strided (scalar only source)   |
1105             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1106             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1107             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1108         """
1109
1110         # and leaving out branches and cr_ops for now because they're
1111         # under development
1112         """ TODO branches and cr_ops
1113         """
1114
1115         # now create mode and (overridden) src/dst widths
1116         # XXX TODO: sanity-check bc modes
1117         if is_bc:
1118             sv_mode = ((bc_svstep << SVP64MODE.MOD2_MSB) |
1119                        (bc_vlset << SVP64MODE.MOD2_LSB) |
1120                        (bc_snz << SVP64MODE.BC_SNZ))
1121             srcwid = (bc_vsb << 1) | bc_lru
1122             destwid = (bc_lru << 1) | bc_all
1123
1124         else:
1125
1126             ######################################
1127             # "normal" mode
1128             if sv_mode is None:
1129                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1130                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1131                 if is_ldst:
1132                     # TODO: for now, LD/ST-indexed is ignored.
1133                     mode |= ldst_elstride << SVP64MODE.ELS_NORMAL  # el-strided
1134                 else:
1135                     # TODO, reduce and subvector mode
1136                     # 00  1   dz CRM  reduce mode (mapreduce), SUBVL=1
1137                     # 00  1   SVM CRM subvector reduce mode, SUBVL>1
1138                     pass
1139                 sv_mode = 0b00
1140
1141             ######################################
1142             # "mapreduce" modes
1143             elif sv_mode == 0b00:
1144                 mode |= (0b1 << SVP64MODE.REDUCE)  # sets mapreduce
1145                 assert dst_zero == 0, "dest-zero not allowed in mapreduce mode"
1146                 if reverse_gear:
1147                     mode |= (0b1 << SVP64MODE.RG)  # sets Reverse-gear mode
1148                 if mapreduce_crm:
1149                     mode |= (0b1 << SVP64MODE.CRM)  # sets CRM mode
1150                     assert rc_mode, "CRM only allowed when Rc=1"
1151                 # bit of weird encoding to jam zero-pred or SVM mode in.
1152                 # SVM mode can be enabled only when SUBVL=2/3/4 (vec2/3/4)
1153                 if subvl == 0:
1154                     mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1155                 elif mapreduce_svm:
1156                     mode |= (0b1 << SVP64MODE.SVM)  # sets SVM mode
1157
1158             ######################################
1159             # "failfirst" modes
1160             elif sv_mode == 0b01:
1161                 assert src_zero == 0, "dest-zero not allowed in failfirst mode"
1162                 if failfirst == 'RC1':
1163                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1164                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1165                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1166                 elif failfirst == '~RC1':
1167                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1168                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1169                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1170                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1171                 else:
1172                     assert dst_zero == 0, "dst-zero not allowed in ffirst BO"
1173                     assert rc_mode, "ffirst BO only possible when Rc=1"
1174                     mode |= (failfirst << SVP64MODE.BO_LSB)  # set BO
1175
1176             ######################################
1177             # "saturation" modes
1178             elif sv_mode == 0b10:
1179                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1180                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1181                 mode |= (saturation << SVP64MODE.N)  # signed/us saturation
1182
1183             ######################################
1184             # "predicate-result" modes.  err... code-duplication from ffirst
1185             elif sv_mode == 0b11:
1186                 assert src_zero == 0, "dest-zero not allowed in predresult mode"
1187                 if predresult == 'RC1':
1188                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1189                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1190                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1191                 elif predresult == '~RC1':
1192                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1193                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1194                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1195                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1196                 else:
1197                     assert dst_zero == 0, "dst-zero not allowed in pr-mode BO"
1198                     assert rc_mode, "pr-mode BO only possible when Rc=1"
1199                     mode |= (predresult << SVP64MODE.BO_LSB)  # set BO
1200
1201         # whewww.... modes all done :)
1202         # now put into svp64_rm
1203         mode |= sv_mode
1204         # mode: bits 19-23
1205         svp64_rm.mode.eq(SelectableInt(mode, SVP64RM_MODE_SIZE))
1206
1207         # put in predicate masks into svp64_rm
1208         if ptype == '2P':
1209             # source pred: bits 16-18
1210             svp64_rm.smask.eq(SelectableInt(smask, SVP64RM_SMASK_SIZE))
1211         # mask mode: bit 0
1212         svp64_rm.mmode.eq(SelectableInt(mmode, SVP64RM_MMODE_SIZE))
1213         # 1-pred: bits 1-3
1214         svp64_rm.mask.eq(SelectableInt(pmask, SVP64RM_MASK_SIZE))
1215
1216         # and subvl: bits 8-9
1217         svp64_rm.subvl.eq(SelectableInt(subvl, SVP64RM_SUBVL_SIZE))
1218
1219         # put in elwidths
1220         # srcwid: bits 6-7
1221         svp64_rm.ewsrc.eq(SelectableInt(srcwid, SVP64RM_EWSRC_SIZE))
1222         # destwid: bits 4-5
1223         svp64_rm.elwidth.eq(SelectableInt(destwid, SVP64RM_ELWIDTH_SIZE))
1224
1225         # nice debug printout. (and now for something completely different)
1226         # https://youtu.be/u0WOIwlXE9g?t=146
1227         svp64_rm_value = svp64_rm.spr.value
1228         log("svp64_rm", hex(svp64_rm_value), bin(svp64_rm_value))
1229         log("    mmode  0    :", bin(mmode))
1230         log("    pmask  1-3  :", bin(pmask))
1231         log("    dstwid 4-5  :", bin(destwid))
1232         log("    srcwid 6-7  :", bin(srcwid))
1233         log("    subvl  8-9  :", bin(subvl))
1234         log("    mode   19-23:", bin(mode))
1235         offs = 2 if etype == 'EXTRA2' else 3  # 2 or 3 bits
1236         for idx, sv_extra in extras.items():
1237             if idx is None:
1238                 continue
1239             if idx[0] == 'imm':
1240                 continue
1241             srcdest, idx, duplicate = idx
1242             start = (10+idx*offs)
1243             end = start + offs-1
1244             log("    extra%d %2d-%2d:" % (idx, start, end),
1245                 bin(sv_extra))
1246         if ptype == '2P':
1247             log("    smask  16-17:", bin(smask))
1248         log()
1249
1250         # first, construct the prefix from its subfields
1251         svp64_prefix = SVP64PrefixFields()
1252         svp64_prefix.major.eq(SelectableInt(0x1, SV64P_MAJOR_SIZE))
1253         svp64_prefix.pid.eq(SelectableInt(0b11, SV64P_PID_SIZE))
1254         svp64_prefix.rm.eq(svp64_rm.spr)
1255
1256         # fiinally yield the svp64 prefix and the thingy.  v3.0b opcode
1257         rc = '.' if rc_mode else ''
1258         yield ".long 0x%08x" % svp64_prefix.insn.value
1259         log(v30b_op, v30b_newfields)
1260         # argh, sv.fmadds etc. need to be done manually
1261         if v30b_op == 'ffmadds':
1262             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1263             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1264             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1265             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1266             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1267             opcode |= 0b00101 << (32-31)   # bits 26-30
1268             if rc:
1269                 opcode |= 1  # Rc, bit 31.
1270             yield ".long 0x%x" % opcode
1271         # argh, sv.fdmadds need to be done manually
1272         elif v30b_op == 'fdmadds':
1273             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1274             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1275             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1276             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1277             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1278             opcode |= 0b01111 << (32-31)   # bits 26-30
1279             if rc:
1280                 opcode |= 1  # Rc, bit 31.
1281             yield ".long 0x%x" % opcode
1282         # argh, sv.ffadds etc. need to be done manually
1283         elif v30b_op == 'ffadds':
1284             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1285             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1286             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1287             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1288             opcode |= 0b01101 << (32-31)   # bits 26-30
1289             if rc:
1290                 opcode |= 1  # Rc, bit 31.
1291             yield ".long 0x%x" % opcode
1292         # sigh have to do svstep here manually for now...
1293         elif v30b_op in ["svstep", "svstep."]:
1294             insn = 22 << (31-5)          # opcode 22, bits 0-5
1295             insn |= int(v30b_newfields[0]) << (31-10)  # RT       , bits 6-10
1296             insn |= int(v30b_newfields[1]) << (31-22)  # SVi      , bits 16-22
1297             insn |= int(v30b_newfields[2]) << (31-25)  # vf       , bit  25
1298             insn |= 0b10011 << (31-30)  # XO       , bits 26..30
1299             if opcode == 'svstep.':
1300                 insn |= 1 << (31-31)     # Rc=1     , bit 31
1301             log("svstep", bin(insn))
1302             yield ".long 0x%x" % insn
1303         # argh, sv.fcoss etc. need to be done manually
1304         elif v30b_op in ["fcoss", "fcoss."]:
1305             insn = 59 << (31-5)  # opcode 59, bits 0-5
1306             insn |= int(v30b_newfields[0]) << (31-10)  # RT       , bits 6-10
1307             insn |= int(v30b_newfields[1]) << (31-20)  # RB       , bits 16-20
1308             insn |= 0b1000101110 << (31-30)  # XO       , bits 21..30
1309             if opcode == 'fcoss.':
1310                 insn |= 1 << (31-31)     # Rc=1     , bit 31
1311             log("fcoss", bin(insn))
1312             yield ".long 0x%x" % insn
1313         else:
1314             if not v30b_op.endswith('.'):
1315                 v30b_op += rc
1316             yield "%s %s" % (v30b_op, ", ".join(v30b_newfields))
1317         yield f"# {insn}"
1318         log("new v3.0B fields", v30b_op, v30b_newfields)
1319
1320     def translate(self, lst):
1321         for insn in lst:
1322             yield from self.translate_one(insn)
1323
1324
def macro_subst(macros, txt):
    """Expand macros inside one piece of operand text.

    A macro name is replaced by its value when *txt* is exactly the name,
    is the name followed by ``.s`` or ``.v`` (the suffix is kept), or
    contains ``(name)`` anywhere (the brackets are kept).  Passes over
    *macros* are repeated until a pass changes nothing, so a macro whose
    value is itself a macro name is followed through to the end.

    Arguments:
        macros: dict mapping macro name to its replacement text.
        txt: the text to expand.

    Returns:
        the fully expanded text.

    Raises:
        ValueError: the definitions are mutually recursive (for example
            ``a -> b`` together with ``b -> a``).  This used to loop
            forever.  A macro that expands to itself is simply treated as
            already expanded.  Definitions whose expansion grows without
            bound are not detected.
    """
    log("subst", txt, macros)
    pass_starts = set()  # text as it stood at the start of each pass
    again = True
    while again:
        # one pass is a pure function of the text it starts with, so
        # starting a pass with text already seen means it can never settle
        if txt in pass_starts:
            raise ValueError(
                "recursive macro definition while expanding %r" % (txt,))
        pass_starts.add(txt)
        again = False
        for macro, value in macros.items():
            # the candidate forms are tried in order; the first one that
            # fits is the only one applied for this macro in this pass
            if macro == txt:
                toreplace, newtext = macro, value
            elif '%s.s' % macro == txt:
                toreplace, newtext = txt, '%s.s' % value
            elif '%s.v' % macro == txt:
                toreplace, newtext = txt, '%s.v' % value
            elif '(%s)' % macro in txt:
                toreplace, newtext = '(%s)' % macro, '(%s)' % value
            else:
                continue
            replaced = txt.replace(toreplace, newtext)
            log("macro", txt, "replaced", replaced, toreplace, value)
            # only an actual change warrants another pass: a replacement
            # that reproduces the same text would otherwise spin forever
            if replaced != txt:
                again = True
                txt = replaced
    log("    processed", txt)
    return txt
1360
1361
def get_ws(line):
    """Split *line* into its leading whitespace and the remainder.

    Returns a ``(whitespace, rest)`` tuple; joining the two gives back
    the original line.
    """
    rest = line.lstrip()
    indent_len = len(line) - len(rest)
    return line[:indent_len], rest
1371
1372
def asm_process():
    """Command-line filter: translate the SVP64 lines of an assembly file.

    Usage: ``pysvp64asm [infile | -] [outfile | -]``.  With no arguments
    stdin is read and stdout is written.  With two arguments each one is
    either a filename or ``-`` for the matching standard stream (``--`` is
    still accepted for backward compatibility).  Any other argument count
    prints the usage line to stderr and exits with status 1.

    Each input line is copied through unchanged unless its code part (the
    text before any ``#``) starts with ``sv.`` or with one of CUSTOM_INSNS.
    Such a line is replaced by the ``; ``-joined output of
    ``SVP64Asm.translate_one``, keeping the original indentation and
    appending the original code as a trailing comment.  ``.set name, value``
    lines are remembered as macros for later translations, and are still
    copied through.
    """
    args = sys.argv[1:]
    if len(args) not in (0, 2):
        print("pysvp64asm [infile | -] [outfile | -]", file=sys.stderr)
        # a usage error is a failure: report it through the exit status
        sys.exit(1)
    in_name, out_name = args if args else ('-', '-')
    # '-' is what the usage text documents; '--' is what was accepted before
    stdio_names = ('-', '--')

    # read the whole lot in advance in case of in-place overwrite
    if in_name in stdio_names:
        lines = sys.stdin.readlines()
    else:
        with open(in_name, "r") as infile:
            lines = infile.readlines()

    if out_name in stdio_names:
        outfile, close_outfile = sys.stdout, False
    else:
        outfile, close_outfile = open(out_name, "w"), True

    try:
        # read the line, look for custom insn, process it
        macros = {}  # macros which start ".set"
        isa = SVP64Asm([])
        translated_prefixes = ('sv.',) + tuple(CUSTOM_INSNS)
        for line in lines:
            op = line.split("#")[0].strip()
            # identify macros
            if op.startswith(".set"):
                macro = op[4:].split(",")
                (macro, value) = map(str.strip, macro)
                macros[macro] = value
            if not op.startswith(translated_prefixes):
                outfile.write(line)
                continue

            (ws, line) = get_ws(line)
            lst = isa.translate_one(op, macros)
            lst = '; '.join(lst)
            outfile.write("%s%s # %s\n" % (ws, lst, op))
    finally:
        # never close stdout, only a file opened here
        if close_outfile:
            outfile.close()
1415
1416
if __name__ == '__main__':
    # Scratch test-cases accumulated over time.  Only the LAST entry is
    # actually translated below; the earlier ones are kept purely as a
    # record and strictly speaking should all be unit tests.
    scratch_lists = [
        [
            'slw 3, 1, 4',
            'extsw 5, 3',
            'sv.extsw 5, 3',
            'sv.cmpi 5, 1, 3, 2',
            'sv.setb 5, 31',
            'sv.isel 64.v, 3, 2, 65.v',
            'sv.setb/dm=r3/sm=1<<r3 5, 31',
            'sv.setb/m=r3 5, 31',
            'sv.setb/vec2 5, 31',
            'sv.setb/sw=8/ew=16 5, 31',
            'sv.extsw./ff=eq 5, 31',
            'sv.extsw./satu/sz/dz/sm=r3/dm=r3 5, 31',
            'sv.extsw./pr=eq 5.v, 31',
            'sv.add. 5.v, 2.v, 1.v',
            'sv.add./m=r3 5.v, 2.v, 1.v',
            'sv.stw 5.v, 4(1.v)',
            'sv.ld 5.v, 4(1.v)',
            'setvl. 2, 3, 4, 0, 1, 1',
            'sv.setvl. 2, 3, 4, 0, 1, 1',
        ],
        [
            "sv.stfsu 0.v, 16(4.v)",
        ],
        [
            "sv.stfsu/els 0.v, 16(4)",
        ],
        [
            'sv.add./mr 5.v, 2.v, 1.v',
        ],
        [
            'sv.addi win2.v, win.v, -1',
            'sv.add./mrr 5.v, 2.v, 1.v',
            # disabled: 'sv.lhzsh 5.v, 11(9.v), 15'
            # disabled: 'sv.lwzsh 5.v, 11(9.v), 15'
            'sv.ffmadds 6.v, 2.v, 4.v, 6.v',
        ],
        [
            # disabled: 'sv.fmadds 0.v, 8.v, 16.v, 4.v'
            # disabled: 'sv.ffadds 0.v, 8.v, 4.v'
            'svremap 11, 0, 1, 2, 3, 2, 1',
            'svshape 8, 1, 1, 1, 0',
            'svshape 8, 1, 1, 1, 1',
        ],
        [
            # disabled: 'sv.lfssh 4.v, 11(8.v), 15'
            # disabled: 'sv.lwzsh 4.v, 11(8.v), 15'
            # disabled: 'sv.svstep. 2.v, 4, 0'
            # disabled: 'sv.fcfids. 48.v, 64.v'
            'sv.fcoss. 80.v, 0.v',
            'sv.fcoss. 20.v, 0.v',
        ],
        [
            'sv.bc/all 3,12,192',
            'sv.bclr/vsbi 3,81.v,192',
            'sv.ld 5.v, 4(1.v)',
            'sv.svstep. 2.v, 4, 0',
        ],
        [
            'maxs 3,12,5',
            'maxs. 3,12,5',
            'avgadd 3,12,5',
            'absdu 3,12,5',
            'absds 3,12,5',
            'absdacu 3,12,5',
            'absdacs 3,12,5',
            'cprop 3,12,5',
            'svindex 0,0,1,0,0,0,0',
        ],
        [
            'sv.svstep./m=r3 2.v, 4, 0',
            'ternlogi 0,0,0,0x5',
            'fmvis 5,65535',
            'fmvis 5,1',
            'fmvis 5,2',
            'fmvis 5,4',
            'fmvis 5,8',
            'fmvis 5,16',
            'fmvis 5,32',
            'fmvis 5,64',
            'fmvis 5,32768',
        ],
        [
            'sv.andi. *80, *80, 1',
            'sv.ffmadds. 6.v, 2.v, 4.v, 6.v',  # incorrectly inserted 32-bit op
            'sv.ffmadds 6.v, 2.v, 4.v, 6.v',  # correctly converted to .long
        ],
    ]

    # macro definitions handed to the assembler alongside the list
    macros = {'win2': '50', 'win': '60'}

    # the active test-case is whichever list was added most recently
    lst = scratch_lists[-1]
    isa = SVP64Asm(lst, macros=macros)
    log("list:\n", "\n\t".join(list(isa)))
    # Running svp64.py directly is only meant to exercise the hard-coded
    # lists above.  For actual assembler translation at the command line
    # use "pysvp64asm" - see setup.py.  asm_process() is deliberately NOT
    # called from here.