src/openpower/sv/trans/svp64.py

   1 # SPDX-License-Identifier: LGPLv3+
   2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   3 # Funded by NLnet http://nlnet.nl
   4
   5 """SVP64 OpenPOWER v3.0B assembly translator
   6
   7 This class takes raw svp64 assembly mnemonics (aliases excluded) and creates
   8 an EXT001-encoded "svp64 prefix" (as a .long) followed by a v3.0B opcode.
   9
  10 It is very simple and straightforward, the only weirdness being the
  11 extraction of the register information and conversion to v3.0B numbering.
  12
  13 Encoding format of svp64: https://libre-soc.org/openpower/sv/svp64/
  14 Encoding format of arithmetic: https://libre-soc.org/openpower/sv/normal/
  15 Encoding format of LDST: https://libre-soc.org/openpower/sv/ldst/
  16 **TODO format of branches: https://libre-soc.org/openpower/sv/branches/**
  17 **TODO format of CRs: https://libre-soc.org/openpower/sv/cr_ops/**
  18 Bugtracker: https://bugs.libre-soc.org/show_bug.cgi?id=578
  19 """
  20
  21 import functools
  22 import os
  23 import sys
  24 from collections import OrderedDict
  25
  26 from openpower.decoder.pseudo.pagereader import ISA
  27 from openpower.decoder.power_svp64 import SVP64RM, get_regtype, decode_extra
  28 from openpower.decoder.selectable_int import SelectableInt
  29 from openpower.consts import SVP64MODE
  30 from openpower.decoder.power_insn import SVP64Instruction
  31 from openpower.decoder.power_insn import Database
  32 from openpower.decoder.power_enums import find_wiki_dir
  33
  34 # for debug logging
  35 from openpower.util import log
  36
  37
  38 def instruction(*fields):
  39     def instruction(insn, desc):
  40         (value, start, end) = desc
  41         bits = ((1,) * ((end + 1) - start))
  42         mask = 0
  43         for bit in bits:
  44             mask = ((mask << 1) | bit)
  45         return (insn | ((value & mask) << (31 - end)))
  46
  47     return functools.reduce(instruction, fields, 0)
  48
  49
  50 def setvl(fields, Rc):
  51     """
  52     setvl is a *32-bit-only* instruction. It controls SVSTATE.
  53     It is *not* a 64-bit-prefixed Vector instruction (no sv.setvl, yet),
  54     it is a Vector *control* instruction.
  55
  56     * setvl RT,RA,SVi,vf,vs,ms
  57
  58     1.6.28 SVL-FORM - from fields.txt
  59     |0     |6    |11    |16   |23 |24 |25 |26    |31 |
  60     | PO   |  RT |   RA | SVi |ms |vs |vf |   XO |Rc |
  61     """
  62     PO = 22
  63     XO = 0b11011
  64     # ARRRGH these are in a non-obvious order in openpower/isa/simplev.mdwn
  65     # compared to the SVL-Form above. sigh
  66     # setvl RT,RA,SVi,vf,vs,ms
  67     (RT, RA, SVi, vf, vs, ms) = fields
  68     SVi -= 1
  69     return instruction(
  70         (PO, 0, 5),
  71         (RT, 6, 10),
  72         (RA, 11, 15),
  73         (SVi, 16, 22),
  74         (ms, 23, 23),
  75         (vs, 24, 24),
  76         (vf, 25, 25),
  77         (XO, 26, 30),
  78         (Rc, 31, 31),
  79     )
  80
  81
  82 def svstep(fields, Rc):
  83     """
  84     svstep is a 32-bit instruction. It updates SVSTATE.
  85     It *can* be SVP64-prefixed, to indicate that its registers
  86     are Vectorised.
  87
  88     * svstep RT,SVi,vf
  89
  90     # 1.6.28 SVL-FORM - from fields.txt
  91     # |0     |6    |11    |16   |23 |24 |25 |26    |31 |
  92     # | PO   |  RT | /    | SVi |/  |/  |vf |   XO |Rc |
  93
  94     """
  95     PO = 22
  96     XO = 0b10011
  97     (RT, SVi, vf) = fields
  98     SVi -= 1
  99     return instruction(
 100         (PO, 0, 5),
 101         (RT, 6, 10),
 102         (0, 11, 15),
 103         (SVi, 16, 22),
 104         (0, 23, 23),
 105         (0, 24, 24),
 106         (vf, 25, 25),
 107         (XO, 26, 30),
 108         (Rc, 31, 31),
 109     )
 110
 111
 112 def svshape(fields):
 113     """
 114     svshape is a *32-bit-only* instruction. It updates SVSHAPE and SVSTATE.
 115     It is *not* a 64-bit-prefixed Vector instruction (no sv.svshape, yet),
 116     it is a Vector *control* instruction.
 117
 118     * svshape SVxd,SVyd,SVzd,SVrm,vf
 119
 120     # 1.6.33 SVM-FORM from fields.txt
 121     # |0     |6        |11      |16    |21    |25 |26    |31  |
 122     # | PO   |  SVxd   |   SVyd | SVzd | SVrm |vf |   XO      |
 123
 124     """
 125     PO = 22
 126     XO = 0b011001
 127     (SVxd, SVyd, SVzd, SVrm, vf) = fields
 128     SVxd -= 1
 129     SVyd -= 1
 130     SVzd -= 1
 131     return instruction(
 132         (PO, 0, 5),
 133         (SVxd, 6, 10),
 134         (SVyd, 11, 15),
 135         (SVzd, 16, 20),
 136         (SVrm, 21, 24),
 137         (vf, 25, 25),
 138         (XO, 26, 31),
 139     )
 140
 141
 142 def svindex(fields):
 143     """
 144     svindex is a *32-bit-only* instruction. It is a convenience
 145     instruction that reduces instruction count for Indexed REMAP
 146     Mode.
 147     It is *not* a 64-bit-prefixed Vector instruction (no sv.svindex, yet),
 148     it is a Vector *control* instruction.
 149
 150     1.6.28 SVI-FORM
 151       |0     |6    |11    |16   |21 |23|24|25|26    31|
 152       | PO   |  SVG|rmm   | SVd |ew |yx|mm|sk|   XO   |
 153     """
 154     # note that the dimension field one subtracted
 155     PO = 22
 156     XO = 0b101001
 157     (SVG, rmm, SVd, ew, yx, mm, sk) = fields
 158     SVd -= 1
 159     return instruction(
 160         (PO, 0, 5),
 161         (SVG, 6, 10),
 162         (rmm, 11, 15),
 163         (SVd, 16, 20),
 164         (ew, 21, 22),
 165         (yx, 23, 23),
 166         (mm, 24, 24),
 167         (sk, 25, 25),
 168         (XO, 26, 31),
 169     )
 170
 171
 172 def svremap(fields):
 173     """
 174     this is a *32-bit-only* instruction. It updates the SVSHAPE SPR
 175     it is *not* a 64-bit-prefixed Vector instruction (no sv.svremap),
 176     it is a Vector *control* instruction.
 177
 178     * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
 179
 180     # 1.6.34 SVRM-FORM
 181        |0     |6     |11  |13   |15   |17   |19   |21  |22   |26     |31 |
 182        | PO   | SVme |mi0 | mi1 | mi2 | mo0 | mo1 |pst |///  | XO        |
 183
 184     """
 185     PO = 22
 186     XO = 0b111001
 187     (SVme, mi0, mi1, mi2, mo0, mo1, pst) = fields
 188     return instruction(
 189         (PO, 0, 5),
 190         (SVme, 6, 10),
 191         (mi0, 11, 12),
 192         (mi1, 13, 14),
 193         (mi2, 15, 16),
 194         (mo0, 17, 18),
 195         (mo1, 19, 20),
 196         (pst, 21, 21),
 197         (0, 22, 25),
 198         (XO, 26, 31),
 199     )
 200
 201
 202 # ok from here-on down these are added as 32-bit instructions
 203 # and are here only because binutils (at present) doesn't have
 204 # them (that's being fixed!)
 205 # they can - if implementations then choose - be Vectorised
 206 # because they are general-purpose scalar instructions
 207 def bmask(fields):
 208     """
 209     1.6.2.2 BM2-FORM
 210     |0     |6    |11    |16    |21   |26 |27    31|
 211     | PO   |  RT |   RA |   RB |bm   |L  |   XO   |
 212     """
 213     PO = 22
 214     XO = 0b010001
 215     (RT, RA, RB, bm, L) = fields
 216     return instruction(
 217         (PO, 0, 5),
 218         (RT, 6, 10),
 219         (RA, 11, 15),
 220         (RB, 16, 20),
 221         (bm, 21, 25),
 222         (L, 26, 26),
 223         (XO, 27, 31),
 224     )
 225
 226
 227 def fsins(fields, Rc):
 228     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 229     # however we are out of space with opcode 22
 230     # 1.6.7 X-FORM
 231     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 232     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 233     PO = 59
 234     XO = 0b1000001110
 235     (FRT, FRB) = fields
 236     return instruction(
 237         (PO, 0, 5),
 238         (FRT, 6, 10),
 239         (0, 11, 15),
 240         (FRB, 16, 20),
 241         (XO, 21, 30),
 242         (Rc, 31, 31),
 243     )
 244
 245
 246 def fcoss(fields, Rc):
 247     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 248     # however we are out of space with opcode 22
 249     # 1.6.7 X-FORM
 250     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 251     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 252     PO = 59
 253     XO = 0b1000101110
 254     (FRT, FRB) = fields
 255     return instruction(
 256         (PO, 0, 5),
 257         (FRT, 6, 10),
 258         (0, 11, 15),
 259         (FRB, 16, 20),
 260         (XO, 21, 30),
 261         (Rc, 31, 31),
 262     )
 263
 264
 265 def ternlogi(fields, Rc):
 266     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 267     # however we are out of space with opcode 22
 268     # 1.6.34 TLI-FORM
 269     #   |0   |6   |11   |16   |21   |29  |31 |
 270     #   | PO | RT |  RA |  RB | TLI | XO |Rc |
 271     PO = 5
 272     XO = 0
 273     (RT, RA, RB, TLI) = fields
 274     return instruction(
 275         (PO, 0, 5),
 276         (RT, 6, 10),
 277         (RA, 11, 15),
 278         (RB, 16, 20),
 279         (TLI, 21, 28),
 280         (XO, 29, 30),
 281         (Rc, 31, 31),
 282     )
 283
 284
 285 def grev(fields, Rc, imm, wide):
 286     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 287     # however we are out of space with opcode 22
 288     insn = PO = 5
 289     # _ matches fields in table at:
 290     # https://libre-soc.org/openPOwer/sv/bitmanip/
 291     XO = 0b1_0010_110
 292     if wide:
 293         XO |= 0b100_000
 294     if imm:
 295         XO |= 0b1000_000
 296     (RT, RA, XBI) = fields
 297     insn = (insn << 5) | RT
 298     insn = (insn << 5) | RA
 299     if imm and not wide:
 300         assert 0 <= XBI < 64
 301         insn = (insn << 6) | XBI
 302         insn = (insn << 9) | XO
 303     else:
 304         assert 0 <= XBI < 32
 305         insn = (insn << 5) | XBI
 306         insn = (insn << 10) | XO
 307     insn = (insn << 1) | Rc
 308     return insn
 309
 310
 311 def av(fields, XO, Rc):
 312     # 1.6.7 X-FORM
 313     #   |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 314     #   | PO   |       RT      |    RA       |    RB       |   XO |Rc  |
 315     PO = 22
 316     (RT, RA, RB) = fields
 317     return instruction(
 318         (PO, 0, 5),
 319         (RT, 6, 10),
 320         (RA, 11, 15),
 321         (RB, 16, 20),
 322         (XO, 21, 30),
 323         (Rc, 31, 31),
 324     )
 325
 326
 327 def fmvis(fields):
 328     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 329     # V3.0B 1.6.6 DX-FORM
 330     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27    |31  |
 331     # | PO   |   FRS         |     d1      |      d0  |      XO |d2  |
 332     PO = 22
 333     XO = 0b00011
 334     (FRS, imm) = fields
 335     # first split imm into d1, d0 and d2. sigh
 336     d2 = (imm & 1)  # LSB (0)
 337     d1 = (imm >> 1) & 0b11111  # bits 1-5
 338     d0 = (imm >> 6)  # MSBs 6-15
 339     return instruction(
 340         (PO, 0, 5),
 341         (FRS, 6, 10),
 342         (d1,  11, 15),
 343         (d0,  16, 25),
 344         (XO, 26, 30),
 345         (d2, 31, 31),
 346     )
 347
 348
 349 def fishmv(fields):
 350     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 351     # V3.0B 1.6.6 DX-FORM
 352     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27   |31  |
 353     # | PO   |   FRS         |     d1      |      d0  |     XO |d2  |
 354     PO = 22
 355     XO = 0b01011
 356     (FRS, imm) = fields
 357     # first split imm into d1, d0 and d2. sigh
 358     d2 = (imm & 1)  # LSB (0)
 359     d1 = (imm >> 1) & 0b11111  # bits 1-5
 360     d0 = (imm >> 6)  # MSBs 6-15
 361     return instruction(
 362         (PO, 0, 5),
 363         (FRS, 6, 10),
 364         (d1,  11, 15),
 365         (d0,  16, 25),
 366         (XO, 26, 30),
 367         (d2, 31, 31),
 368     )
 369
 370
 371 CUSTOM_INSNS = {}
 372 for (name, hook) in (
 373     ("setvl", setvl),
 374     ("svstep", svstep),
 375     ("fsins", fsins),
 376     ("fcoss", fcoss),
 377     ("ternlogi", ternlogi),
 378 ):
 379     CUSTOM_INSNS[name] = functools.partial(hook, Rc=False)
 380     CUSTOM_INSNS[f"{name}."] = functools.partial(hook, Rc=True)
 381 CUSTOM_INSNS["bmask"] = bmask
 382 CUSTOM_INSNS["svshape"] = svshape
 383 CUSTOM_INSNS["svindex"] = svindex
 384 CUSTOM_INSNS["svremap"] = svremap
 385 CUSTOM_INSNS["fmvis"] = fmvis
 386 CUSTOM_INSNS["fishmv"] = fishmv
 387
 388 for (name, imm, wide) in (
 389     ("grev", False, False),
 390     ("grevi", True, False),
 391     ("grevw", False, True),
 392     ("grevwi", True, True),
 393 ):
 394     CUSTOM_INSNS[name] = functools.partial(grev,
 395                                            imm=("i" in name), wide=("w" in name), Rc=False)
 396     CUSTOM_INSNS[f"{name}."] = functools.partial(grev,
 397                                                  imm=("i" in name), wide=("w" in name), Rc=True)
 398
 399 for (name, XO) in (
 400     ("maxs", 0b0111001110),
 401     ("maxu", 0b0011001110),
 402     ("minu", 0b0001001110),
 403     ("mins", 0b0101001110),
 404     ("absdu", 0b1011110110),
 405     ("absds", 0b1001110110),
 406     ("avgadd", 0b1101001110),
 407     ("absdacu", 0b1111110110),
 408     ("absdacs", 0b0111110110),
 409     ("cprop", 0b0110001110),
 410 ):
 411     CUSTOM_INSNS[name] = functools.partial(av, XO=XO, Rc=False)
 412     CUSTOM_INSNS[f"{name}."] = functools.partial(av, XO=XO, Rc=True)
 413
 414
 415 # decode GPR into sv extra
 416 def get_extra_gpr(etype, regmode, field):
 417     if regmode == 'scalar':
 418         # cut into 2-bits 5-bits SS FFFFF
 419         sv_extra = field >> 5
 420         field = field & 0b11111
 421     else:
 422         # cut into 5-bits 2-bits FFFFF SS
 423         sv_extra = field & 0b11
 424         field = field >> 2
 425     return sv_extra, field
 426
 427
 428 # decode 3-bit CR into sv extra
 429 def get_extra_cr_3bit(etype, regmode, field):
 430     if regmode == 'scalar':
 431         # cut into 2-bits 3-bits SS FFF
 432         sv_extra = field >> 3
 433         field = field & 0b111
 434     else:
 435         # cut into 3-bits 4-bits FFF SSSS but will cut 2 zeros off later
 436         sv_extra = field & 0b1111
 437         field = field >> 4
 438     return sv_extra, field
 439
 440
 441 # decodes SUBVL
 442 def decode_subvl(encoding):
 443     pmap = {'2': 0b01, '3': 0b10, '4': 0b11}
 444     assert encoding in pmap, \
 445         "encoding %s for SUBVL not recognised" % encoding
 446     return pmap[encoding]
 447
 448
 449 # decodes elwidth
 450 def decode_elwidth(encoding):
 451     pmap = {'8': 0b11, '16': 0b10, '32': 0b01}
 452     assert encoding in pmap, \
 453         "encoding %s for elwidth not recognised" % encoding
 454     return pmap[encoding]
 455
 456
 457 # decodes predicate register encoding
 458 def decode_predicate(encoding):
 459     pmap = {  # integer
 460         '1<<r3': (0, 0b001),
 461         'r3': (0, 0b010),
 462         '~r3': (0, 0b011),
 463         'r10': (0, 0b100),
 464         '~r10': (0, 0b101),
 465         'r30': (0, 0b110),
 466         '~r30': (0, 0b111),
 467         # CR
 468         'lt': (1, 0b000),
 469         'nl': (1, 0b001), 'ge': (1, 0b001),  # same value
 470         'gt': (1, 0b010),
 471         'ng': (1, 0b011), 'le': (1, 0b011),  # same value
 472         'eq': (1, 0b100),
 473         'ne': (1, 0b101),
 474         'so': (1, 0b110), 'un': (1, 0b110),  # same value
 475         'ns': (1, 0b111), 'nu': (1, 0b111),  # same value
 476     }
 477     assert encoding in pmap, \
 478         "encoding %s for predicate not recognised" % encoding
 479     return pmap[encoding]
 480
 481
 482 # decodes "Mode" in similar way to BO field (supposed to, anyway)
 483 def decode_bo(encoding):
 484     pmap = {  # TODO: double-check that these are the same as Branch BO
 485         'lt': 0b000,
 486         'nl': 0b001, 'ge': 0b001,  # same value
 487         'gt': 0b010,
 488         'ng': 0b011, 'le': 0b011,  # same value
 489         'eq': 0b100,
 490         'ne': 0b101,
 491         'so': 0b110, 'un': 0b110,  # same value
 492         'ns': 0b111, 'nu': 0b111,  # same value
 493     }
 494     assert encoding in pmap, \
 495         "encoding %s for BO Mode not recognised" % encoding
 496     return pmap[encoding]
 497
 498 # partial-decode fail-first mode
 499
 500
 501 def decode_ffirst(encoding):
 502     if encoding in ['RC1', '~RC1']:
 503         return encoding
 504     return decode_bo(encoding)
 505
 506
 507 def decode_reg(field, macros=None):
 508     if macros is None:
 509         macros = {}
 510     # decode the field number. "5.v" or "3.s" or "9"
 511     # and now also "*0", and "*%0".  note: *NOT* to add "*%rNNN" etc.
 512     # https://bugs.libre-soc.org/show_bug.cgi?id=884#c0
 513     if field.startswith(("*%", "*")):
 514         if field.startswith("*%"):
 515             field = field[2:]
 516         else:
 517             field = field[1:]
 518         while field in macros:
 519             field = macros[field]
 520         return int(field), "vector"  # actual register number
 521
 522     # try old convention (to be retired)
 523     field = field.split(".")
 524     regmode = 'scalar'  # default
 525     if len(field) == 2:
 526         if field[1] == 's':
 527             regmode = 'scalar'
 528         elif field[1] == 'v':
 529             regmode = 'vector'
 530     field = int(field[0])  # actual register number
 531     return field, regmode
 532
 533
 534 def decode_imm(field):
 535     ldst_imm = "(" in field and field[-1] == ')'
 536     if ldst_imm:
 537         return field[:-1].split("(")
 538     else:
 539         return None, field
 540
 541
 542 def crf_extra(etype, regmode, field, extras):
 543     """takes a CR Field number (CR0-CR127), splits into EXTRA2/3 and v3.0
 544     the scalar/vector mode (crNN.v or crNN.s) changes both the format
 545     of the EXTRA2/3 encoding as well as what range of registers is possible.
 546     this function can be used for both BF/BFA and BA/BB/BT by first removing
 547     the bottom 2 bits of BA/BB/BT then re-instating them after encoding.
 548     see https://libre-soc.org/openpower/sv/svp64/appendix/#cr_extra
 549     for specification
 550     """
 551     sv_extra, field = get_extra_cr_3bit(etype, regmode, field)
 552     # now sanity-check (and shrink afterwards)
 553     if etype == 'EXTRA2':
 554         # 3-bit CR Field (BF, BFA) EXTRA2 encoding
 555         if regmode == 'scalar':
 556             # range is CR0-CR15 in increments of 1
 557             assert (sv_extra >> 1) == 0, \
 558                 "scalar CR %s cannot fit into EXTRA2 %s" % \
 559                 (rname, str(extras[extra_idx]))
 560             # all good: encode as scalar
 561             sv_extra = sv_extra & 0b01
 562         else:  # vector
 563             # range is CR0-CR127 in increments of 16
 564             assert sv_extra & 0b111 == 0, \
 565                 "vector CR %s cannot fit into EXTRA2 %s" % \
 566                 (rname, str(extras[extra_idx]))
 567             # all good: encode as vector (bit 2 set)
 568             sv_extra = 0b10 | (sv_extra >> 3)
 569     else:
 570         # 3-bit CR Field (BF, BFA) EXTRA3 encoding
 571         if regmode == 'scalar':
 572             # range is CR0-CR31 in increments of 1
 573             assert (sv_extra >> 2) == 0, \
 574                 "scalar CR %s cannot fit into EXTRA3 %s" % \
 575                 (rname, str(extras[extra_idx]))
 576             # all good: encode as scalar
 577             sv_extra = sv_extra & 0b11
 578         else:  # vector
 579             # range is CR0-CR127 in increments of 8
 580             assert sv_extra & 0b11 == 0, \
 581                 "vector CR %s cannot fit into EXTRA3 %s" % \
 582                 (rname, str(extras[extra_idx]))
 583             # all good: encode as vector (bit 3 set)
 584             sv_extra = 0b100 | (sv_extra >> 2)
 585     return sv_extra, field
 586
 587
 588 def to_number(field):
 589     if field.startswith("0x"):
 590         return eval(field)
 591     if field.startswith("0b"):
 592         return eval(field)
 593     return int(field)
 594
 595
 596 # decodes svp64 assembly listings and creates EXT001 svp64 prefixes
 597 class SVP64Asm:
    def __init__(self, lst, bigendian=False, macros=None):
        """Set up a translator over the assembly listing lst.

        lst is kept as self.lst and also handed to self.translate(),
        whose result is stored as self.trans.  macros is stored as
        self.macros (a fresh empty dict when None is given).  bigendian
        must compare equal to False, otherwise the assertion at the end
        of the constructor fails.
        """
        if macros is None:
            macros = {}
        self.macros = macros
        self.lst = lst
        # NOTE(review): translate() is not visible in this chunk and is
        # invoked before self.isa / self.svp64 are assigned - presumably
        # it is lazy (e.g. a generator); confirm before reordering.
        self.trans = self.translate(lst)
        self.isa = ISA()  # reads the v3.0B pseudo-code markdown files
        self.svp64 = SVP64RM()  # reads the svp64 Remap entries for registers
        assert bigendian == False, "error, bigendian not supported yet"
 607
 608     def __iter__(self):
 609         yield from self.trans
 610
 611     def translate_one(self, insn, macros=None):
 612         if macros is None:
 613             macros = {}
 614         macros.update(self.macros)
 615         isa = self.isa
 616         svp64 = self.svp64
 617         insn_no_comments = insn.partition('#')[0]
 618         # find first space, to get opcode
 619         ls = insn_no_comments.split(' ')
 620         opcode = ls[0]
 621         # now find opcode fields
 622         fields = ''.join(ls[1:]).split(',')
 623         mfields = list(map(str.strip, fields))
 624         log("opcode, fields", ls, opcode, mfields)
 625         fields = []
 626         # macro substitution
 627         for field in mfields:
 628             fields.append(macro_subst(macros, field))
 629         log("opcode, fields substed", ls, opcode, fields)
 630
 631         # identify if it is a special instruction
 632         custom_insn_hook = CUSTOM_INSNS.get(opcode)
 633         if custom_insn_hook is not None:
 634             fields = tuple(map(to_number, fields))
 635             insn_num = custom_insn_hook(fields)
 636             log(opcode, bin(insn_num))
 637             yield ".long 0x%X # %s" % (insn_num, insn)
 638             return
 639
 640         # identify if is a svp64 mnemonic
 641         if not opcode.startswith('sv.'):
 642             yield insn  # unaltered
 643             return
 644         opcode = opcode[3:]  # strip leading "sv"
 645
 646         # start working on decoding the svp64 op: sv.basev30Bop/vec2/mode
 647         opmodes = opcode.split("/")  # split at "/"
 648         v30b_op_orig = opmodes.pop(0)    # first is the v3.0B
 649         # check instruction ends with dot
 650         rc_mode = v30b_op_orig.endswith('.')
 651         if rc_mode:
 652             v30b_op = v30b_op_orig[:-1]
 653         else:
 654             v30b_op = v30b_op_orig
 655
 656         # look up the 32-bit op (original, with "." if it has it)
 657         if v30b_op_orig in isa.instr:
 658             isa_instr = isa.instr[v30b_op_orig]
 659         else:
 660             raise Exception("opcode %s of '%s' not supported" %
 661                             (v30b_op_orig, insn))
 662
 663         # look up the svp64 op, first the original (with "." if it has it)
 664         if v30b_op_orig in svp64.instrs:
 665             rm = svp64.instrs[v30b_op_orig]  # one row of the svp64 RM CSV
 666         # then without the "." (if there was one)
 667         elif v30b_op in svp64.instrs:
 668             rm = svp64.instrs[v30b_op]  # one row of the svp64 RM CSV
 669         else:
 670             raise Exception(f"opcode {v30b_op_orig!r} of "
 671                                 f"{insn!r} not an svp64 instruction")
 672
 673         # get regs info e.g. "RT,RA,RB"
 674         v30b_regs = isa_instr.regs[0]
 675         log("v3.0B op", v30b_op, "Rc=1" if rc_mode else '')
 676         log("v3.0B regs", opcode, v30b_regs)
 677         log("RM", rm)
 678
 679         # right.  the first thing to do is identify the ordering of
 680         # the registers, by name.  the EXTRA2/3 ordering is in
 681         # rm['0']..rm['3'] but those fields contain the names RA, BB
 682         # etc.  we have to read the pseudocode to understand which
 683         # reg is which in our instruction. sigh.
 684
 685         # first turn the svp64 rm into a "by name" dict, recording
 686         # which position in the RM EXTRA it goes into
 687         # also: record if the src or dest was a CR, for sanity-checking
 688         # (elwidth overrides on CRs are banned)
 689         decode = decode_extra(rm)
 690         dest_reg_cr, src_reg_cr, svp64_src, svp64_dest = decode
 691
 692         log("EXTRA field index, src", svp64_src)
 693         log("EXTRA field index, dest", svp64_dest)
 694
 695         # okaaay now we identify the field value (opcode N,N,N) with
 696         # the pseudo-code info (opcode RT, RA, RB)
 697         assert len(fields) == len(v30b_regs), \
 698             "length of fields %s must match insn `%s` fields %s" % \
 699             (str(v30b_regs), insn, str(fields))
 700         opregfields = zip(fields, v30b_regs)  # err that was easy
 701
 702         # now for each of those find its place in the EXTRA encoding
 703         # note there is the possibility (for LD/ST-with-update) of
 704         # RA occurring **TWICE**.  to avoid it getting added to the
 705         # v3.0B suffix twice, we spot it as a duplicate, here
 706         extras = OrderedDict()
 707         for idx, (field, regname) in enumerate(opregfields):
 708             imm, regname = decode_imm(regname)
 709             rtype = get_regtype(regname)
 710             log("    idx find", rtype, idx, field, regname, imm)
 711             if rtype is None:
 712                 # probably an immediate field, append it straight
 713                 extras[('imm', idx, False)] = (idx, field, None, None, None)
 714                 continue
 715             extra = svp64_src.get(regname, None)
 716             if extra is not None:
 717                 extra = ('s', extra, False)  # not a duplicate
 718                 extras[extra] = (idx, field, regname, rtype, imm)
 719                 log("    idx src", idx, extra, extras[extra])
 720             dextra = svp64_dest.get(regname, None)
 721             log("regname in", regname, dextra)
 722             if dextra is not None:
 723                 is_a_duplicate = extra is not None  # duplicate spotted
 724                 dextra = ('d', dextra, is_a_duplicate)
 725                 extras[dextra] = (idx, field, regname, rtype, imm)
 726                 log("    idx dst", idx, extra, extras[dextra])
 727
 728         # great! got the extra fields in their associated positions:
 729         # also we know the register type. now to create the EXTRA encodings
 730         etype = rm['Etype']  # Extra type: EXTRA3/EXTRA2
 731         ptype = rm['Ptype']  # Predication type: Twin / Single
 732         extra_bits = 0
 733         v30b_newfields = []
 734         for extra_idx, (idx, field, rname, rtype, iname) in extras.items():
 735             # is it a field we don't alter/examine?  if so just put it
 736             # into newfields
 737             if rtype is None:
 738                 v30b_newfields.append(field)
 739                 continue
 740
 741             # identify if this is a ld/st immediate(reg) thing
 742             ldst_imm = "(" in field and field[-1] == ')'
 743             if ldst_imm:
 744                 immed, field = field[:-1].split("(")
 745
 746             field, regmode = decode_reg(field, macros=macros)
 747             log("    ", extra_idx, rname, rtype,
 748                 regmode, iname, field, end=" ")
 749
 750             # see Mode field https://libre-soc.org/openpower/sv/svp64/
 751             # XXX TODO: the following is a bit of a laborious repeated
 752             # mess, which could (and should) easily be parameterised.
 753             # XXX also TODO: the LD/ST modes which are different
 754             # https://libre-soc.org/openpower/sv/ldst/
 755
 756             # rright.  SVP64 register numbering is from 0 to 127
 757             # for GPRs, FPRs *and* CR Fields, where for v3.0 the GPRs and RPFs
 758             # are 0-31 and CR Fields are only 0-7.  the SVP64 RM "Extra"
 759             # area is used to extend the numbering from the 32-bit
 760             # instruction, and also to record whether the register
 761             # is scalar or vector. on a per-operand basis.  this
 762             # results in a slightly finnicky encoding: here we go...
 763
 764             # encode SV-GPR and SV-FPR field into extra, v3.0field
 765             if rtype in ['GPR', 'FPR']:
 766                 sv_extra, field = get_extra_gpr(etype, regmode, field)
 767                 # now sanity-check. EXTRA3 is ok, EXTRA2 has limits
 768                 # (and shrink to a single bit if ok)
 769                 if etype == 'EXTRA2':
 770                     if regmode == 'scalar':
 771                         # range is r0-r63 in increments of 1
 772                         assert (sv_extra >> 1) == 0, \
 773                             "scalar GPR %s cannot fit into EXTRA2 %s" % \
 774                             (rname, str(extras[extra_idx]))
 775                         # all good: encode as scalar
 776                         sv_extra = sv_extra & 0b01
 777                     else:
 778                         # range is r0-r127 in increments of 2 (r0 r2 ... r126)
 779                         assert sv_extra & 0b01 == 0, \
 780                             "%s: vector field %s cannot fit " \
 781                             "into EXTRA2 %s" % \
 782                             (insn, rname, str(extras[extra_idx]))
 783                         # all good: encode as vector (bit 2 set)
 784                         sv_extra = 0b10 | (sv_extra >> 1)
 785                 elif regmode == 'vector':
 786                     # EXTRA3 vector bit needs marking
 787                     sv_extra |= 0b100
 788
 789             # encode SV-CR 3-bit field into extra, v3.0field.
 790             # 3-bit is for things like BF and BFA
 791             elif rtype == 'CR_3bit':
 792                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 793
 794             # encode SV-CR 5-bit field into extra, v3.0field
 795             # 5-bit is for things like BA BB BC BT etc.
 796             # *sigh* this is the same as 3-bit except the 2 LSBs of the
 797             # 5-bit field are passed through unaltered.
 798             elif rtype == 'CR_5bit':
 799                 cr_subfield = field & 0b11  # record bottom 2 bits for later
 800                 field = field >> 2         # strip bottom 2 bits
 801                 # use the exact same 3-bit function for the top 3 bits
 802                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 803                 # reconstruct the actual 5-bit CR field (preserving the
 804                 # bottom 2 bits, unaltered)
 805                 field = (field << 2) | cr_subfield
 806
 807             else:
 808                 raise Exception("no type match: %s" % rtype)
 809
 810             # capture the extra field info
 811             log("=>", "%5s" % bin(sv_extra), field)
 812             extras[extra_idx] = sv_extra
 813
 814             # append altered field value to v3.0b, differs for LDST
 815             # note that duplicates are skipped e.g. EXTRA2 contains
 816             # *BOTH* s:RA *AND* d:RA which happens on LD/ST-with-update
 817             srcdest, idx, duplicate = extra_idx
 818             if duplicate:  # skip adding to v3.0b fields, already added
 819                 continue
 820             if ldst_imm:
 821                 v30b_newfields.append(("%s(%s)" % (immed, str(field))))
 822             else:
 823                 v30b_newfields.append(str(field))
 824
 825         log("new v3.0B fields", v30b_op, v30b_newfields)
 826         log("extras", extras)
 827
 828         # rright. now we have all the info. start creating SVP64 instruction.
 829         db = Database(find_wiki_dir())
 830         svp64_insn = SVP64Instruction.pair(prefix=0, suffix=0)
 831         svp64_prefix = svp64_insn.prefix
 832         svp64_rm = svp64_insn.prefix.rm
 833
 834         # begin with EXTRA fields
 835         for idx, sv_extra in extras.items():
 836             log(idx)
 837             if idx is None:
 838                 continue
 839             if idx[0] == 'imm':
 840                 continue
 841             srcdest, idx, duplicate = idx
 842             if etype == 'EXTRA2':
 843                 svp64_rm.extra2[idx] = sv_extra
 844             else:
 845                 svp64_rm.extra3[idx] = sv_extra
 846
 847         # identify if the op is a LD/ST. the "blegh" way. copied
 848         # from power_enums.  TODO, split the list _insns down.
 849         is_ld = v30b_op in [
 850             "lbarx", "lbz", "lbzu", "lbzux", "lbzx",            # load byte
 851             "ld", "ldarx", "ldbrx", "ldu", "ldux", "ldx",       # load double
 852             "lfs", "lfsx", "lfsu", "lfsux",                     # FP load single
 853             "lfd", "lfdx", "lfdu", "lfdux", "lfiwzx", "lfiwax",  # FP load dbl
 854             "lha", "lharx", "lhau", "lhaux", "lhax",            # load half
 855             "lhbrx", "lhz", "lhzu", "lhzux", "lhzx",            # more load half
 856             "lwa", "lwarx", "lwaux", "lwax", "lwbrx",           # load word
 857             "lwz", "lwzcix", "lwzu", "lwzux", "lwzx",           # more load word
 858         ]
 859         is_st = v30b_op in [
 860             "stb", "stbcix", "stbcx", "stbu", "stbux", "stbx",
 861             "std", "stdbrx", "stdcx", "stdu", "stdux", "stdx",
 862             "stfs", "stfsx", "stfsu", "stfux",                  # FP store sgl
 863             "stfd", "stfdx", "stfdu", "stfdux", "stfiwx",       # FP store dbl
 864             "sth", "sthbrx", "sthcx", "sthu", "sthux", "sthx",
 865             "stw", "stwbrx", "stwcx", "stwu", "stwux", "stwx",
 866         ]
 867         # use this to determine if the SVP64 RM format is different.
 868         # see https://libre-soc.org/openpower/sv/ldst/
 869         is_ldst = is_ld or is_st
 870
 871         # branch-conditional detection
 872         is_bc = v30b_op in [
 873             "bc", "bclr",
 874         ]
 875
 876         # parts of svp64_rm
 877         mmode = 0  # bit 0
 878         pmask = 0  # bits 1-3
 879         destwid = 0  # bits 4-5
 880         srcwid = 0  # bits 6-7
 881         subvl = 0   # bits 8-9
 882         smask = 0  # bits 16-18 but only for twin-predication
 883         mode = 0  # bits 19-23
 884
 885         mask_m_specified = False
 886         has_pmask = False
 887         has_smask = False
 888
 889         saturation = None
 890         src_zero = 0
 891         dst_zero = 0
 892         sv_mode = None
 893
 894         mapreduce = False
 895         reverse_gear = False
 896         mapreduce_crm = False
 897         mapreduce_svm = False
 898
 899         predresult = False
 900         failfirst = False
 901         ldst_elstride = 0
 902
 903         # branch-conditional bits
 904         bc_all = 0
 905         bc_lru = 0
 906         bc_brc = 0
 907         bc_svstep = 0
 908         bc_vsb = 0
 909         bc_vlset = 0
 910         bc_vli = 0
 911         bc_snz = 0
 912
 913         # ok let's start identifying opcode augmentation fields
 914         for encmode in opmodes:
 915             # predicate mask (src and dest)
 916             if encmode.startswith("m="):
 917                 pme = encmode
 918                 pmmode, pmask = decode_predicate(encmode[2:])
 919                 smmode, smask = pmmode, pmask
 920                 mmode = pmmode
 921                 mask_m_specified = True
 922             # predicate mask (dest)
 923             elif encmode.startswith("dm="):
 924                 pme = encmode
 925                 pmmode, pmask = decode_predicate(encmode[3:])
 926                 mmode = pmmode
 927                 has_pmask = True
 928             # predicate mask (src, twin-pred)
 929             elif encmode.startswith("sm="):
 930                 sme = encmode
 931                 smmode, smask = decode_predicate(encmode[3:])
 932                 mmode = smmode
 933                 has_smask = True
 934             # vec2/3/4
 935             elif encmode.startswith("vec"):
 936                 subvl = decode_subvl(encmode[3:])
 937             # elwidth
 938             elif encmode.startswith("ew="):
 939                 destwid = decode_elwidth(encmode[3:])
 940             elif encmode.startswith("sw="):
 941                 srcwid = decode_elwidth(encmode[3:])
 942             # element-strided LD/ST
 943             elif encmode == 'els':
 944                 ldst_elstride = 1
 945             # saturation
 946             elif encmode == 'sats':
 947                 assert sv_mode is None
 948                 saturation = 1
 949                 sv_mode = 0b10
 950             elif encmode == 'satu':
 951                 assert sv_mode is None
 952                 sv_mode = 0b10
 953                 saturation = 0
 954             # predicate zeroing
 955             elif encmode == 'sz':
 956                 src_zero = 1
 957             elif encmode == 'dz':
 958                 dst_zero = 1
 959             # failfirst
 960             elif encmode.startswith("ff="):
 961                 assert sv_mode is None
 962                 sv_mode = 0b01
 963                 failfirst = decode_ffirst(encmode[3:])
 964             # predicate-result, interestingly same as fail-first
 965             elif encmode.startswith("pr="):
 966                 assert sv_mode is None
 967                 sv_mode = 0b11
 968                 predresult = decode_ffirst(encmode[3:])
 969             # map-reduce mode, reverse-gear
 970             elif encmode == 'mrr':
 971                 assert sv_mode is None
 972                 sv_mode = 0b00
 973                 mapreduce = True
 974                 reverse_gear = True
 975             # map-reduce mode
 976             elif encmode == 'mr':
 977                 assert sv_mode is None
 978                 sv_mode = 0b00
 979                 mapreduce = True
 980             elif encmode == 'crm':  # CR on map-reduce
 981                 assert sv_mode is None
 982                 sv_mode = 0b00
 983                 mapreduce_crm = True
 984             elif encmode == 'svm':  # sub-vector mode
 985                 mapreduce_svm = True
 986             elif is_bc:
 987                 if encmode == 'all':
 988                     bc_all = 1
 989                 elif encmode == 'st':  # svstep mode
 990                     bc_step = 1
 991                 elif encmode == 'sr':  # svstep BRc mode
 992                     bc_step = 1
 993                     bc_brc = 1
 994                 elif encmode == 'vs':  # VLSET mode
 995                     bc_vlset = 1
 996                 elif encmode == 'vsi':  # VLSET mode with VLI (VL inclusives)
 997                     bc_vlset = 1
 998                     bc_vli = 1
 999                 elif encmode == 'vsb':  # VLSET mode with VSb
1000                     bc_vlset = 1
1001                     bc_vsb = 1
1002                 elif encmode == 'vsbi':  # VLSET mode with VLI and VSb
1003                     bc_vlset = 1
1004                     bc_vli = 1
1005                     bc_vsb = 1
1006                 elif encmode == 'snz':  # sz (only) already set above
1007                     src_zero = 1
1008                     bc_snz = 1
1009                 elif encmode == 'lu':  # LR update mode
1010                     bc_lru = 1
1011                 else:
1012                     raise AssertionError("unknown encmode %s" % encmode)
1013             else:
1014                 raise AssertionError("unknown encmode %s" % encmode)
1015
1016         if ptype == '2P':
1017             # since m=xx takes precedence (overrides) sm=xx and dm=xx,
1018             # treat them as mutually exclusive
1019             if mask_m_specified:
1020                 assert not has_smask,\
1021                     "cannot have both source-mask and predicate mask"
1022                 assert not has_pmask,\
1023                     "cannot have both dest-mask and predicate mask"
1024             # since the default is INT predication (ALWAYS), if you
1025             # specify one CR mask, you must specify both, to avoid
1026             # mixing INT and CR reg types
1027             if has_pmask and pmmode == 1:
1028                 assert has_smask, \
1029                     "need explicit source-mask in CR twin predication"
1030             if has_smask and smmode == 1:
1031                 assert has_pmask, \
1032                     "need explicit dest-mask in CR twin predication"
1033             # sanity-check that 2Pred mask is same mode
1034             if has_pmask and has_smask:
1035                 assert smmode == pmmode, \
1036                     "predicate masks %s and %s must be same reg type" % \
1037                     (pme, sme)
1038
1039         # sanity-check that twin-predication mask only specified in 2P mode
1040         if ptype == '1P':
1041             assert not has_smask, \
1042                 "source-mask can only be specified on Twin-predicate ops"
1043             assert not has_pmask, \
1044                 "dest-mask can only be specified on Twin-predicate ops"
1045
1046         # construct the mode field, doing sanity-checking along the way
1047         if mapreduce_svm:
1048             assert sv_mode == 0b00, "sub-vector mode in mapreduce only"
1049             assert subvl != 0, "sub-vector mode not possible on SUBVL=1"
1050
1051         if src_zero:
1052             assert has_smask or mask_m_specified, \
1053                 "src zeroing requires a source predicate"
1054         if dst_zero:
1055             assert has_pmask or mask_m_specified, \
1056                 "dest zeroing requires a dest predicate"
1057
1058         # okaaay, so there are 4 different modes, here, which will be
1059         # partly-merged-in: is_ldst is merged in with "normal", but
1060         # is_bc is so different it's done separately.  likewise is_cr
1061         # (when it is done).  here are the maps:
1062
1063         # for "normal" arithmetic: https://libre-soc.org/openpower/sv/normal/
1064         """
1065             | 0-1 |  2  |  3   4  |  description              |
1066             | --- | --- |---------|-------------------------- |
1067             | 00  |   0 |  dz  sz | normal mode                      |
1068             | 00  |   1 | 0  RG   | scalar reduce mode (mapreduce), SUBVL=1 |
1069             | 00  |   1 | 1  /    | parallel reduce mode (mapreduce), SUBVL=1 |
1070             | 00  |   1 | SVM RG  | subvector reduce mode, SUBVL>1   |
1071             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel              |
1072             | 01  | inv | VLi RC1 |  Rc=0: ffirst z/nonz |
1073             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1074             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1075             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1076         """
1077
1078         # https://libre-soc.org/openpower/sv/ldst/
1079         # for LD/ST-immediate:
1080         """
1081             | 0-1 |  2  |  3   4  |  description               |
1082             | --- | --- |---------|--------------------------- |
1083             | 00  | 0   |  dz els | normal mode                |
1084             | 00  | 1   |  dz shf | shift mode                 |
1085             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel        |
1086             | 01  | inv | els RC1 |  Rc=0: ffirst z/nonz       |
1087             | 10  |   N | dz  els |  sat mode: N=0/1 u/s       |
1088             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel  |
1089             | 11  | inv | els RC1 |  Rc=0: pred-result z/nonz  |
1090         """
1091
1092         # for LD/ST-indexed (RA+RB):
1093         """
1094             | 0-1 |  2  |  3   4  |  description              |
1095             | --- | --- |---------|-------------------------- |
1096             | 00  | SEA |  dz  sz | normal mode        |
1097             | 01  | SEA | dz sz  | Strided (scalar only source)   |
1098             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1099             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1100             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1101         """
1102
1103         # and leaving out branches and cr_ops for now because they're
1104         # under development
1105         """ TODO branches and cr_ops
1106         """
1107
1108         # now create mode and (overridden) src/dst widths
1109         # XXX TODO: sanity-check bc modes
1110         if is_bc:
1111             sv_mode = ((bc_svstep << SVP64MODE.MOD2_MSB) |
1112                        (bc_vlset << SVP64MODE.MOD2_LSB) |
1113                        (bc_snz << SVP64MODE.BC_SNZ))
1114             srcwid = (bc_vsb << 1) | bc_lru
1115             destwid = (bc_lru << 1) | bc_all
1116
1117         else:
1118
1119             ######################################
1120             # "normal" mode
1121             if sv_mode is None:
1122                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1123                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1124                 if is_ldst:
1125                     # TODO: for now, LD/ST-indexed is ignored.
1126                     mode |= ldst_elstride << SVP64MODE.ELS_NORMAL  # el-strided
1127                 else:
1128                     # TODO, reduce and subvector mode
1129                     # 00  1   dz CRM  reduce mode (mapreduce), SUBVL=1
1130                     # 00  1   SVM CRM subvector reduce mode, SUBVL>1
1131                     pass
1132                 sv_mode = 0b00
1133
1134             ######################################
1135             # "mapreduce" modes
1136             elif sv_mode == 0b00:
1137                 mode |= (0b1 << SVP64MODE.REDUCE)  # sets mapreduce
1138                 assert dst_zero == 0, "dest-zero not allowed in mapreduce mode"
1139                 if reverse_gear:
1140                     mode |= (0b1 << SVP64MODE.RG)  # sets Reverse-gear mode
1141                 if mapreduce_crm:
1142                     mode |= (0b1 << SVP64MODE.CRM)  # sets CRM mode
1143                     assert rc_mode, "CRM only allowed when Rc=1"
1144                 # bit of weird encoding to jam zero-pred or SVM mode in.
1145                 # SVM mode can be enabled only when SUBVL=2/3/4 (vec2/3/4)
1146                 if subvl == 0:
1147                     mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1148                 elif mapreduce_svm:
1149                     mode |= (0b1 << SVP64MODE.SVM)  # sets SVM mode
1150
1151             ######################################
1152             # "failfirst" modes
1153             elif sv_mode == 0b01:
1154                 assert src_zero == 0, "dest-zero not allowed in failfirst mode"
1155                 if failfirst == 'RC1':
1156                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1157                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1158                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1159                 elif failfirst == '~RC1':
1160                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1161                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1162                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1163                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1164                 else:
1165                     assert dst_zero == 0, "dst-zero not allowed in ffirst BO"
1166                     assert rc_mode, "ffirst BO only possible when Rc=1"
1167                     mode |= (failfirst << SVP64MODE.BO_LSB)  # set BO
1168
1169             ######################################
1170             # "saturation" modes
1171             elif sv_mode == 0b10:
1172                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1173                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1174                 mode |= (saturation << SVP64MODE.N)  # signed/us saturation
1175
1176             ######################################
1177             # "predicate-result" modes.  err... code-duplication from ffirst
1178             elif sv_mode == 0b11:
1179                 assert src_zero == 0, "dest-zero not allowed in predresult mode"
1180                 if predresult == 'RC1':
1181                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1182                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1183                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1184                 elif predresult == '~RC1':
1185                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1186                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1187                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1188                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1189                 else:
1190                     assert dst_zero == 0, "dst-zero not allowed in pr-mode BO"
1191                     assert rc_mode, "pr-mode BO only possible when Rc=1"
1192                     mode |= (predresult << SVP64MODE.BO_LSB)  # set BO
1193
1194         # whewww.... modes all done :)
1195         # now put into svp64_rm
1196         mode |= sv_mode
1197         # mode: bits 19-23
1198         svp64_rm.mode = mode
1199
1200         # put in predicate masks into svp64_rm
1201         if ptype == '2P':
1202             # source pred: bits 16-18
1203             svp64_rm.smask = smask
1204         # mask mode: bit 0
1205         svp64_rm.mmode = mmode
1206         # 1-pred: bits 1-3
1207         svp64_rm.mask = pmask
1208
1209         # and subvl: bits 8-9
1210         svp64_rm.subvl = subvl
1211
1212         # put in elwidths
1213         # srcwid: bits 6-7
1214         svp64_rm.ewsrc = srcwid
1215         # destwid: bits 4-5
1216         svp64_rm.elwidth = destwid
1217
1218         # nice debug printout. (and now for something completely different)
1219         # https://youtu.be/u0WOIwlXE9g?t=146
1220         svp64_rm_value = int(svp64_rm)
1221         log("svp64_rm", hex(svp64_rm_value), bin(svp64_rm_value))
1222         log("    mmode  0    :", bin(mmode))
1223         log("    pmask  1-3  :", bin(pmask))
1224         log("    dstwid 4-5  :", bin(destwid))
1225         log("    srcwid 6-7  :", bin(srcwid))
1226         log("    subvl  8-9  :", bin(subvl))
1227         log("    mode   19-23:", bin(mode))
1228         offs = 2 if etype == 'EXTRA2' else 3  # 2 or 3 bits
1229         for idx, sv_extra in extras.items():
1230             if idx is None:
1231                 continue
1232             if idx[0] == 'imm':
1233                 continue
1234             srcdest, idx, duplicate = idx
1235             start = (10+idx*offs)
1236             end = start + offs-1
1237             log("    extra%d %2d-%2d:" % (idx, start, end),
1238                 bin(sv_extra))
1239         if ptype == '2P':
1240             log("    smask  16-17:", bin(smask))
1241         log()
1242
1243         # update prefix PO and ID (aka PID)
1244         svp64_prefix.PO = 0x1
1245         svp64_prefix.id = 0b11
1246
1247         # fiinally yield the svp64 prefix and the thingy.  v3.0b opcode
1248         rc = '.' if rc_mode else ''
1249         yield ".long 0x%08x" % int(svp64_prefix)
1250         log(v30b_op, v30b_newfields)
1251         # argh, sv.fmadds etc. need to be done manually
1252         if v30b_op == 'ffmadds':
1253             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1254             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1255             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1256             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1257             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1258             opcode |= 0b00101 << (32-31)   # bits 26-30
1259             if rc:
1260                 opcode |= 1  # Rc, bit 31.
1261             yield ".long 0x%x" % opcode
1262         # argh, sv.fdmadds need to be done manually
1263         elif v30b_op == 'fdmadds':
1264             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1265             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1266             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1267             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1268             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1269             opcode |= 0b01111 << (32-31)   # bits 26-30
1270             if rc:
1271                 opcode |= 1  # Rc, bit 31.
1272             yield ".long 0x%x" % opcode
1273         # argh, sv.ffadds etc. need to be done manually
1274         elif v30b_op == 'ffadds':
1275             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1276             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1277             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1278             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1279             opcode |= 0b01101 << (32-31)   # bits 26-30
1280             if rc:
1281                 opcode |= 1  # Rc, bit 31.
1282             yield ".long 0x%x" % opcode
1283         # sigh have to do svstep here manually for now...
1284         elif v30b_op in ["svstep", "svstep."]:
1285             insn = 22 << (31-5)          # opcode 22, bits 0-5
1286             insn |= int(v30b_newfields[0]) << (31-10)  # RT       , bits 6-10
1287             insn |= int(v30b_newfields[1]) << (31-22)  # SVi      , bits 16-22
1288             insn |= int(v30b_newfields[2]) << (31-25)  # vf       , bit  25
1289             insn |= 0b10011 << (31-30)  # XO       , bits 26..30
1290             if opcode == 'svstep.':
1291                 insn |= 1 << (31-31)     # Rc=1     , bit 31
1292             log("svstep", bin(insn))
1293             yield ".long 0x%x" % insn
1294         # argh, sv.fcoss etc. need to be done manually
1295         elif v30b_op in ["fcoss", "fcoss."]:
1296             insn = 59 << (31-5)  # opcode 59, bits 0-5
1297             insn |= int(v30b_newfields[0]) << (31-10)  # RT       , bits 6-10
1298             insn |= int(v30b_newfields[1]) << (31-20)  # RB       , bits 16-20
1299             insn |= 0b1000101110 << (31-30)  # XO       , bits 21..30
1300             if opcode == 'fcoss.':
1301                 insn |= 1 << (31-31)     # Rc=1     , bit 31
1302             log("fcoss", bin(insn))
1303             yield ".long 0x%x" % insn
1304         else:
1305             if not v30b_op.endswith('.'):
1306                 v30b_op += rc
1307             yield "%s %s" % (v30b_op, ", ".join(v30b_newfields))
1308         log("new v3.0B fields", v30b_op, v30b_newfields)
1309
1310     def translate(self, lst):
1311         for insn in lst:
1312             yield from self.translate_one(insn)
1313
1314
def macro_subst(macros, txt):
    """Expand macros in one operand string and return the result.

    ``macros`` maps macro names to replacement text.  For every macro the
    first of these forms that applies is substituted:

    * ``txt`` is exactly the macro name
    * ``txt`` is exactly ``name.s`` or ``name.v`` (the suffix is kept)
    * ``txt`` contains ``(name)`` anywhere (the parentheses are kept)

    Passes over ``macros`` are repeated so that a macro whose value names
    another macro gets expanded as well.  Expansion stops as soon as a pass
    ends on a text that has been seen before: that covers the normal case
    of a pass that replaces nothing, and also self-referential or cyclic
    definitions (e.g. ``x`` defined as ``x``), which would otherwise be
    re-substituted forever.  A value that grows on every pass (one that
    contains its own ``(name)``) is not detected.
    """
    # (template, match-anywhere?) pairs, tried in this order for each macro
    forms = (('%s', False), ('%s.s', False), ('%s.v', False), ('(%s)', True))
    log("subst", txt, macros)
    # a pass is a pure function of txt, so ending a pass on a text already
    # seen means further passes could only repeat themselves
    seen = {txt}
    while True:
        for macro, value in macros.items():
            for template, anywhere in forms:
                toreplace = template % macro
                found = (toreplace in txt) if anywhere else (toreplace == txt)
                if not found:
                    continue
                replaced = txt.replace(toreplace, template % value)
                log("macro", txt, "replaced", replaced, toreplace, value)
                txt = replaced
                break  # at most one form is applied per macro per pass
        if txt in seen:
            break
        seen.add(txt)
    log("    processed", txt)
    return txt
1350
1351
def get_ws(line):
    """Split ``line`` into its leading whitespace and the remainder.

    Returns ``(ws, rest)``; ``ws`` is empty when the line does not start
    with whitespace and ``rest`` is empty when the line is all whitespace.
    """
    # index of the first non-whitespace character (end of line if none)
    cut = len(line)
    for pos, char in enumerate(line):
        if not char.isspace():
            cut = pos
            break
    return line[:cut], line[cut:]
1361
1362
def asm_process():
    """Command-line filter turning SVP64 assembly into plain assembly.

    Arguments are taken from ``sys.argv``: with none, stdin is read and
    stdout written; with two, they name the input and the output, and
    either may be ``-`` (as the usage message says) or ``--`` (accepted
    historically) to select stdin / stdout.  Any other argument count
    prints the usage message to stderr and exits.

    Every line whose operation (the text before any ``#``) starts with
    ``sv.`` or with one of the ``CUSTOM_INSNS`` prefixes is replaced by the
    output of ``translate_one`` joined with "; ", keeping the original
    indentation and appending the original operation as a comment.  All
    other lines are copied through unchanged.  ``.set name, value`` lines
    are additionally recorded as macros for later translations.
    """
    std_stream_names = ('-', '--')

    args = sys.argv[1:]
    if len(args) not in (0, 2):
        print("pysvp64asm [infile | -] [outfile | -]", file=sys.stderr)
        # exit status deliberately left at 0, as before
        sys.exit(0)
    in_name, out_name = args if args else ('-', '-')

    # read the whole lot in advance in case of in-place overwrite
    if in_name in std_stream_names:
        lines = sys.stdin.readlines()
    else:
        with open(in_name, "r") as infile:
            lines = infile.readlines()

    if out_name in std_stream_names:
        outfile = sys.stdout
    else:
        outfile = open(out_name, "w")

    try:
        # read the line, look for custom insn, process it
        macros = {}  # macros which start ".set"
        isa = SVP64Asm([])
        translated_prefixes = ('sv.',) + tuple(CUSTOM_INSNS)
        for line in lines:
            op = line.split("#")[0].strip()
            # identify macros (the directive itself is still copied through)
            if op.startswith(".set"):
                (macro, value) = map(str.strip, op[4:].split(","))
                macros[macro] = value
            if not op.startswith(translated_prefixes):
                outfile.write(line)
                continue

            ws, _ = get_ws(line)
            translated = '; '.join(isa.translate_one(op, macros))
            outfile.write("%s%s # %s\n" % (ws, translated, op))
    finally:
        # only close what was opened here, never the process's stdout
        if outfile is not sys.stdout:
            outfile.close()
1405
1406
if __name__ == '__main__':
    # Scratch area of hard-coded instruction lists.  NOTE: `lst` is rebound
    # several times below, so only the LAST assignment is what actually gets
    # handed to SVP64Asm; the earlier lists are kept as ready-made examples.
    lst = ['slw 3, 1, 4',
           'extsw 5, 3',
           'sv.extsw 5, 3',
           'sv.cmpi 5, 1, 3, 2',
           'sv.setb 5, 31',
           'sv.isel 64.v, 3, 2, 65.v',
           'sv.setb/dm=r3/sm=1<<r3 5, 31',
           'sv.setb/m=r3 5, 31',
           'sv.setb/vec2 5, 31',
           'sv.setb/sw=8/ew=16 5, 31',
           'sv.extsw./ff=eq 5, 31',
           'sv.extsw./satu/sz/dz/sm=r3/dm=r3 5, 31',
           'sv.extsw./pr=eq 5.v, 31',
           'sv.add. 5.v, 2.v, 1.v',
           'sv.add./m=r3 5.v, 2.v, 1.v',
           ]
    # extends the list above (the only place where lists are combined)
    lst += [
        'sv.stw 5.v, 4(1.v)',
        'sv.ld 5.v, 4(1.v)',
        'setvl. 2, 3, 4, 0, 1, 1',
        'sv.setvl. 2, 3, 4, 0, 1, 1',
    ]
    # from here on every `lst = [...]` discards the previous contents
    lst = [
        "sv.stfsu 0.v, 16(4.v)",
    ]
    lst = [
        "sv.stfsu/els 0.v, 16(4)",
    ]
    lst = [
        'sv.add./mr 5.v, 2.v, 1.v',
    ]
    # macro table passed to SVP64Asm below (names used by the 'sv.addi' list)
    macros = {'win2': '50', 'win': '60'}
    lst = [
        'sv.addi win2.v, win.v, -1',
        'sv.add./mrr 5.v, 2.v, 1.v',
        #'sv.lhzsh 5.v, 11(9.v), 15',
        #'sv.lwzsh 5.v, 11(9.v), 15',
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',
    ]
    lst = [
        #'sv.fmadds 0.v, 8.v, 16.v, 4.v',
        #'sv.ffadds 0.v, 8.v, 4.v',
        'svremap 11, 0, 1, 2, 3, 2, 1',
        'svshape 8, 1, 1, 1, 0',
        'svshape 8, 1, 1, 1, 1',
    ]
    lst = [
        #'sv.lfssh 4.v, 11(8.v), 15',
        #'sv.lwzsh 4.v, 11(8.v), 15',
        #'sv.svstep. 2.v, 4, 0',
        #'sv.fcfids. 48.v, 64.v',
        'sv.fcoss. 80.v, 0.v',
        'sv.fcoss. 20.v, 0.v',
    ]
    lst = [
        'sv.bc/all 3,12,192',
        'sv.bclr/vsbi 3,81.v,192',
        'sv.ld 5.v, 4(1.v)',
        'sv.svstep. 2.v, 4, 0',
    ]
    lst = [
        'maxs 3,12,5',
        'maxs. 3,12,5',
        'avgadd 3,12,5',
        'absdu 3,12,5',
        'absds 3,12,5',
        'absdacu 3,12,5',
        'absdacs 3,12,5',
        'cprop 3,12,5',
        'svindex 0,0,1,0,0,0,0',
    ]
    lst = [
        'sv.svstep./m=r3 2.v, 4, 0',
        'ternlogi 0,0,0,0x5',
        'fmvis 5,65535',
        'fmvis 5,1',
        'fmvis 5,2',
        'fmvis 5,4',
        'fmvis 5,8',
        'fmvis 5,16',
        'fmvis 5,32',
        'fmvis 5,64',
        'fmvis 5,32768',
    ]
    # final assignment: this is the list that is actually translated
    lst = [
        'sv.andi. *80, *80, 1',
        'sv.ffmadds. 6.v, 2.v, 4.v, 6.v', # incorrectly inserted 32-bit op
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',  # correctly converted to .long
    ]
    # build the translator, then iterate it and log everything it produces
    isa = SVP64Asm(lst, macros=macros)
    log("list:\n", "\n\t".join(list(isa)))
    # running svp64.py is designed to test hard-coded lists
    # (above) - which strictly speaking should all be unit tests.
    # if you need to actually do assembler translation at the
    # commandline use "pysvp64asm" - see setup.py
    # XXX NO. asm_process()