src/openpower/sv/trans/svp64.py

   1 # SPDX-License-Identifier: LGPLv3+
   2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   3 # Funded by NLnet http://nlnet.nl
   4
   5 """SVP64 OpenPOWER v3.0B assembly translator
   6
   7 This class takes raw svp64 assembly mnemonics (aliases excluded) and creates
   8 an EXT001-encoded "svp64 prefix" (as a .long) followed by a v3.0B opcode.
   9
  10 It is very simple and straightforward, the only weirdness being the
  11 extraction of the register information and conversion to v3.0B numbering.
  12
  13 Encoding format of svp64: https://libre-soc.org/openpower/sv/svp64/
  14 Encoding format of arithmetic: https://libre-soc.org/openpower/sv/normal/
  15 Encoding format of LDST: https://libre-soc.org/openpower/sv/ldst/
  16 **TODO format of branches: https://libre-soc.org/openpower/sv/branches/**
  17 **TODO format of CRs: https://libre-soc.org/openpower/sv/cr_ops/**
  18 Bugtracker: https://bugs.libre-soc.org/show_bug.cgi?id=578
  19 """
  20
  21 import functools
  22 import os
  23 import sys
  24 from collections import OrderedDict
  25
  26 from openpower.decoder.pseudo.pagereader import ISA
  27 from openpower.decoder.power_svp64 import SVP64RM, get_regtype, decode_extra
  28 from openpower.decoder.selectable_int import SelectableInt
  29 from openpower.consts import SVP64MODE
  30 from openpower.decoder.power_insn import SVP64Instruction
  31 from openpower.decoder.power_insn import Database
  32 from openpower.decoder.power_enums import find_wiki_dir
  33
  34 # for debug logging
  35 from openpower.util import log
  36
  37
  38 def instruction(*fields):
  39     def instruction(insn, desc):
  40         (value, start, end) = desc
  41         bits = ((1,) * ((end + 1) - start))
  42         mask = 0
  43         for bit in bits:
  44             mask = ((mask << 1) | bit)
  45         return (insn | ((value & mask) << (31 - end)))
  46
  47     return functools.reduce(instruction, fields, 0)
  48
  49
  50 def setvl(fields, Rc):
  51     """
  52     setvl is a *32-bit-only* instruction. It controls SVSTATE.
  53     It is *not* a 64-bit-prefixed Vector instruction (no sv.setvl, yet),
  54     it is a Vector *control* instruction.
  55
  56     * setvl RT,RA,SVi,vf,vs,ms
  57
  58     1.6.28 SVL-FORM - from fields.txt
  59     |0     |6    |11    |16   |23 |24 |25 |26    |31 |
  60     | PO   |  RT |   RA | SVi |ms |vs |vf |   XO |Rc |
  61     """
  62     PO = 22
  63     XO = 0b11011
  64     # ARRRGH these are in a non-obvious order in openpower/isa/simplev.mdwn
  65     # compared to the SVL-Form above. sigh
  66     # setvl RT,RA,SVi,vf,vs,ms
  67     (RT, RA, SVi, vf, vs, ms) = fields
  68     SVi -= 1
  69     return instruction(
  70         (PO, 0, 5),
  71         (RT, 6, 10),
  72         (RA, 11, 15),
  73         (SVi, 16, 22),
  74         (ms, 23, 23),
  75         (vs, 24, 24),
  76         (vf, 25, 25),
  77         (XO, 26, 30),
  78         (Rc, 31, 31),
  79     )
  80
  81
  82 def svstep(fields, Rc):
  83     """
  84     svstep is a 32-bit instruction. It updates SVSTATE.
  85     It *can* be SVP64-prefixed, to indicate that its registers
  86     are Vectorised.
  87
  88     * svstep RT,SVi,vf
  89
  90     # 1.6.28 SVL-FORM - from fields.txt
  91     # |0     |6    |11    |16   |23 |24 |25 |26    |31 |
  92     # | PO   |  RT | /    | SVi |/  |/  |vf |   XO |Rc |
  93
  94     """
  95     PO = 22
  96     XO = 0b10011
  97     (RT, SVi, vf) = fields
  98     SVi -= 1
  99     return instruction(
 100         (PO, 0, 5),
 101         (RT, 6, 10),
 102         (0, 11, 15),
 103         (SVi, 16, 22),
 104         (0, 23, 23),
 105         (0, 24, 24),
 106         (vf, 25, 25),
 107         (XO, 26, 30),
 108         (Rc, 31, 31),
 109     )
 110
 111
 112 def svshape(fields):
 113     """
 114     svshape is a *32-bit-only* instruction. It updates SVSHAPE and SVSTATE.
 115     It is *not* a 64-bit-prefixed Vector instruction (no sv.svshape, yet),
 116     it is a Vector *control* instruction.
 117
 118     * svshape SVxd,SVyd,SVzd,SVrm,vf
 119
 120     # 1.6.33 SVM-FORM from fields.txt
 121     # |0     |6        |11      |16    |21    |25 |26    |31  |
 122     # | PO   |  SVxd   |   SVyd | SVzd | SVrm |vf |   XO      |
 123
 124     """
 125     PO = 22
 126     XO = 0b011001
 127     (SVxd, SVyd, SVzd, SVrm, vf) = fields
 128     SVxd -= 1
 129     SVyd -= 1
 130     SVzd -= 1
 131     return instruction(
 132         (PO, 0, 5),
 133         (SVxd, 6, 10),
 134         (SVyd, 11, 15),
 135         (SVzd, 16, 20),
 136         (SVrm, 21, 24),
 137         (vf, 25, 25),
 138         (XO, 26, 31),
 139     )
 140
 141
 142 def svindex(fields):
 143     """
 144     svindex is a *32-bit-only* instruction. It is a convenience
 145     instruction that reduces instruction count for Indexed REMAP
 146     Mode.
 147     It is *not* a 64-bit-prefixed Vector instruction (no sv.svindex, yet),
 148     it is a Vector *control* instruction.
 149
 150     1.6.28 SVI-FORM
 151       |0     |6    |11    |16   |21 |23|24|25|26    31|
 152       | PO   |  SVG|rmm   | SVd |ew |yx|mm|sk|   XO   |
 153     """
 154     # note that the dimension field one subtracted
 155     PO = 22
 156     XO = 0b101001
 157     (SVG, rmm, SVd, ew, yx, mm, sk) = fields
 158     SVd -= 1
 159     return instruction(
 160         (PO, 0, 5),
 161         (SVG, 6, 10),
 162         (rmm, 11, 15),
 163         (SVd, 16, 20),
 164         (ew, 21, 22),
 165         (yx, 23, 23),
 166         (mm, 24, 24),
 167         (sk, 25, 25),
 168         (XO, 26, 31),
 169     )
 170
 171
 172 def svremap(fields):
 173     """
 174     this is a *32-bit-only* instruction. It updates the SVSHAPE SPR
 175     it is *not* a 64-bit-prefixed Vector instruction (no sv.svremap),
 176     it is a Vector *control* instruction.
 177
 178     * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
 179
 180     # 1.6.34 SVRM-FORM
 181        |0     |6     |11  |13   |15   |17   |19   |21  |22   |26     |31 |
 182        | PO   | SVme |mi0 | mi1 | mi2 | mo0 | mo1 |pst |///  | XO        |
 183
 184     """
 185     PO = 22
 186     XO = 0b111001
 187     (SVme, mi0, mi1, mi2, mo0, mo1, pst) = fields
 188     return instruction(
 189         (PO, 0, 5),
 190         (SVme, 6, 10),
 191         (mi0, 11, 12),
 192         (mi1, 13, 14),
 193         (mi2, 15, 16),
 194         (mo0, 17, 18),
 195         (mo1, 19, 20),
 196         (pst, 21, 21),
 197         (0, 22, 25),
 198         (XO, 26, 31),
 199     )
 200
 201
 202 # ok from here-on down these are added as 32-bit instructions
 203 # and are here only because binutils (at present) doesn't have
 204 # them (that's being fixed!)
 205 # they can - if implementations then choose - be Vectorised
 206 # because they are general-purpose scalar instructions
 207 def bmask(fields):
 208     """
 209     1.6.2.2 BM2-FORM
 210     |0     |6    |11    |16    |21   |26 |27    31|
 211     | PO   |  RT |   RA |   RB |bm   |L  |   XO   |
 212     """
 213     PO = 22
 214     XO = 0b010001
 215     (RT, RA, RB, bm, L) = fields
 216     return instruction(
 217         (PO, 0, 5),
 218         (RT, 6, 10),
 219         (RA, 11, 15),
 220         (RB, 16, 20),
 221         (bm, 21, 25),
 222         (L, 26, 26),
 223         (XO, 27, 31),
 224     )
 225
 226
 227 def fsins(fields, Rc):
 228     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 229     # however we are out of space with opcode 22
 230     # 1.6.7 X-FORM
 231     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 232     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 233     PO = 59
 234     XO = 0b1000001110
 235     (FRT, FRB) = fields
 236     return instruction(
 237         (PO, 0, 5),
 238         (FRT, 6, 10),
 239         (0, 11, 15),
 240         (FRB, 16, 20),
 241         (XO, 21, 30),
 242         (Rc, 31, 31),
 243     )
 244
 245
 246 def fcoss(fields, Rc):
 247     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 248     # however we are out of space with opcode 22
 249     # 1.6.7 X-FORM
 250     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 251     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 252     PO = 59
 253     XO = 0b1000101110
 254     (FRT, FRB) = fields
 255     return instruction(
 256         (PO, 0, 5),
 257         (FRT, 6, 10),
 258         (0, 11, 15),
 259         (FRB, 16, 20),
 260         (XO, 21, 30),
 261         (Rc, 31, 31),
 262     )
 263
 264
 265 def ternlogi(fields, Rc):
 266     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 267     # however we are out of space with opcode 22
 268     # 1.6.34 TLI-FORM
 269     #   |0   |6   |11   |16   |21   |29  |31 |
 270     #   | PO | RT |  RA |  RB | TLI | XO |Rc |
 271     PO = 5
 272     XO = 0
 273     (RT, RA, RB, TLI) = fields
 274     return instruction(
 275         (PO, 0, 5),
 276         (RT, 6, 10),
 277         (RA, 11, 15),
 278         (RB, 16, 20),
 279         (TLI, 21, 28),
 280         (XO, 29, 30),
 281         (Rc, 31, 31),
 282     )
 283
 284
 285 def grev(fields, Rc, imm, wide):
 286     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 287     # however we are out of space with opcode 22
 288     insn = PO = 5
 289     # _ matches fields in table at:
 290     # https://libre-soc.org/openPOwer/sv/bitmanip/
 291     XO = 0b1_0010_110
 292     if wide:
 293         XO |= 0b100_000
 294     if imm:
 295         XO |= 0b1000_000
 296     (RT, RA, XBI) = fields
 297     insn = (insn << 5) | RT
 298     insn = (insn << 5) | RA
 299     if imm and not wide:
 300         assert 0 <= XBI < 64
 301         insn = (insn << 6) | XBI
 302         insn = (insn << 9) | XO
 303     else:
 304         assert 0 <= XBI < 32
 305         insn = (insn << 5) | XBI
 306         insn = (insn << 10) | XO
 307     insn = (insn << 1) | Rc
 308     return insn
 309
 310
 311 def av(fields, XO, Rc):
 312     # 1.6.7 X-FORM
 313     #   |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 314     #   | PO   |       RT      |    RA       |    RB       |   XO |Rc  |
 315     PO = 22
 316     (RT, RA, RB) = fields
 317     return instruction(
 318         (PO, 0, 5),
 319         (RT, 6, 10),
 320         (RA, 11, 15),
 321         (RB, 16, 20),
 322         (XO, 21, 30),
 323         (Rc, 31, 31),
 324     )
 325
 326
 327 def fmvis(fields):
 328     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 329     # V3.0B 1.6.6 DX-FORM
 330     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27    |31  |
 331     # | PO   |   FRS         |     d1      |      d0  |      XO |d2  |
 332     PO = 22
 333     XO = 0b00011
 334     (FRS, imm) = fields
 335     # first split imm into d1, d0 and d2. sigh
 336     d2 = (imm & 1)  # LSB (0)
 337     d1 = (imm >> 1) & 0b11111  # bits 1-5
 338     d0 = (imm >> 6)  # MSBs 6-15
 339     return instruction(
 340         (PO, 0, 5),
 341         (FRS, 6, 10),
 342         (d1,  11, 15),
 343         (d0,  16, 25),
 344         (XO, 26, 30),
 345         (d2, 31, 31),
 346     )
 347
 348
 349 def fishmv(fields):
 350     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 351     # V3.0B 1.6.6 DX-FORM
 352     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27   |31  |
 353     # | PO   |   FRS         |     d1      |      d0  |     XO |d2  |
 354     PO = 22
 355     XO = 0b01011
 356     (FRS, imm) = fields
 357     # first split imm into d1, d0 and d2. sigh
 358     d2 = (imm & 1)  # LSB (0)
 359     d1 = (imm >> 1) & 0b11111  # bits 1-5
 360     d0 = (imm >> 6)  # MSBs 6-15
 361     return instruction(
 362         (PO, 0, 5),
 363         (FRS, 6, 10),
 364         (d1,  11, 15),
 365         (d0,  16, 25),
 366         (XO, 26, 30),
 367         (d2, 31, 31),
 368     )
 369
 370
 371 CUSTOM_INSNS = {}
 372 for (name, hook) in (
 373     ("setvl", setvl),
 374     ("svstep", svstep),
 375     ("fsins", fsins),
 376     ("fcoss", fcoss),
 377     ("ternlogi", ternlogi),
 378 ):
 379     CUSTOM_INSNS[name] = functools.partial(hook, Rc=False)
 380     CUSTOM_INSNS[f"{name}."] = functools.partial(hook, Rc=True)
 381 CUSTOM_INSNS["bmask"] = bmask
 382 CUSTOM_INSNS["svshape"] = svshape
 383 CUSTOM_INSNS["svindex"] = svindex
 384 CUSTOM_INSNS["svremap"] = svremap
 385 CUSTOM_INSNS["fmvis"] = fmvis
 386 CUSTOM_INSNS["fishmv"] = fishmv
 387
 388 for (name, imm, wide) in (
 389     ("grev", False, False),
 390     ("grevi", True, False),
 391     ("grevw", False, True),
 392     ("grevwi", True, True),
 393 ):
 394     CUSTOM_INSNS[name] = functools.partial(grev,
 395                                            imm=("i" in name), wide=("w" in name), Rc=False)
 396     CUSTOM_INSNS[f"{name}."] = functools.partial(grev,
 397                                                  imm=("i" in name), wide=("w" in name), Rc=True)
 398
 399 for (name, XO) in (
 400     ("maxs", 0b0111001110),
 401     ("maxu", 0b0011001110),
 402     ("minu", 0b0001001110),
 403     ("mins", 0b0101001110),
 404     ("absdu", 0b1011110110),
 405     ("absds", 0b1001110110),
 406     ("avgadd", 0b1101001110),
 407     ("absdacu", 0b1111110110),
 408     ("absdacs", 0b0111110110),
 409     ("cprop", 0b0110001110),
 410 ):
 411     CUSTOM_INSNS[name] = functools.partial(av, XO=XO, Rc=False)
 412     CUSTOM_INSNS[f"{name}."] = functools.partial(av, XO=XO, Rc=True)
 413
 414
 415 # decode GPR into sv extra
 416 def get_extra_gpr(etype, regmode, field):
 417     if regmode == 'scalar':
 418         # cut into 2-bits 5-bits SS FFFFF
 419         sv_extra = field >> 5
 420         field = field & 0b11111
 421     else:
 422         # cut into 5-bits 2-bits FFFFF SS
 423         sv_extra = field & 0b11
 424         field = field >> 2
 425     return sv_extra, field
 426
 427
 428 # decode 3-bit CR into sv extra
 429 def get_extra_cr_3bit(etype, regmode, field):
 430     if regmode == 'scalar':
 431         # cut into 2-bits 3-bits SS FFF
 432         sv_extra = field >> 3
 433         field = field & 0b111
 434     else:
 435         # cut into 3-bits 4-bits FFF SSSS but will cut 2 zeros off later
 436         sv_extra = field & 0b1111
 437         field = field >> 4
 438     return sv_extra, field
 439
 440
 441 # decodes SUBVL
 442 def decode_subvl(encoding):
 443     pmap = {'2': 0b01, '3': 0b10, '4': 0b11}
 444     assert encoding in pmap, \
 445         "encoding %s for SUBVL not recognised" % encoding
 446     return pmap[encoding]
 447
 448
 449 # decodes elwidth
 450 def decode_elwidth(encoding):
 451     pmap = {'8': 0b11, '16': 0b10, '32': 0b01}
 452     assert encoding in pmap, \
 453         "encoding %s for elwidth not recognised" % encoding
 454     return pmap[encoding]
 455
 456
 457 # decodes predicate register encoding
 458 def decode_predicate(encoding):
 459     pmap = {  # integer
 460         '1<<r3': (0, 0b001),
 461         'r3': (0, 0b010),
 462         '~r3': (0, 0b011),
 463         'r10': (0, 0b100),
 464         '~r10': (0, 0b101),
 465         'r30': (0, 0b110),
 466         '~r30': (0, 0b111),
 467         # CR
 468         'lt': (1, 0b000),
 469         'nl': (1, 0b001), 'ge': (1, 0b001),  # same value
 470         'gt': (1, 0b010),
 471         'ng': (1, 0b011), 'le': (1, 0b011),  # same value
 472         'eq': (1, 0b100),
 473         'ne': (1, 0b101),
 474         'so': (1, 0b110), 'un': (1, 0b110),  # same value
 475         'ns': (1, 0b111), 'nu': (1, 0b111),  # same value
 476     }
 477     assert encoding in pmap, \
 478         "encoding %s for predicate not recognised" % encoding
 479     return pmap[encoding]
 480
 481
 482 # decodes "Mode" in similar way to BO field (supposed to, anyway)
 483 def decode_bo(encoding):
 484     pmap = {  # TODO: double-check that these are the same as Branch BO
 485         'lt': 0b000,
 486         'nl': 0b001, 'ge': 0b001,  # same value
 487         'gt': 0b010,
 488         'ng': 0b011, 'le': 0b011,  # same value
 489         'eq': 0b100,
 490         'ne': 0b101,
 491         'so': 0b110, 'un': 0b110,  # same value
 492         'ns': 0b111, 'nu': 0b111,  # same value
 493     }
 494     assert encoding in pmap, \
 495         "encoding %s for BO Mode not recognised" % encoding
 496     return pmap[encoding]
 497
 498
 499 # partial-decode fail-first mode
 500 def decode_ffirst(encoding):
 501     if encoding in ['RC1', '~RC1']:
 502         return encoding
 503     return decode_bo(encoding)
 504
 505
 506 def decode_reg(field, macros=None):
 507     if macros is None:
 508         macros = {}
 509     # decode the field number. "5.v" or "3.s" or "9"
 510     # and now also "*0", and "*%0".  note: *NOT* to add "*%rNNN" etc.
 511     # https://bugs.libre-soc.org/show_bug.cgi?id=884#c0
 512     if field.startswith(("*%", "*")):
 513         if field.startswith("*%"):
 514             field = field[2:]
 515         else:
 516             field = field[1:]
 517         while field in macros:
 518             field = macros[field]
 519         return int(field), "vector"  # actual register number
 520
 521     # try old convention (to be retired)
 522     field = field.split(".")
 523     regmode = 'scalar'  # default
 524     if len(field) == 2:
 525         if field[1] == 's':
 526             regmode = 'scalar'
 527         elif field[1] == 'v':
 528             regmode = 'vector'
 529     field = int(field[0])  # actual register number
 530     return field, regmode
 531
 532
 533 def decode_imm(field):
 534     ldst_imm = "(" in field and field[-1] == ')'
 535     if ldst_imm:
 536         return field[:-1].split("(")
 537     else:
 538         return None, field
 539
 540
 541 def crf_extra(etype, regmode, field, extras):
 542     """takes a CR Field number (CR0-CR127), splits into EXTRA2/3 and v3.0
 543     the scalar/vector mode (crNN.v or crNN.s) changes both the format
 544     of the EXTRA2/3 encoding as well as what range of registers is possible.
 545     this function can be used for both BF/BFA and BA/BB/BT by first removing
 546     the bottom 2 bits of BA/BB/BT then re-instating them after encoding.
 547     see https://libre-soc.org/openpower/sv/svp64/appendix/#cr_extra
 548     for specification
 549     """
 550     sv_extra, field = get_extra_cr_3bit(etype, regmode, field)
 551     # now sanity-check (and shrink afterwards)
 552     if etype == 'EXTRA2':
 553         # 3-bit CR Field (BF, BFA) EXTRA2 encoding
 554         if regmode == 'scalar':
 555             # range is CR0-CR15 in increments of 1
 556             assert (sv_extra >> 1) == 0, \
 557                 "scalar CR %s cannot fit into EXTRA2 %s" % \
 558                 (rname, str(extras[extra_idx]))
 559             # all good: encode as scalar
 560             sv_extra = sv_extra & 0b01
 561         else:  # vector
 562             # range is CR0-CR127 in increments of 16
 563             assert sv_extra & 0b111 == 0, \
 564                 "vector CR %s cannot fit into EXTRA2 %s" % \
 565                 (rname, str(extras[extra_idx]))
 566             # all good: encode as vector (bit 2 set)
 567             sv_extra = 0b10 | (sv_extra >> 3)
 568     else:
 569         # 3-bit CR Field (BF, BFA) EXTRA3 encoding
 570         if regmode == 'scalar':
 571             # range is CR0-CR31 in increments of 1
 572             assert (sv_extra >> 2) == 0, \
 573                 "scalar CR %s cannot fit into EXTRA3 %s" % \
 574                 (rname, str(extras[extra_idx]))
 575             # all good: encode as scalar
 576             sv_extra = sv_extra & 0b11
 577         else:  # vector
 578             # range is CR0-CR127 in increments of 8
 579             assert sv_extra & 0b11 == 0, \
 580                 "vector CR %s cannot fit into EXTRA3 %s" % \
 581                 (rname, str(extras[extra_idx]))
 582             # all good: encode as vector (bit 3 set)
 583             sv_extra = 0b100 | (sv_extra >> 2)
 584     return sv_extra, field
 585
 586
 587 def to_number(field):
 588     if field.startswith("0x"):
 589         return eval(field)
 590     if field.startswith("0b"):
 591         return eval(field)
 592     return int(field)
 593
 594
 595 # decodes svp64 assembly listings and creates EXT001 svp64 prefixes
 596 class SVP64Asm:
 597     def __init__(self, lst, bigendian=False, macros=None):
 598         if macros is None:
 599             macros = {}
 600         self.macros = macros
 601         self.lst = lst
 602         self.trans = self.translate(lst)
 603         self.isa = ISA()  # reads the v3.0B pseudo-code markdown files
 604         self.svp64 = SVP64RM()  # reads the svp64 Remap entries for registers
 605         assert bigendian == False, "error, bigendian not supported yet"
 606
 607     def __iter__(self):
 608         yield from self.trans
 609
 610     def translate_one(self, insn, macros=None):
 611         if macros is None:
 612             macros = {}
 613         macros.update(self.macros)
 614         isa = self.isa
 615         svp64 = self.svp64
 616         insn_no_comments = insn.partition('#')[0]
 617         # find first space, to get opcode
 618         ls = insn_no_comments.split(' ')
 619         opcode = ls[0]
 620         # now find opcode fields
 621         fields = ''.join(ls[1:]).split(',')
 622         mfields = list(map(str.strip, fields))
 623         log("opcode, fields", ls, opcode, mfields)
 624         fields = []
 625         # macro substitution
 626         for field in mfields:
 627             fields.append(macro_subst(macros, field))
 628         log("opcode, fields substed", ls, opcode, fields)
 629
 630         # identify if it is a special instruction
 631         custom_insn_hook = CUSTOM_INSNS.get(opcode)
 632         if custom_insn_hook is not None:
 633             fields = tuple(map(to_number, fields))
 634             insn_num = custom_insn_hook(fields)
 635             log(opcode, bin(insn_num))
 636             yield ".long 0x%X # %s" % (insn_num, insn)
 637             return
 638
 639         # identify if is a svp64 mnemonic
 640         if not opcode.startswith('sv.'):
 641             yield insn  # unaltered
 642             return
 643         opcode = opcode[3:]  # strip leading "sv"
 644
 645         # start working on decoding the svp64 op: sv.basev30Bop/vec2/mode
 646         opmodes = opcode.split("/")  # split at "/"
 647         v30b_op_orig = opmodes.pop(0)    # first is the v3.0B
 648         # check instruction ends with dot
 649         rc_mode = v30b_op_orig.endswith('.')
 650         if rc_mode:
 651             v30b_op = v30b_op_orig[:-1]
 652         else:
 653             v30b_op = v30b_op_orig
 654
 655         # look up the 32-bit op (original, with "." if it has it)
 656         if v30b_op_orig in isa.instr:
 657             isa_instr = isa.instr[v30b_op_orig]
 658         else:
 659             raise Exception("opcode %s of '%s' not supported" %
 660                             (v30b_op_orig, insn))
 661
 662         # look up the svp64 op, first the original (with "." if it has it)
 663         if v30b_op_orig in svp64.instrs:
 664             rm = svp64.instrs[v30b_op_orig]  # one row of the svp64 RM CSV
 665         # then without the "." (if there was one)
 666         elif v30b_op in svp64.instrs:
 667             rm = svp64.instrs[v30b_op]  # one row of the svp64 RM CSV
 668         else:
 669             raise Exception(f"opcode {v30b_op_orig!r} of "
 670                             f"{insn!r} not an svp64 instruction")
 671
 672         # get regs info e.g. "RT,RA,RB"
 673         v30b_regs = isa_instr.regs[0]
 674         log("v3.0B op", v30b_op, "Rc=1" if rc_mode else '')
 675         log("v3.0B regs", opcode, v30b_regs)
 676         log("RM", rm)
 677
 678         # right.  the first thing to do is identify the ordering of
 679         # the registers, by name.  the EXTRA2/3 ordering is in
 680         # rm['0']..rm['3'] but those fields contain the names RA, BB
 681         # etc.  we have to read the pseudocode to understand which
 682         # reg is which in our instruction. sigh.
 683
 684         # first turn the svp64 rm into a "by name" dict, recording
 685         # which position in the RM EXTRA it goes into
 686         # also: record if the src or dest was a CR, for sanity-checking
 687         # (elwidth overrides on CRs are banned)
 688         decode = decode_extra(rm)
 689         dest_reg_cr, src_reg_cr, svp64_src, svp64_dest = decode
 690
 691         log("EXTRA field index, src", svp64_src)
 692         log("EXTRA field index, dest", svp64_dest)
 693
 694         # okaaay now we identify the field value (opcode N,N,N) with
 695         # the pseudo-code info (opcode RT, RA, RB)
 696         assert len(fields) == len(v30b_regs), \
 697             "length of fields %s must match insn `%s` fields %s" % \
 698             (str(v30b_regs), insn, str(fields))
 699         opregfields = zip(fields, v30b_regs)  # err that was easy
 700
 701         # now for each of those find its place in the EXTRA encoding
 702         # note there is the possibility (for LD/ST-with-update) of
 703         # RA occurring **TWICE**.  to avoid it getting added to the
 704         # v3.0B suffix twice, we spot it as a duplicate, here
 705         extras = OrderedDict()
 706         for idx, (field, regname) in enumerate(opregfields):
 707             imm, regname = decode_imm(regname)
 708             rtype = get_regtype(regname)
 709             log("    idx find", rtype, idx, field, regname, imm)
 710             if rtype is None:
 711                 # probably an immediate field, append it straight
 712                 extras[('imm', idx, False)] = (idx, field, None, None, None)
 713                 continue
 714             extra = svp64_src.get(regname, None)
 715             if extra is not None:
 716                 extra = ('s', extra, False)  # not a duplicate
 717                 extras[extra] = (idx, field, regname, rtype, imm)
 718                 log("    idx src", idx, extra, extras[extra])
 719             dextra = svp64_dest.get(regname, None)
 720             log("regname in", regname, dextra)
 721             if dextra is not None:
 722                 is_a_duplicate = extra is not None  # duplicate spotted
 723                 dextra = ('d', dextra, is_a_duplicate)
 724                 extras[dextra] = (idx, field, regname, rtype, imm)
 725                 log("    idx dst", idx, extra, extras[dextra])
 726
 727         # great! got the extra fields in their associated positions:
 728         # also we know the register type. now to create the EXTRA encodings
 729         etype = rm['Etype']  # Extra type: EXTRA3/EXTRA2
 730         ptype = rm['Ptype']  # Predication type: Twin / Single
 731         extra_bits = 0
 732         v30b_newfields = []
 733         for extra_idx, (idx, field, rname, rtype, iname) in extras.items():
 734             # is it a field we don't alter/examine?  if so just put it
 735             # into newfields
 736             if rtype is None:
 737                 v30b_newfields.append(field)
 738                 continue
 739
 740             # identify if this is a ld/st immediate(reg) thing
 741             ldst_imm = "(" in field and field[-1] == ')'
 742             if ldst_imm:
 743                 immed, field = field[:-1].split("(")
 744
 745             field, regmode = decode_reg(field, macros=macros)
 746             log("    ", extra_idx, rname, rtype,
 747                 regmode, iname, field, end=" ")
 748
 749             # see Mode field https://libre-soc.org/openpower/sv/svp64/
 750             # XXX TODO: the following is a bit of a laborious repeated
 751             # mess, which could (and should) easily be parameterised.
 752             # XXX also TODO: the LD/ST modes which are different
 753             # https://libre-soc.org/openpower/sv/ldst/
 754
 755             # rright.  SVP64 register numbering is from 0 to 127
 756             # for GPRs, FPRs *and* CR Fields, where for v3.0 the GPRs and RPFs
 757             # are 0-31 and CR Fields are only 0-7.  the SVP64 RM "Extra"
 758             # area is used to extend the numbering from the 32-bit
 759             # instruction, and also to record whether the register
 760             # is scalar or vector. on a per-operand basis.  this
 761             # results in a slightly finnicky encoding: here we go...
 762
 763             # encode SV-GPR and SV-FPR field into extra, v3.0field
 764             if rtype in ['GPR', 'FPR']:
 765                 sv_extra, field = get_extra_gpr(etype, regmode, field)
 766                 # now sanity-check. EXTRA3 is ok, EXTRA2 has limits
 767                 # (and shrink to a single bit if ok)
 768                 if etype == 'EXTRA2':
 769                     if regmode == 'scalar':
 770                         # range is r0-r63 in increments of 1
 771                         assert (sv_extra >> 1) == 0, \
 772                             "scalar GPR %s cannot fit into EXTRA2 %s" % \
 773                             (rname, str(extras[extra_idx]))
 774                         # all good: encode as scalar
 775                         sv_extra = sv_extra & 0b01
 776                     else:
 777                         # range is r0-r127 in increments of 2 (r0 r2 ... r126)
 778                         assert sv_extra & 0b01 == 0, \
 779                             "%s: vector field %s cannot fit " \
 780                             "into EXTRA2 %s" % \
 781                             (insn, rname, str(extras[extra_idx]))
 782                         # all good: encode as vector (bit 2 set)
 783                         sv_extra = 0b10 | (sv_extra >> 1)
 784                 elif regmode == 'vector':
 785                     # EXTRA3 vector bit needs marking
 786                     sv_extra |= 0b100
 787
 788             # encode SV-CR 3-bit field into extra, v3.0field.
 789             # 3-bit is for things like BF and BFA
 790             elif rtype == 'CR_3bit':
 791                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 792
 793             # encode SV-CR 5-bit field into extra, v3.0field
 794             # 5-bit is for things like BA BB BC BT etc.
 795             # *sigh* this is the same as 3-bit except the 2 LSBs of the
 796             # 5-bit field are passed through unaltered.
 797             elif rtype == 'CR_5bit':
 798                 cr_subfield = field & 0b11  # record bottom 2 bits for later
 799                 field = field >> 2         # strip bottom 2 bits
 800                 # use the exact same 3-bit function for the top 3 bits
 801                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 802                 # reconstruct the actual 5-bit CR field (preserving the
 803                 # bottom 2 bits, unaltered)
 804                 field = (field << 2) | cr_subfield
 805
 806             else:
 807                 raise Exception("no type match: %s" % rtype)
 808
 809             # capture the extra field info
 810             log("=>", "%5s" % bin(sv_extra), field)
 811             extras[extra_idx] = sv_extra
 812
 813             # append altered field value to v3.0b, differs for LDST
 814             # note that duplicates are skipped e.g. EXTRA2 contains
 815             # *BOTH* s:RA *AND* d:RA which happens on LD/ST-with-update
 816             srcdest, idx, duplicate = extra_idx
 817             if duplicate:  # skip adding to v3.0b fields, already added
 818                 continue
 819             if ldst_imm:
 820                 v30b_newfields.append(("%s(%s)" % (immed, str(field))))
 821             else:
 822                 v30b_newfields.append(str(field))
 823
 824         log("new v3.0B fields", v30b_op, v30b_newfields)
 825         log("extras", extras)
 826
 827         # rright. now we have all the info. start creating SVP64 instruction.
 828         db = Database(find_wiki_dir())
 829         svp64_insn = SVP64Instruction.pair(prefix=0, suffix=0)
 830         svp64_prefix = svp64_insn.prefix
 831         svp64_rm = svp64_insn.prefix.rm
 832
 833         # begin with EXTRA fields
 834         for idx, sv_extra in extras.items():
 835             log(idx)
 836             if idx is None:
 837                 continue
 838             if idx[0] == 'imm':
 839                 continue
 840             srcdest, idx, duplicate = idx
 841             if etype == 'EXTRA2':
 842                 svp64_rm.extra2[idx] = sv_extra
 843             else:
 844                 svp64_rm.extra3[idx] = sv_extra
 845
 846         # identify if the op is a LD/ST. the "blegh" way. copied
 847         # from power_enums.  TODO, split the list _insns down.
 848         is_ld = v30b_op in [
 849             "lbarx", "lbz", "lbzu", "lbzux", "lbzx",            # load byte
 850             "ld", "ldarx", "ldbrx", "ldu", "ldux", "ldx",       # load double
 851             "lfs", "lfsx", "lfsu", "lfsux",                     # FP load single
 852             "lfd", "lfdx", "lfdu", "lfdux", "lfiwzx", "lfiwax",  # FP load dbl
 853             "lha", "lharx", "lhau", "lhaux", "lhax",            # load half
 854             "lhbrx", "lhz", "lhzu", "lhzux", "lhzx",            # more load half
 855             "lwa", "lwarx", "lwaux", "lwax", "lwbrx",           # load word
 856             "lwz", "lwzcix", "lwzu", "lwzux", "lwzx",           # more load word
 857         ]
 858         is_st = v30b_op in [
 859             "stb", "stbcix", "stbcx", "stbu", "stbux", "stbx",
 860             "std", "stdbrx", "stdcx", "stdu", "stdux", "stdx",
 861             "stfs", "stfsx", "stfsu", "stfux",                  # FP store sgl
 862             "stfd", "stfdx", "stfdu", "stfdux", "stfiwx",       # FP store dbl
 863             "sth", "sthbrx", "sthcx", "sthu", "sthux", "sthx",
 864             "stw", "stwbrx", "stwcx", "stwu", "stwux", "stwx",
 865         ]
 866         # use this to determine if the SVP64 RM format is different.
 867         # see https://libre-soc.org/openpower/sv/ldst/
 868         is_ldst = is_ld or is_st
 869
 870         # branch-conditional detection
 871         is_bc = v30b_op in [
 872             "bc", "bclr",
 873         ]
 874
 875         # parts of svp64_rm
 876         mmode = 0  # bit 0
 877         pmask = 0  # bits 1-3
 878         destwid = 0  # bits 4-5
 879         srcwid = 0  # bits 6-7
 880         subvl = 0   # bits 8-9
 881         smask = 0  # bits 16-18 but only for twin-predication
 882         mode = 0  # bits 19-23
 883
 884         mask_m_specified = False
 885         has_pmask = False
 886         has_smask = False
 887
 888         saturation = None
 889         src_zero = 0
 890         dst_zero = 0
 891         sv_mode = None
 892
 893         mapreduce = False
 894         reverse_gear = False
 895         mapreduce_crm = False
 896         mapreduce_svm = False
 897
 898         predresult = False
 899         failfirst = False
 900         ldst_elstride = 0
 901
 902         # branch-conditional bits
 903         bc_all = 0
 904         bc_lru = 0
 905         bc_brc = 0
 906         bc_svstep = 0
 907         bc_vsb = 0
 908         bc_vlset = 0
 909         bc_vli = 0
 910         bc_snz = 0
 911
 912         # ok let's start identifying opcode augmentation fields
 913         for encmode in opmodes:
 914             # predicate mask (src and dest)
 915             if encmode.startswith("m="):
 916                 pme = encmode
 917                 pmmode, pmask = decode_predicate(encmode[2:])
 918                 smmode, smask = pmmode, pmask
 919                 mmode = pmmode
 920                 mask_m_specified = True
 921             # predicate mask (dest)
 922             elif encmode.startswith("dm="):
 923                 pme = encmode
 924                 pmmode, pmask = decode_predicate(encmode[3:])
 925                 mmode = pmmode
 926                 has_pmask = True
 927             # predicate mask (src, twin-pred)
 928             elif encmode.startswith("sm="):
 929                 sme = encmode
 930                 smmode, smask = decode_predicate(encmode[3:])
 931                 mmode = smmode
 932                 has_smask = True
 933             # vec2/3/4
 934             elif encmode.startswith("vec"):
 935                 subvl = decode_subvl(encmode[3:])
 936             # elwidth
 937             elif encmode.startswith("ew="):
 938                 destwid = decode_elwidth(encmode[3:])
 939             elif encmode.startswith("sw="):
 940                 srcwid = decode_elwidth(encmode[3:])
 941             # element-strided LD/ST
 942             elif encmode == 'els':
 943                 ldst_elstride = 1
 944             # saturation
 945             elif encmode == 'sats':
 946                 assert sv_mode is None
 947                 saturation = 1
 948                 sv_mode = 0b10
 949             elif encmode == 'satu':
 950                 assert sv_mode is None
 951                 sv_mode = 0b10
 952                 saturation = 0
 953             # predicate zeroing
 954             elif encmode == 'sz':
 955                 src_zero = 1
 956             elif encmode == 'dz':
 957                 dst_zero = 1
 958             # failfirst
 959             elif encmode.startswith("ff="):
 960                 assert sv_mode is None
 961                 sv_mode = 0b01
 962                 failfirst = decode_ffirst(encmode[3:])
 963             # predicate-result, interestingly same as fail-first
 964             elif encmode.startswith("pr="):
 965                 assert sv_mode is None
 966                 sv_mode = 0b11
 967                 predresult = decode_ffirst(encmode[3:])
 968             # map-reduce mode, reverse-gear
 969             elif encmode == 'mrr':
 970                 assert sv_mode is None
 971                 sv_mode = 0b00
 972                 mapreduce = True
 973                 reverse_gear = True
 974             # map-reduce mode
 975             elif encmode == 'mr':
 976                 assert sv_mode is None
 977                 sv_mode = 0b00
 978                 mapreduce = True
 979             elif encmode == 'crm':  # CR on map-reduce
 980                 assert sv_mode is None
 981                 sv_mode = 0b00
 982                 mapreduce_crm = True
 983             elif encmode == 'svm':  # sub-vector mode
 984                 mapreduce_svm = True
 985             elif is_bc:
 986                 if encmode == 'all':
 987                     bc_all = 1
 988                 elif encmode == 'st':  # svstep mode
 989                     bc_step = 1
 990                 elif encmode == 'sr':  # svstep BRc mode
 991                     bc_step = 1
 992                     bc_brc = 1
 993                 elif encmode == 'vs':  # VLSET mode
 994                     bc_vlset = 1
 995                 elif encmode == 'vsi':  # VLSET mode with VLI (VL inclusives)
 996                     bc_vlset = 1
 997                     bc_vli = 1
 998                 elif encmode == 'vsb':  # VLSET mode with VSb
 999                     bc_vlset = 1
1000                     bc_vsb = 1
1001                 elif encmode == 'vsbi':  # VLSET mode with VLI and VSb
1002                     bc_vlset = 1
1003                     bc_vli = 1
1004                     bc_vsb = 1
1005                 elif encmode == 'snz':  # sz (only) already set above
1006                     src_zero = 1
1007                     bc_snz = 1
1008                 elif encmode == 'lu':  # LR update mode
1009                     bc_lru = 1
1010                 else:
1011                     raise AssertionError("unknown encmode %s" % encmode)
1012             else:
1013                 raise AssertionError("unknown encmode %s" % encmode)
1014
1015         if ptype == '2P':
1016             # since m=xx takes precedence (overrides) sm=xx and dm=xx,
1017             # treat them as mutually exclusive
1018             if mask_m_specified:
1019                 assert not has_smask,\
1020                     "cannot have both source-mask and predicate mask"
1021                 assert not has_pmask,\
1022                     "cannot have both dest-mask and predicate mask"
1023             # since the default is INT predication (ALWAYS), if you
1024             # specify one CR mask, you must specify both, to avoid
1025             # mixing INT and CR reg types
1026             if has_pmask and pmmode == 1:
1027                 assert has_smask, \
1028                     "need explicit source-mask in CR twin predication"
1029             if has_smask and smmode == 1:
1030                 assert has_pmask, \
1031                     "need explicit dest-mask in CR twin predication"
1032             # sanity-check that 2Pred mask is same mode
1033             if has_pmask and has_smask:
1034                 assert smmode == pmmode, \
1035                     "predicate masks %s and %s must be same reg type" % \
1036                     (pme, sme)
1037
1038         # sanity-check that twin-predication mask only specified in 2P mode
1039         if ptype == '1P':
1040             assert not has_smask, \
1041                 "source-mask can only be specified on Twin-predicate ops"
1042             assert not has_pmask, \
1043                 "dest-mask can only be specified on Twin-predicate ops"
1044
1045         # construct the mode field, doing sanity-checking along the way
1046         if mapreduce_svm:
1047             assert sv_mode == 0b00, "sub-vector mode in mapreduce only"
1048             assert subvl != 0, "sub-vector mode not possible on SUBVL=1"
1049
1050         if src_zero:
1051             assert has_smask or mask_m_specified, \
1052                 "src zeroing requires a source predicate"
1053         if dst_zero:
1054             assert has_pmask or mask_m_specified, \
1055                 "dest zeroing requires a dest predicate"
1056
1057         # okaaay, so there are 4 different modes, here, which will be
1058         # partly-merged-in: is_ldst is merged in with "normal", but
1059         # is_bc is so different it's done separately.  likewise is_cr
1060         # (when it is done).  here are the maps:
1061
1062         # for "normal" arithmetic: https://libre-soc.org/openpower/sv/normal/
1063         """
1064             | 0-1 |  2  |  3   4  |  description              |
1065             | --- | --- |---------|-------------------------- |
1066             | 00  |   0 |  dz  sz | normal mode                      |
1067             | 00  |   1 | 0  RG   | scalar reduce mode (mapreduce), SUBVL=1 |
1068             | 00  |   1 | 1  /    | parallel reduce mode (mapreduce), SUBVL=1 |
1069             | 00  |   1 | SVM RG  | subvector reduce mode, SUBVL>1   |
1070             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel              |
1071             | 01  | inv | VLi RC1 |  Rc=0: ffirst z/nonz |
1072             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1073             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1074             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1075         """
1076
1077         # https://libre-soc.org/openpower/sv/ldst/
1078         # for LD/ST-immediate:
1079         """
1080             | 0-1 |  2  |  3   4  |  description               |
1081             | --- | --- |---------|--------------------------- |
1082             | 00  | 0   |  dz els | normal mode                |
1083             | 00  | 1   |  dz shf | shift mode                 |
1084             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel        |
1085             | 01  | inv | els RC1 |  Rc=0: ffirst z/nonz       |
1086             | 10  |   N | dz  els |  sat mode: N=0/1 u/s       |
1087             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel  |
1088             | 11  | inv | els RC1 |  Rc=0: pred-result z/nonz  |
1089         """
1090
1091         # for LD/ST-indexed (RA+RB):
1092         """
1093             | 0-1 |  2  |  3   4  |  description              |
1094             | --- | --- |---------|-------------------------- |
1095             | 00  | SEA |  dz  sz | normal mode        |
1096             | 01  | SEA | dz sz  | Strided (scalar only source)   |
1097             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1098             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1099             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1100         """
1101
1102         # and leaving out branches and cr_ops for now because they're
1103         # under development
1104         """ TODO branches and cr_ops
1105         """
1106
1107         # now create mode and (overridden) src/dst widths
1108         # XXX TODO: sanity-check bc modes
1109         if is_bc:
1110             sv_mode = ((bc_svstep << SVP64MODE.MOD2_MSB) |
1111                        (bc_vlset << SVP64MODE.MOD2_LSB) |
1112                        (bc_snz << SVP64MODE.BC_SNZ))
1113             srcwid = (bc_vsb << 1) | bc_lru
1114             destwid = (bc_lru << 1) | bc_all
1115
1116         else:
1117
1118             ######################################
1119             # "normal" mode
1120             if sv_mode is None:
1121                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1122                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1123                 if is_ldst:
1124                     # TODO: for now, LD/ST-indexed is ignored.
1125                     mode |= ldst_elstride << SVP64MODE.ELS_NORMAL  # el-strided
1126                 else:
1127                     # TODO, reduce and subvector mode
1128                     # 00  1   dz CRM  reduce mode (mapreduce), SUBVL=1
1129                     # 00  1   SVM CRM subvector reduce mode, SUBVL>1
1130                     pass
1131                 sv_mode = 0b00
1132
1133             ######################################
1134             # "mapreduce" modes
1135             elif sv_mode == 0b00:
1136                 mode |= (0b1 << SVP64MODE.REDUCE)  # sets mapreduce
1137                 assert dst_zero == 0, "dest-zero not allowed in mapreduce mode"
1138                 if reverse_gear:
1139                     mode |= (0b1 << SVP64MODE.RG)  # sets Reverse-gear mode
1140                 if mapreduce_crm:
1141                     mode |= (0b1 << SVP64MODE.CRM)  # sets CRM mode
1142                     assert rc_mode, "CRM only allowed when Rc=1"
1143                 # bit of weird encoding to jam zero-pred or SVM mode in.
1144                 # SVM mode can be enabled only when SUBVL=2/3/4 (vec2/3/4)
1145                 if subvl == 0:
1146                     mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1147                 elif mapreduce_svm:
1148                     mode |= (0b1 << SVP64MODE.SVM)  # sets SVM mode
1149
1150             ######################################
1151             # "failfirst" modes
1152             elif sv_mode == 0b01:
1153                 assert src_zero == 0, "dest-zero not allowed in failfirst mode"
1154                 if failfirst == 'RC1':
1155                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1156                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1157                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1158                 elif failfirst == '~RC1':
1159                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1160                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1161                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1162                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1163                 else:
1164                     assert dst_zero == 0, "dst-zero not allowed in ffirst BO"
1165                     assert rc_mode, "ffirst BO only possible when Rc=1"
1166                     mode |= (failfirst << SVP64MODE.BO_LSB)  # set BO
1167
1168             ######################################
1169             # "saturation" modes
1170             elif sv_mode == 0b10:
1171                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1172                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1173                 mode |= (saturation << SVP64MODE.N)  # signed/us saturation
1174
1175             ######################################
1176             # "predicate-result" modes.  err... code-duplication from ffirst
1177             elif sv_mode == 0b11:
1178                 assert src_zero == 0, "dest-zero not allowed in predresult mode"
1179                 if predresult == 'RC1':
1180                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1181                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1182                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1183                 elif predresult == '~RC1':
1184                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1185                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1186                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1187                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1188                 else:
1189                     assert dst_zero == 0, "dst-zero not allowed in pr-mode BO"
1190                     assert rc_mode, "pr-mode BO only possible when Rc=1"
1191                     mode |= (predresult << SVP64MODE.BO_LSB)  # set BO
1192
1193         # whewww.... modes all done :)
1194         # now put into svp64_rm
1195         mode |= sv_mode
1196         # mode: bits 19-23
1197         svp64_rm.mode = mode
1198
1199         # put in predicate masks into svp64_rm
1200         if ptype == '2P':
1201             # source pred: bits 16-18
1202             svp64_rm.smask = smask
1203         # mask mode: bit 0
1204         svp64_rm.mmode = mmode
1205         # 1-pred: bits 1-3
1206         svp64_rm.mask = pmask
1207
1208         # and subvl: bits 8-9
1209         svp64_rm.subvl = subvl
1210
1211         # put in elwidths
1212         # srcwid: bits 6-7
1213         svp64_rm.ewsrc = srcwid
1214         # destwid: bits 4-5
1215         svp64_rm.elwidth = destwid
1216
1217         # nice debug printout. (and now for something completely different)
1218         # https://youtu.be/u0WOIwlXE9g?t=146
1219         svp64_rm_value = int(svp64_rm)
1220         log("svp64_rm", hex(svp64_rm_value), bin(svp64_rm_value))
1221         log("    mmode  0    :", bin(mmode))
1222         log("    pmask  1-3  :", bin(pmask))
1223         log("    dstwid 4-5  :", bin(destwid))
1224         log("    srcwid 6-7  :", bin(srcwid))
1225         log("    subvl  8-9  :", bin(subvl))
1226         log("    mode   19-23:", bin(mode))
1227         offs = 2 if etype == 'EXTRA2' else 3  # 2 or 3 bits
1228         for idx, sv_extra in extras.items():
1229             if idx is None:
1230                 continue
1231             if idx[0] == 'imm':
1232                 continue
1233             srcdest, idx, duplicate = idx
1234             start = (10+idx*offs)
1235             end = start + offs-1
1236             log("    extra%d %2d-%2d:" % (idx, start, end),
1237                 bin(sv_extra))
1238         if ptype == '2P':
1239             log("    smask  16-17:", bin(smask))
1240         log()
1241
1242         # update prefix PO and ID (aka PID)
1243         svp64_prefix.po = 0x1
1244         svp64_prefix.id = 0b11
1245
1246         # fiinally yield the svp64 prefix and the thingy.  v3.0b opcode
1247         rc = '.' if rc_mode else ''
1248         yield ".long 0x%08x" % int(svp64_prefix)
1249         log(v30b_op, v30b_newfields)
1250         # argh, sv.fmadds etc. need to be done manually
1251         if v30b_op == 'ffmadds':
1252             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1253             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1254             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1255             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1256             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1257             opcode |= 0b00101 << (32-31)   # bits 26-30
1258             if rc:
1259                 opcode |= 1  # Rc, bit 31.
1260             yield ".long 0x%x" % opcode
1261         # argh, sv.fdmadds need to be done manually
1262         elif v30b_op == 'fdmadds':
1263             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1264             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1265             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1266             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1267             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1268             opcode |= 0b01111 << (32-31)   # bits 26-30
1269             if rc:
1270                 opcode |= 1  # Rc, bit 31.
1271             yield ".long 0x%x" % opcode
1272         # argh, sv.ffadds etc. need to be done manually
1273         elif v30b_op == 'ffadds':
1274             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1275             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1276             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1277             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1278             opcode |= 0b01101 << (32-31)   # bits 26-30
1279             if rc:
1280                 opcode |= 1  # Rc, bit 31.
1281             yield ".long 0x%x" % opcode
1282         # sigh have to do svstep here manually for now...
1283         elif v30b_op in ["svstep", "svstep."]:
1284             insn = 22 << (31-5)          # opcode 22, bits 0-5
1285             insn |= int(v30b_newfields[0]) << (31-10)  # RT       , bits 6-10
1286             insn |= int(v30b_newfields[1]) << (31-22)  # SVi      , bits 16-22
1287             insn |= int(v30b_newfields[2]) << (31-25)  # vf       , bit  25
1288             insn |= 0b10011 << (31-30)  # XO       , bits 26..30
1289             if opcode == 'svstep.':
1290                 insn |= 1 << (31-31)     # Rc=1     , bit 31
1291             log("svstep", bin(insn))
1292             yield ".long 0x%x" % insn
1293         # argh, sv.fcoss etc. need to be done manually
1294         elif v30b_op in ["fcoss", "fcoss."]:
1295             insn = 59 << (31-5)  # opcode 59, bits 0-5
1296             insn |= int(v30b_newfields[0]) << (31-10)  # RT       , bits 6-10
1297             insn |= int(v30b_newfields[1]) << (31-20)  # RB       , bits 16-20
1298             insn |= 0b1000101110 << (31-30)  # XO       , bits 21..30
1299             if opcode == 'fcoss.':
1300                 insn |= 1 << (31-31)     # Rc=1     , bit 31
1301             log("fcoss", bin(insn))
1302             yield ".long 0x%x" % insn
1303         else:
1304             if not v30b_op.endswith('.'):
1305                 v30b_op += rc
1306             yield "%s %s" % (v30b_op, ", ".join(v30b_newfields))
1307         log("new v3.0B fields", v30b_op, v30b_newfields)
1308
1309     def translate(self, lst):
1310         for insn in lst:
1311             yield from self.translate_one(insn)
1312
1313
def macro_subst(macros, txt):
    """Expand macros inside a single operand string.

    ``macros`` maps a macro name to its replacement text.  Four operand
    shapes are recognised for every macro ``NAME``:

    * ``NAME``    (the whole operand)  becomes ``VALUE``
    * ``NAME.s``  (the whole operand)  becomes ``VALUE.s``
    * ``NAME.v``  (the whole operand)  becomes ``VALUE.v``
    * ``(NAME)``  anywhere in the operand becomes ``(VALUE)``

    Expansion is repeated until a full pass over ``macros`` leaves the
    text unchanged, so a macro may expand to another macro.

    Returns the fully expanded text.

    Raises ``ValueError`` if the macros refer to each other recursively
    and the expansion therefore cannot settle.
    """
    log("subst", txt, macros)
    original = txt
    # With non-recursive macros every pass that changes the text resolves
    # at least one level of nesting, so len(macros) changing passes plus
    # one final "nothing left to do" pass is always enough.  Needing more
    # than that means the definitions are recursive.
    passes_left = len(macros) + 1
    again = True
    while again:
        if passes_left == 0:
            raise ValueError("recursive macro definition while expanding %r"
                             % original)
        passes_left -= 1
        again = False
        for macro, value in macros.items():
            # whole-operand forms: "macro", "macro.s" and "macro.v"
            for suffix in ('', '.s', '.v'):
                toreplace = macro + suffix
                if toreplace == txt:
                    replaced = value + suffix
                    break
            else:
                # embedded form: "(macro)" somewhere inside the operand
                toreplace = '(%s)' % macro
                if toreplace not in txt:
                    continue
                replaced = txt.replace(toreplace, '(%s)' % value)
            log("macro", txt, "replaced", replaced, toreplace, value)
            # only ask for another pass when the text really changed: a
            # macro which expands to itself would otherwise loop forever
            if replaced != txt:
                again = True
                txt = replaced
    log("    processed", txt)
    return txt
1349
1350
def get_ws(line):
    """Split ``line`` into its leading whitespace and the remainder.

    Returns a ``(ws, rest)`` tuple where ``ws`` is the (possibly empty)
    run of whitespace at the start of ``line`` and ``rest`` is everything
    after it, so that ``ws + rest == line``.
    """
    # str.lstrip() drops exactly the characters str.isspace() accepts,
    # and does so in one pass instead of re-slicing per character
    rest = line.lstrip()
    return line[:len(line) - len(rest)], rest
1360
1361
def asm_process():
    """Command-line driver: rewrite an assembly file, translating SVP64.

    Takes either no arguments (stdin to stdout) or exactly two, an input
    and an output file name.  ``-`` (as shown in the usage message) or
    ``--`` (accepted historically) selects stdin / stdout.  Any other
    argument count prints the usage message to stderr and exits with a
    non-zero status.

    Every input line has its ``#`` comment stripped to obtain the op:

    * an op starting with ``.set`` records a macro (name, value);
    * an op starting with ``sv.`` or with one of ``CUSTOM_INSNS`` is
      passed to ``SVP64Asm.translate_one`` and replaced by the results
      joined with ``; ``, keeping the original indentation and appending
      the original op as a trailing comment;
    * every other line is copied to the output unchanged.
    """
    stdio_names = ('-', '--')

    # get an input file and an output file
    args = sys.argv[1:]
    if len(args) == 0:
        args = ['-', '-']
    elif len(args) != 2:
        print("pysvp64asm [infile | -] [outfile | -]", file=sys.stderr)
        sys.exit(1)  # wrong usage is an error, not a success
    in_name, out_name = args

    # read the whole lot in advance in case of in-place overwrite
    if in_name in stdio_names:
        lines = sys.stdin.readlines()
    else:
        with open(in_name, "r") as infile:
            lines = infile.readlines()

    if out_name in stdio_names:
        outfile = sys.stdout
    else:
        outfile = open(out_name, "w")

    try:
        # read the line, look for custom insn, process it
        macros = {}  # macros which start ".set"
        isa = SVP64Asm([])
        for line in lines:
            op = line.split("#")[0].strip()
            # identify macros
            if op.startswith(".set"):
                macro = op[4:].split(",")
                (macro, value) = map(str.strip, macro)
                macros[macro] = value
            if (not op.startswith('sv.') and
                    not op.startswith(tuple(CUSTOM_INSNS))):
                outfile.write(line)
                continue

            # keep the original indentation in front of the translation
            ws, _ = get_ws(line)
            lst = '; '.join(isa.translate_one(op, macros))
            outfile.write("%s%s # %s\n" % (ws, lst, op))
    finally:
        # never close the interpreter's own stdout, only files we opened
        if outfile is not sys.stdout:
            outfile.close()
1404
1405
if __name__ == '__main__':
    # Hand-maintained groups of example instructions.  Only the group
    # selected into ``lst`` further down is actually translated; the
    # others are kept so that a developer can switch to them quickly.
    examples = {
        'basic': [
            'slw 3, 1, 4',
            'extsw 5, 3',
            'sv.extsw 5, 3',
            'sv.cmpi 5, 1, 3, 2',
            'sv.setb 5, 31',
            'sv.isel 64.v, 3, 2, 65.v',
            'sv.setb/dm=r3/sm=1<<r3 5, 31',
            'sv.setb/m=r3 5, 31',
            'sv.setb/vec2 5, 31',
            'sv.setb/sw=8/ew=16 5, 31',
            'sv.extsw./ff=eq 5, 31',
            'sv.extsw./satu/sz/dz/sm=r3/dm=r3 5, 31',
            'sv.extsw./pr=eq 5.v, 31',
            'sv.add. 5.v, 2.v, 1.v',
            'sv.add./m=r3 5.v, 2.v, 1.v',
            'sv.stw 5.v, 4(1.v)',
            'sv.ld 5.v, 4(1.v)',
            'setvl. 2, 3, 4, 0, 1, 1',
            'sv.setvl. 2, 3, 4, 0, 1, 1',
        ],
        'stfsu': [
            "sv.stfsu 0.v, 16(4.v)",
        ],
        'stfsu_els': [
            "sv.stfsu/els 0.v, 16(4)",
        ],
        'add_mr': [
            'sv.add./mr 5.v, 2.v, 1.v',
        ],
        'macro_addi': [
            'sv.addi win2.v, win.v, -1',
            'sv.add./mrr 5.v, 2.v, 1.v',
            #'sv.lhzsh 5.v, 11(9.v), 15',
            #'sv.lwzsh 5.v, 11(9.v), 15',
            'sv.ffmadds 6.v, 2.v, 4.v, 6.v',
        ],
        'svremap_svshape': [
            #'sv.fmadds 0.v, 8.v, 16.v, 4.v',
            #'sv.ffadds 0.v, 8.v, 4.v',
            'svremap 11, 0, 1, 2, 3, 2, 1',
            'svshape 8, 1, 1, 1, 0',
            'svshape 8, 1, 1, 1, 1',
        ],
        'fcoss': [
            #'sv.lfssh 4.v, 11(8.v), 15',
            #'sv.lwzsh 4.v, 11(8.v), 15',
            #'sv.svstep. 2.v, 4, 0',
            #'sv.fcfids. 48.v, 64.v',
            'sv.fcoss. 80.v, 0.v',
            'sv.fcoss. 20.v, 0.v',
        ],
        'bc_svstep': [
            'sv.bc/all 3,12,192',
            'sv.bclr/vsbi 3,81.v,192',
            'sv.ld 5.v, 4(1.v)',
            'sv.svstep. 2.v, 4, 0',
        ],
        'maxs_absd': [
            'maxs 3,12,5',
            'maxs. 3,12,5',
            'avgadd 3,12,5',
            'absdu 3,12,5',
            'absds 3,12,5',
            'absdacu 3,12,5',
            'absdacs 3,12,5',
            'cprop 3,12,5',
            'svindex 0,0,1,0,0,0,0',
        ],
        'svstep_fmvis': [
            'sv.svstep./m=r3 2.v, 4, 0',
            'ternlogi 0,0,0,0x5',
            'fmvis 5,65535',
            'fmvis 5,1',
            'fmvis 5,2',
            'fmvis 5,4',
            'fmvis 5,8',
            'fmvis 5,16',
            'fmvis 5,32',
            'fmvis 5,64',
            'fmvis 5,32768',
        ],
        'andi_ffmadds': [
            'sv.andi. *80, *80, 1',
            'sv.ffmadds. 6.v, 2.v, 4.v, 6.v',  # incorrectly inserted 32-bit op
            'sv.ffmadds 6.v, 2.v, 4.v, 6.v',  # correctly converted to .long
        ],
    }

    macros = {'win2': '50', 'win': '60'}
    # the one group that is translated when this file is run directly
    lst = examples['andi_ffmadds']
    isa = SVP64Asm(lst, macros=macros)
    translated = list(isa)
    log("list:\n", "\n\t".join(translated))
    # running svp64.py is designed to test hard-coded lists
    # (above) - which strictly speaking should all be unit tests.
    # if you need to actually do assembler translation at the
    # commandline use "pysvp64asm" - see setup.py
    # XXX NO. asm_process()