src/openpower/sv/trans/svp64.py

   1 # SPDX-License-Identifier: LGPLv3+
   2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   3 # Funded by NLnet http://nlnet.nl
   4
   5 """SVP64 OpenPOWER v3.0B assembly translator
   6
   7 This class takes raw svp64 assembly mnemonics (aliases excluded) and creates
   8 an EXT001-encoded "svp64 prefix" (as a .long) followed by a v3.0B opcode.
   9
  10 It is very simple and straightforward, the only weirdness being the
  11 extraction of the register information and conversion to v3.0B numbering.
  12
  13 Encoding format of svp64: https://libre-soc.org/openpower/sv/svp64/
  14 Encoding format of arithmetic: https://libre-soc.org/openpower/sv/normal/
  15 Encoding format of LDST: https://libre-soc.org/openpower/sv/ldst/
  16 **TODO format of branches: https://libre-soc.org/openpower/sv/branches/**
  17 **TODO format of CRs: https://libre-soc.org/openpower/sv/cr_ops/**
  18 Bugtracker: https://bugs.libre-soc.org/show_bug.cgi?id=578
  19 """
  20
  21 import functools
  22 import os
  23 import sys
  24 from collections import OrderedDict
  25 import inspect
  26
  27 from openpower.decoder.pseudo.pagereader import ISA
  28 from openpower.decoder.power_svp64 import SVP64RM, get_regtype, decode_extra
  29 from openpower.decoder.selectable_int import SelectableInt
  30 from openpower.consts import SVP64MODE
  31 from openpower.decoder.power_insn import SVP64Instruction
  32 from openpower.decoder.power_insn import Database
  33 from openpower.decoder.power_enums import find_wiki_dir
  34
  35 # for debug logging
  36 from openpower.util import log
  37
  38
  39 def instruction(*fields):
  40     def instruction(insn, desc):
  41         (value, start, end) = desc
  42         bits = ((1,) * ((end + 1) - start))
  43         mask = 0
  44         for bit in bits:
  45             mask = ((mask << 1) | bit)
  46         return (insn | ((value & mask) << (31 - end)))
  47
  48     return functools.reduce(instruction, fields, 0)
  49
  50
# registry of custom instruction encoders, keyed by mnemonic.  entries are
# added by the `_custom_insns` decorator and looked up by opcode when
# translating an instruction.
CUSTOM_INSNS = {}
  52
  53
  54 def _insn(name, **kwargs):
  55     return name, kwargs
  56
  57
  58 def _custom_insns(*insns):
  59     """ a decorator that adds the function to `CUSTOM_INSNS` """
  60
  61     def decorator(fn):
  62         FIELDS_ARG = object()
  63         if len(insns) == 0:
  64             insns_ = (fn.__name__, {}),
  65         else:
  66             insns_ = insns
  67         for name, kwargs in insns_:
  68             if not isinstance(name, str):
  69                 raise TypeError("instruction name must be a str: {name!r}")
  70             if name in CUSTOM_INSNS:
  71                 raise ValueError(f"duplicate instruction mnemonic: {name!r}")
  72             # use getcallargs to check that arguments work:
  73             inspect.getcallargs(fn, FIELDS_ARG, **kwargs)
  74             CUSTOM_INSNS[name] = functools.partial(fn, **kwargs)
  75         return fn
  76     return decorator
  77
  78
  79 @_custom_insns(
  80     _insn("setvl", Rc=0),
  81     _insn("setvl.", Rc=1),
  82 )
  83 def setvl(fields, Rc):
  84     """
  85     setvl is a *32-bit-only* instruction. It controls SVSTATE.
  86     It is *not* a 64-bit-prefixed Vector instruction (no sv.setvl, yet),
  87     it is a Vector *control* instruction.
  88
  89     * setvl RT,RA,SVi,vf,vs,ms
  90
  91     1.6.28 SVL-FORM - from fields.txt
  92     |0     |6    |11    |16   |23 |24 |25 |26    |31 |
  93     | PO   |  RT |   RA | SVi |ms |vs |vf |   XO |Rc |
  94     """
  95     PO = 22
  96     XO = 0b11011
  97     # ARRRGH these are in a non-obvious order in openpower/isa/simplev.mdwn
  98     # compared to the SVL-Form above. sigh
  99     # setvl RT,RA,SVi,vf,vs,ms
 100     (RT, RA, SVi, vf, vs, ms) = fields
 101     SVi -= 1
 102     return instruction(
 103         (PO, 0, 5),
 104         (RT, 6, 10),
 105         (RA, 11, 15),
 106         (SVi, 16, 22),
 107         (ms, 23, 23),
 108         (vs, 24, 24),
 109         (vf, 25, 25),
 110         (XO, 26, 30),
 111         (Rc, 31, 31),
 112     )
 113
 114
 115 @_custom_insns(
 116     _insn("svstep", Rc=0),
 117     _insn("svstep.", Rc=1),
 118 )
 119 def svstep(fields, Rc):
 120     """
 121     svstep is a 32-bit instruction. It updates SVSTATE.
 122     It *can* be SVP64-prefixed, to indicate that its registers
 123     are Vectorised.
 124
 125     * svstep RT,SVi,vf
 126
 127     # 1.6.28 SVL-FORM - from fields.txt
 128     # |0     |6    |11    |16   |23 |24 |25 |26    |31 |
 129     # | PO   |  RT | /    | SVi |/  |/  |vf |   XO |Rc |
 130
 131     """
 132     PO = 22
 133     XO = 0b10011
 134     (RT, SVi, vf) = fields
 135     SVi -= 1
 136     return instruction(
 137         (PO, 0, 5),
 138         (RT, 6, 10),
 139         (0, 11, 15),
 140         (SVi, 16, 22),
 141         (0, 23, 23),
 142         (0, 24, 24),
 143         (vf, 25, 25),
 144         (XO, 26, 30),
 145         (Rc, 31, 31),
 146     )
 147
 148
 149 @_custom_insns()
 150 def svshape(fields):
 151     """
 152     svshape is a *32-bit-only* instruction. It updates SVSHAPE and SVSTATE.
 153     It is *not* a 64-bit-prefixed Vector instruction (no sv.svshape, yet),
 154     it is a Vector *control* instruction.
 155
 156     https://libre-soc.org/openpower/sv/remap/#svshape
 157
 158     * svshape SVxd,SVyd,SVzd,SVrm,vf
 159
 160     # 1.6.33 SVM-FORM from fields.txt
 161     # |0     |6        |11      |16    |21    |25 |26    |31  |
 162     # | PO   |  SVxd   |   SVyd | SVzd | SVrm |vf |   XO      |
 163
 164     note that SVrm is not permitted to be 0b0111, 0b1000 or 0b1001.
 165     0b0111 is reserved and 0b100- is for svshape2
 166
 167     """
 168     PO = 22
 169     XO = 0b011001
 170     (SVxd, SVyd, SVzd, SVrm, vf) = fields
 171     SVxd -= 1
 172     SVyd -= 1
 173     SVzd -= 1
 174
 175     # check SVrm for reserved (and svshape2) values
 176     assert SVrm not in [0b0111, 0b1000, 0b1001], \
 177             "svshape reserved SVrm value %s" % bin(SVrm)
 178
 179     return instruction(
 180         (PO, 0, 5),
 181         (SVxd, 6, 10),
 182         (SVyd, 11, 15),
 183         (SVzd, 16, 20),
 184         (SVrm, 21, 24),
 185         (vf, 25, 25),
 186         (XO, 26, 31),
 187     )
 188
 189
 190 @_custom_insns()
 191 def svshape2(fields):
 192     """
 193     svshape2 is a *32-bit-only* instruction. It updates SVSHAPE and SVSTATE.
 194     It is *not* a 64-bit-prefixed Vector instruction (no sv.svshape2, yet),
 195     it is a Vector *control* instruction, and is a sort-of hybrid of
 196     svshape and svindex, with the key important feature being the "offset".
 197
 198     https://libre-soc.org/openpower/sv/remap/discussion
 199
 200     * svshape2 offs,yx,rmm,SVd,sk,mm
 201
 202     # 1.6.35.1 SVM2-FORM from fields.txt
 203     # |0     |6     |10|11      |16    |21 |24|25 |26    |31  |
 204     # | PO   | offs |yx|   rmm  | SVd  |XO |mm|sk |   XO      |
 205
 206     note that this fits into the space of svshape and that XO is
 207     split across 2 areas.
 208
 209     """
 210     PO = 22
 211     XO = 0b011001
 212     XO2 = 0b100 # not really XO2 but hey
 213     (offs, yx, rmm, SVd, sk, mm) = fields
 214     SVd -= 1 # offset by one
 215
 216     return instruction(
 217         (PO, 0, 5),
 218         (offs, 6, 9),  # offset (the whole point of adding svshape2)
 219         (yx, 10, 10),  # like svindex
 220         (rmm, 11, 15), # ditto svindex
 221         (SVd, 16, 20), # ditto svindex
 222         (XO2, 21, 23), # actually XO split across 2 places...
 223         (mm, 24, 24),  # ditto svindex
 224         (sk, 25, 25),  # ditto svindex
 225         (XO, 26, 31),
 226     )
 227
 228
 229 @_custom_insns()
 230 def svindex(fields):
 231     """
 232     svindex is a *32-bit-only* instruction. It is a convenience
 233     instruction that reduces instruction count for Indexed REMAP
 234     Mode.
 235     It is *not* a 64-bit-prefixed Vector instruction (no sv.svindex, yet),
 236     it is a Vector *control* instruction.
 237
 238     1.6.28 SVI-FORM
 239       |0     |6    |11    |16   |21 |23|24|25|26    31|
 240       | PO   |  SVG|rmm   | SVd |ew |yx|mm|sk|   XO   |
 241     """
 242     # note that the dimension field one subtracted
 243     PO = 22
 244     XO = 0b101001
 245     (SVG, rmm, SVd, ew, yx, mm, sk) = fields
 246     SVd -= 1
 247     return instruction(
 248         (PO, 0, 5),
 249         (SVG, 6, 10),
 250         (rmm, 11, 15),
 251         (SVd, 16, 20),
 252         (ew, 21, 22),
 253         (yx, 23, 23),
 254         (mm, 24, 24),
 255         (sk, 25, 25),
 256         (XO, 26, 31),
 257     )
 258
 259
 260 @_custom_insns()
 261 def svremap(fields):
 262     """
 263     this is a *32-bit-only* instruction. It updates the SVSHAPE SPR
 264     it is *not* a 64-bit-prefixed Vector instruction (no sv.svremap),
 265     it is a Vector *control* instruction.
 266
 267     * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
 268
 269     # 1.6.34 SVRM-FORM
 270        |0     |6     |11  |13   |15   |17   |19   |21  |22   |26     |31 |
 271        | PO   | SVme |mi0 | mi1 | mi2 | mo0 | mo1 |pst |///  | XO        |
 272
 273     """
 274     PO = 22
 275     XO = 0b111001
 276     (SVme, mi0, mi1, mi2, mo0, mo1, pst) = fields
 277     return instruction(
 278         (PO, 0, 5),
 279         (SVme, 6, 10),
 280         (mi0, 11, 12),
 281         (mi1, 13, 14),
 282         (mi2, 15, 16),
 283         (mo0, 17, 18),
 284         (mo1, 19, 20),
 285         (pst, 21, 21),
 286         (0, 22, 25),
 287         (XO, 26, 31),
 288     )
 289
 290
 291 # ok from here-on down these are added as 32-bit instructions
 292 # and are here only because binutils (at present) doesn't have
 293 # them (that's being fixed!)
 294 # they can - if implementations then choose - be Vectorised
 295 # because they are general-purpose scalar instructions
 296 @_custom_insns()
 297 def bmask(fields):
 298     """
 299     1.6.2.2 BM2-FORM
 300     |0     |6    |11    |16    |21   |26 |27    31|
 301     | PO   |  RT |   RA |   RB |bm   |L  |   XO   |
 302     """
 303     PO = 22
 304     XO = 0b010001
 305     (RT, RA, RB, bm, L) = fields
 306     return instruction(
 307         (PO, 0, 5),
 308         (RT, 6, 10),
 309         (RA, 11, 15),
 310         (RB, 16, 20),
 311         (bm, 21, 25),
 312         (L, 26, 26),
 313         (XO, 27, 31),
 314     )
 315
 316
 317 @_custom_insns(
 318     _insn("fsins", Rc=0),
 319     _insn("fsins.", Rc=1),
 320 )
 321 def fsins(fields, Rc):
 322     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 323     # however we are out of space with opcode 22
 324     # 1.6.7 X-FORM
 325     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 326     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 327     PO = 59
 328     XO = 0b1000001110
 329     (FRT, FRB) = fields
 330     return instruction(
 331         (PO, 0, 5),
 332         (FRT, 6, 10),
 333         (0, 11, 15),
 334         (FRB, 16, 20),
 335         (XO, 21, 30),
 336         (Rc, 31, 31),
 337     )
 338
 339
 340 @_custom_insns(
 341     _insn("fcoss", Rc=0),
 342     _insn("fcoss.", Rc=1),
 343 )
 344 def fcoss(fields, Rc):
 345     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 346     # however we are out of space with opcode 22
 347     # 1.6.7 X-FORM
 348     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 349     # | PO   |   FRT         |     ///     |   FRB       |   XO |Rc  |
 350     PO = 59
 351     XO = 0b1000101110
 352     (FRT, FRB) = fields
 353     return instruction(
 354         (PO, 0, 5),
 355         (FRT, 6, 10),
 356         (0, 11, 15),
 357         (FRB, 16, 20),
 358         (XO, 21, 30),
 359         (Rc, 31, 31),
 360     )
 361
 362
 363 @_custom_insns(
 364     _insn("ternlogi", Rc=0),
 365     _insn("ternlogi.", Rc=1),
 366 )
 367 def ternlogi(fields, Rc):
 368     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 369     # however we are out of space with opcode 22
 370     # 1.6.34 TLI-FORM
 371     #   |0   |6   |11   |16   |21   |29  |31 |
 372     #   | PO | RT |  RA |  RB | TLI | XO |Rc |
 373     PO = 5
 374     XO = 0
 375     (RT, RA, RB, TLI) = fields
 376     return instruction(
 377         (PO, 0, 5),
 378         (RT, 6, 10),
 379         (RA, 11, 15),
 380         (RB, 16, 20),
 381         (TLI, 21, 28),
 382         (XO, 29, 30),
 383         (Rc, 31, 31),
 384     )
 385
 386
 387 @_custom_insns(
 388     _insn("grev", Rc=0, imm=0, word=0),
 389     _insn("grevw", Rc=0, imm=0, word=1),
 390     _insn("grevi", Rc=0, imm=1, word=0),
 391     _insn("grevwi", Rc=0, imm=1, word=1),
 392     _insn("grev.", Rc=1, imm=0, word=0),
 393     _insn("grevw.", Rc=1, imm=0, word=1),
 394     _insn("grevi.", Rc=1, imm=1, word=0),
 395     _insn("grevwi.", Rc=1, imm=1, word=1),
 396 )
 397 def grev(fields, Rc, imm, word):
 398     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 399     # however we are out of space with opcode 22
 400     insn = PO = 5
 401     # _ matches fields in table at:
 402     # https://libre-soc.org/openpower/sv/bitmanip/
 403     XO = 0b1_0010_110
 404     if word:
 405         XO |= 0b100_000
 406     if imm:
 407         XO |= 0b1000_000
 408     (RT, RA, XBI) = fields
 409     insn = (insn << 5) | RT
 410     insn = (insn << 5) | RA
 411     if imm and not word:
 412         assert 0 <= XBI < 64
 413         insn = (insn << 6) | XBI
 414         insn = (insn << 9) | XO
 415     else:
 416         assert 0 <= XBI < 32
 417         insn = (insn << 5) | XBI
 418         insn = (insn << 10) | XO
 419     insn = (insn << 1) | Rc
 420     return insn
 421
 422
 423 @_custom_insns(
 424     _insn("maxs", XO=0b0111001110, Rc=0),
 425     _insn("maxs.", XO=0b0111001110, Rc=1),
 426     _insn("maxu", XO=0b0011001110, Rc=0),
 427     _insn("maxu.", XO=0b0011001110, Rc=1),
 428     _insn("minu", XO=0b0001001110, Rc=0),
 429     _insn("minu.", XO=0b0001001110, Rc=1),
 430     _insn("mins", XO=0b0101001110, Rc=0),
 431     _insn("mins.", XO=0b0101001110, Rc=1),
 432     _insn("absdu", XO=0b1011110110, Rc=0),
 433     _insn("absdu.", XO=0b1011110110, Rc=1),
 434     _insn("absds", XO=0b1001110110, Rc=0),
 435     _insn("absds.", XO=0b1001110110, Rc=1),
 436     _insn("avgadd", XO=0b1101001110, Rc=0),
 437     _insn("avgadd.", XO=0b1101001110, Rc=1),
 438     _insn("absdacu", XO=0b1111110110, Rc=0),
 439     _insn("absdacu.", XO=0b1111110110, Rc=1),
 440     _insn("absdacs", XO=0b0111110110, Rc=0),
 441     _insn("absdacs.", XO=0b0111110110, Rc=1),
 442     _insn("cprop", XO=0b0110001110, Rc=0),
 443     _insn("cprop.", XO=0b0110001110, Rc=1),
 444 )
 445 def av(fields, XO, Rc):
 446     # 1.6.7 X-FORM
 447     #   |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |20|21    |31  |
 448     #   | PO   |       RT      |    RA       |    RB       |   XO |Rc  |
 449     PO = 22
 450     (RT, RA, RB) = fields
 451     return instruction(
 452         (PO, 0, 5),
 453         (RT, 6, 10),
 454         (RA, 11, 15),
 455         (RB, 16, 20),
 456         (XO, 21, 30),
 457         (Rc, 31, 31),
 458     )
 459
 460
 461 @_custom_insns()
 462 def fmvis(fields):
 463     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 464     # V3.0B 1.6.6 DX-FORM
 465     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27    |31  |
 466     # | PO   |   FRS         |     d1      |      d0  |      XO |d2  |
 467     PO = 22
 468     XO = 0b00011
 469     (FRS, imm) = fields
 470     # first split imm into d1, d0 and d2. sigh
 471     d2 = (imm & 1)  # LSB (0)
 472     d1 = (imm >> 1) & 0b11111  # bits 1-5
 473     d0 = (imm >> 6)  # MSBs 6-15
 474     return instruction(
 475         (PO, 0, 5),
 476         (FRS, 6, 10),
 477         (d1,  11, 15),
 478         (d0,  16, 25),
 479         (XO, 26, 30),
 480         (d2, 31, 31),
 481     )
 482
 483
 484 @_custom_insns()
 485 def fishmv(fields):
 486     # XXX WARNING THESE ARE NOT APPROVED BY OPF ISA WG
 487     # V3.0B 1.6.6 DX-FORM
 488     # |0     |6 |7|8|9  |10  |11|12|13  |15|16|17     |26|27   |31  |
 489     # | PO   |   FRS         |     d1      |      d0  |     XO |d2  |
 490     PO = 22
 491     XO = 0b01011
 492     (FRS, imm) = fields
 493     # first split imm into d1, d0 and d2. sigh
 494     d2 = (imm & 1)  # LSB (0)
 495     d1 = (imm >> 1) & 0b11111  # bits 1-5
 496     d0 = (imm >> 6)  # MSBs 6-15
 497     return instruction(
 498         (PO, 0, 5),
 499         (FRS, 6, 10),
 500         (d1,  11, 15),
 501         (d0,  16, 25),
 502         (XO, 26, 30),
 503         (d2, 31, 31),
 504     )
 505
 506
 507 # decode GPR into sv extra
 508 def get_extra_gpr(etype, regmode, field):
 509     if regmode == 'scalar':
 510         # cut into 2-bits 5-bits SS FFFFF
 511         sv_extra = field >> 5
 512         field = field & 0b11111
 513     else:
 514         # cut into 5-bits 2-bits FFFFF SS
 515         sv_extra = field & 0b11
 516         field = field >> 2
 517     return sv_extra, field
 518
 519
 520 # decode 3-bit CR into sv extra
 521 def get_extra_cr_3bit(etype, regmode, field):
 522     if regmode == 'scalar':
 523         # cut into 2-bits 3-bits SS FFF
 524         sv_extra = field >> 3
 525         field = field & 0b111
 526     else:
 527         # cut into 3-bits 4-bits FFF SSSS but will cut 2 zeros off later
 528         sv_extra = field & 0b1111
 529         field = field >> 4
 530     return sv_extra, field
 531
 532
 533 # decodes SUBVL
 534 def decode_subvl(encoding):
 535     pmap = {'2': 0b01, '3': 0b10, '4': 0b11}
 536     assert encoding in pmap, \
 537         "encoding %s for SUBVL not recognised" % encoding
 538     return pmap[encoding]
 539
 540
 541 # decodes elwidth
 542 def decode_elwidth(encoding):
 543     pmap = {'8': 0b11, '16': 0b10, '32': 0b01}
 544     assert encoding in pmap, \
 545         "encoding %s for elwidth not recognised" % encoding
 546     return pmap[encoding]
 547
 548
 549 # decodes predicate register encoding
 550 def decode_predicate(encoding):
 551     pmap = {  # integer
 552         '1<<r3': (0, 0b001),
 553         'r3': (0, 0b010),
 554         '~r3': (0, 0b011),
 555         'r10': (0, 0b100),
 556         '~r10': (0, 0b101),
 557         'r30': (0, 0b110),
 558         '~r30': (0, 0b111),
 559         # CR
 560         'lt': (1, 0b000),
 561         'nl': (1, 0b001), 'ge': (1, 0b001),  # same value
 562         'gt': (1, 0b010),
 563         'ng': (1, 0b011), 'le': (1, 0b011),  # same value
 564         'eq': (1, 0b100),
 565         'ne': (1, 0b101),
 566         'so': (1, 0b110), 'un': (1, 0b110),  # same value
 567         'ns': (1, 0b111), 'nu': (1, 0b111),  # same value
 568     }
 569     assert encoding in pmap, \
 570         "encoding %s for predicate not recognised" % encoding
 571     return pmap[encoding]
 572
 573
 574 # decodes "Mode" in similar way to BO field (supposed to, anyway)
 575 def decode_bo(encoding):
 576     pmap = {  # TODO: double-check that these are the same as Branch BO
 577         'lt': 0b000,
 578         'nl': 0b001, 'ge': 0b001,  # same value
 579         'gt': 0b010,
 580         'ng': 0b011, 'le': 0b011,  # same value
 581         'eq': 0b100,
 582         'ne': 0b101,
 583         'so': 0b110, 'un': 0b110,  # same value
 584         'ns': 0b111, 'nu': 0b111,  # same value
 585     }
 586     assert encoding in pmap, \
 587         "encoding %s for BO Mode not recognised" % encoding
 588     return pmap[encoding]
 589
 590
 591 # partial-decode fail-first mode
 592 def decode_ffirst(encoding):
 593     if encoding in ['RC1', '~RC1']:
 594         return encoding
 595     return decode_bo(encoding)
 596
 597
 598 def decode_reg(field, macros=None):
 599     if macros is None:
 600         macros = {}
 601     # decode the field number. "5.v" or "3.s" or "9"
 602     # and now also "*0", and "*%0".  note: *NOT* to add "*%rNNN" etc.
 603     # https://bugs.libre-soc.org/show_bug.cgi?id=884#c0
 604     if field.startswith(("*%", "*")):
 605         if field.startswith("*%"):
 606             field = field[2:]
 607         else:
 608             field = field[1:]
 609         while field in macros:
 610             field = macros[field]
 611         return int(field), "vector"  # actual register number
 612
 613     # try old convention (to be retired)
 614     field = field.split(".")
 615     regmode = 'scalar'  # default
 616     if len(field) == 2:
 617         if field[1] == 's':
 618             regmode = 'scalar'
 619         elif field[1] == 'v':
 620             regmode = 'vector'
 621     field = int(field[0])  # actual register number
 622     return field, regmode
 623
 624
 625 def decode_imm(field):
 626     ldst_imm = "(" in field and field[-1] == ')'
 627     if ldst_imm:
 628         return field[:-1].split("(")
 629     else:
 630         return None, field
 631
 632
 633 def crf_extra(etype, regmode, field, extras):
 634     """takes a CR Field number (CR0-CR127), splits into EXTRA2/3 and v3.0
 635     the scalar/vector mode (crNN.v or crNN.s) changes both the format
 636     of the EXTRA2/3 encoding as well as what range of registers is possible.
 637     this function can be used for both BF/BFA and BA/BB/BT by first removing
 638     the bottom 2 bits of BA/BB/BT then re-instating them after encoding.
 639     see https://libre-soc.org/openpower/sv/svp64/appendix/#cr_extra
 640     for specification
 641     """
 642     sv_extra, field = get_extra_cr_3bit(etype, regmode, field)
 643     # now sanity-check (and shrink afterwards)
 644     if etype == 'EXTRA2':
 645         # 3-bit CR Field (BF, BFA) EXTRA2 encoding
 646         if regmode == 'scalar':
 647             # range is CR0-CR15 in increments of 1
 648             assert (sv_extra >> 1) == 0, \
 649                 "scalar CR %s cannot fit into EXTRA2 %s" % \
 650                 (rname, str(extras[extra_idx]))
 651             # all good: encode as scalar
 652             sv_extra = sv_extra & 0b01
 653         else:  # vector
 654             # range is CR0-CR127 in increments of 16
 655             assert sv_extra & 0b111 == 0, \
 656                 "vector CR %s cannot fit into EXTRA2 %s" % \
 657                 (rname, str(extras[extra_idx]))
 658             # all good: encode as vector (bit 2 set)
 659             sv_extra = 0b10 | (sv_extra >> 3)
 660     else:
 661         # 3-bit CR Field (BF, BFA) EXTRA3 encoding
 662         if regmode == 'scalar':
 663             # range is CR0-CR31 in increments of 1
 664             assert (sv_extra >> 2) == 0, \
 665                 "scalar CR %s cannot fit into EXTRA3 %s" % \
 666                 (rname, str(extras[extra_idx]))
 667             # all good: encode as scalar
 668             sv_extra = sv_extra & 0b11
 669         else:  # vector
 670             # range is CR0-CR127 in increments of 8
 671             assert sv_extra & 0b11 == 0, \
 672                 "vector CR %s cannot fit into EXTRA3 %s" % \
 673                 (rname, str(extras[extra_idx]))
 674             # all good: encode as vector (bit 3 set)
 675             sv_extra = 0b100 | (sv_extra >> 2)
 676     return sv_extra, field
 677
 678
 679 def to_number(field):
 680     if field.startswith("0x"):
 681         return eval(field)
 682     if field.startswith("0b"):
 683         return eval(field)
 684     return int(field)
 685
 686
 687 # decodes svp64 assembly listings and creates EXT001 svp64 prefixes
 688 class SVP64Asm:
 689     def __init__(self, lst, bigendian=False, macros=None):
 690         if macros is None:
 691             macros = {}
 692         self.macros = macros
 693         self.lst = lst
 694         self.trans = self.translate(lst)
 695         self.isa = ISA()  # reads the v3.0B pseudo-code markdown files
 696         self.svp64 = SVP64RM()  # reads the svp64 Remap entries for registers
 697         assert bigendian == False, "error, bigendian not supported yet"
 698
 699     def __iter__(self):
 700         yield from self.trans
 701
 702     def translate_one(self, insn, macros=None):
 703         if macros is None:
 704             macros = {}
 705         macros.update(self.macros)
 706         isa = self.isa
 707         svp64 = self.svp64
 708         insn_no_comments = insn.partition('#')[0]
 709         # find first space, to get opcode
 710         ls = insn_no_comments.split(' ')
 711         opcode = ls[0]
 712         # now find opcode fields
 713         fields = ''.join(ls[1:]).split(',')
 714         mfields = list(map(str.strip, fields))
 715         log("opcode, fields", ls, opcode, mfields)
 716         fields = []
 717         # macro substitution
 718         for field in mfields:
 719             fields.append(macro_subst(macros, field))
 720         log("opcode, fields substed", ls, opcode, fields)
 721
 722         # identify if it is a special instruction
 723         custom_insn_hook = CUSTOM_INSNS.get(opcode)
 724         if custom_insn_hook is not None:
 725             fields = tuple(map(to_number, fields))
 726             insn_num = custom_insn_hook(fields)
 727             log(opcode, bin(insn_num))
 728             yield ".long 0x%X # %s" % (insn_num, insn)
 729             return
 730
 731         # identify if is a svp64 mnemonic
 732         if not opcode.startswith('sv.'):
 733             yield insn  # unaltered
 734             return
 735         opcode = opcode[3:]  # strip leading "sv"
 736
 737         # start working on decoding the svp64 op: sv.basev30Bop/vec2/mode
 738         opmodes = opcode.split("/")  # split at "/"
 739         v30b_op_orig = opmodes.pop(0)    # first is the v3.0B
 740         # check instruction ends with dot
 741         rc_mode = v30b_op_orig.endswith('.')
 742         if rc_mode:
 743             v30b_op = v30b_op_orig[:-1]
 744         else:
 745             v30b_op = v30b_op_orig
 746
 747         # look up the 32-bit op (original, with "." if it has it)
 748         if v30b_op_orig in isa.instr:
 749             isa_instr = isa.instr[v30b_op_orig]
 750         else:
 751             raise Exception("opcode %s of '%s' not supported" %
 752                             (v30b_op_orig, insn))
 753
 754         # look up the svp64 op, first the original (with "." if it has it)
 755         if v30b_op_orig in svp64.instrs:
 756             rm = svp64.instrs[v30b_op_orig]  # one row of the svp64 RM CSV
 757         # then without the "." (if there was one)
 758         elif v30b_op in svp64.instrs:
 759             rm = svp64.instrs[v30b_op]  # one row of the svp64 RM CSV
 760         else:
 761             raise Exception(f"opcode {v30b_op_orig!r} of "
 762                             f"{insn!r} not an svp64 instruction")
 763
 764         # get regs info e.g. "RT,RA,RB"
 765         v30b_regs = isa_instr.regs[0]
 766         log("v3.0B op", v30b_op, "Rc=1" if rc_mode else '')
 767         log("v3.0B regs", opcode, v30b_regs)
 768         log("RM", rm)
 769
 770         # right.  the first thing to do is identify the ordering of
 771         # the registers, by name.  the EXTRA2/3 ordering is in
 772         # rm['0']..rm['3'] but those fields contain the names RA, BB
 773         # etc.  we have to read the pseudocode to understand which
 774         # reg is which in our instruction. sigh.
 775
 776         # first turn the svp64 rm into a "by name" dict, recording
 777         # which position in the RM EXTRA it goes into
 778         # also: record if the src or dest was a CR, for sanity-checking
 779         # (elwidth overrides on CRs are banned)
 780         decode = decode_extra(rm)
 781         dest_reg_cr, src_reg_cr, svp64_src, svp64_dest = decode
 782
 783         log("EXTRA field index, src", svp64_src)
 784         log("EXTRA field index, dest", svp64_dest)
 785
 786         # okaaay now we identify the field value (opcode N,N,N) with
 787         # the pseudo-code info (opcode RT, RA, RB)
 788         assert len(fields) == len(v30b_regs), \
 789             "length of fields %s must match insn `%s` fields %s" % \
 790             (str(v30b_regs), insn, str(fields))
 791         opregfields = zip(fields, v30b_regs)  # err that was easy
 792
 793         # now for each of those find its place in the EXTRA encoding
 794         # note there is the possibility (for LD/ST-with-update) of
 795         # RA occurring **TWICE**.  to avoid it getting added to the
 796         # v3.0B suffix twice, we spot it as a duplicate, here
 797         extras = OrderedDict()
 798         for idx, (field, regname) in enumerate(opregfields):
 799             imm, regname = decode_imm(regname)
 800             rtype = get_regtype(regname)
 801             log("    idx find", rtype, idx, field, regname, imm)
 802             if rtype is None:
 803                 # probably an immediate field, append it straight
 804                 extras[('imm', idx, False)] = (idx, field, None, None, None)
 805                 continue
 806             extra = svp64_src.get(regname, None)
 807             if extra is not None:
 808                 extra = ('s', extra, False)  # not a duplicate
 809                 extras[extra] = (idx, field, regname, rtype, imm)
 810                 log("    idx src", idx, extra, extras[extra])
 811             dextra = svp64_dest.get(regname, None)
 812             log("regname in", regname, dextra)
 813             if dextra is not None:
 814                 is_a_duplicate = extra is not None  # duplicate spotted
 815                 dextra = ('d', dextra, is_a_duplicate)
 816                 extras[dextra] = (idx, field, regname, rtype, imm)
 817                 log("    idx dst", idx, extra, extras[dextra])
 818
 819         # great! got the extra fields in their associated positions:
 820         # also we know the register type. now to create the EXTRA encodings
 821         etype = rm['Etype']  # Extra type: EXTRA3/EXTRA2
 822         ptype = rm['Ptype']  # Predication type: Twin / Single
 823         extra_bits = 0
 824         v30b_newfields = []
 825         for extra_idx, (idx, field, rname, rtype, iname) in extras.items():
 826             # is it a field we don't alter/examine?  if so just put it
 827             # into newfields
 828             if rtype is None:
 829                 v30b_newfields.append(field)
 830                 continue
 831
 832             # identify if this is a ld/st immediate(reg) thing
 833             ldst_imm = "(" in field and field[-1] == ')'
 834             if ldst_imm:
 835                 immed, field = field[:-1].split("(")
 836
 837             field, regmode = decode_reg(field, macros=macros)
 838             log("    ", extra_idx, rname, rtype,
 839                 regmode, iname, field, end=" ")
 840
 841             # see Mode field https://libre-soc.org/openpower/sv/svp64/
 842             # XXX TODO: the following is a bit of a laborious repeated
 843             # mess, which could (and should) easily be parameterised.
 844             # XXX also TODO: the LD/ST modes which are different
 845             # https://libre-soc.org/openpower/sv/ldst/
 846
            # right.  SVP64 register numbering is from 0 to 127
            # for GPRs, FPRs *and* CR Fields, where for v3.0 the GPRs and FPRs
            # are 0-31 and CR Fields are only 0-7.  the SVP64 RM "Extra"
            # area is used to extend the numbering from the 32-bit
            # instruction, and also to record whether the register
            # is scalar or vector, on a per-operand basis.  this
            # results in a slightly finicky encoding: here we go...
 854
 855             # encode SV-GPR and SV-FPR field into extra, v3.0field
 856             if rtype in ['GPR', 'FPR']:
 857                 sv_extra, field = get_extra_gpr(etype, regmode, field)
 858                 # now sanity-check. EXTRA3 is ok, EXTRA2 has limits
 859                 # (and shrink to a single bit if ok)
 860                 if etype == 'EXTRA2':
 861                     if regmode == 'scalar':
 862                         # range is r0-r63 in increments of 1
 863                         assert (sv_extra >> 1) == 0, \
 864                             "scalar GPR %s cannot fit into EXTRA2 %s" % \
 865                             (rname, str(extras[extra_idx]))
 866                         # all good: encode as scalar
 867                         sv_extra = sv_extra & 0b01
 868                     else:
 869                         # range is r0-r127 in increments of 2 (r0 r2 ... r126)
 870                         assert sv_extra & 0b01 == 0, \
 871                             "%s: vector field %s cannot fit " \
 872                             "into EXTRA2 %s" % \
 873                             (insn, rname, str(extras[extra_idx]))
 874                         # all good: encode as vector (bit 2 set)
 875                         sv_extra = 0b10 | (sv_extra >> 1)
 876                 elif regmode == 'vector':
 877                     # EXTRA3 vector bit needs marking
 878                     sv_extra |= 0b100
 879
 880             # encode SV-CR 3-bit field into extra, v3.0field.
 881             # 3-bit is for things like BF and BFA
 882             elif rtype == 'CR_3bit':
 883                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 884
 885             # encode SV-CR 5-bit field into extra, v3.0field
 886             # 5-bit is for things like BA BB BC BT etc.
 887             # *sigh* this is the same as 3-bit except the 2 LSBs of the
 888             # 5-bit field are passed through unaltered.
 889             elif rtype == 'CR_5bit':
 890                 cr_subfield = field & 0b11  # record bottom 2 bits for later
 891                 field = field >> 2         # strip bottom 2 bits
 892                 # use the exact same 3-bit function for the top 3 bits
 893                 sv_extra, field = crf_extra(etype, regmode, field, extras)
 894                 # reconstruct the actual 5-bit CR field (preserving the
 895                 # bottom 2 bits, unaltered)
 896                 field = (field << 2) | cr_subfield
 897
 898             else:
 899                 raise Exception("no type match: %s" % rtype)
 900
 901             # capture the extra field info
 902             log("=>", "%5s" % bin(sv_extra), field)
 903             extras[extra_idx] = sv_extra
 904
 905             # append altered field value to v3.0b, differs for LDST
 906             # note that duplicates are skipped e.g. EXTRA2 contains
 907             # *BOTH* s:RA *AND* d:RA which happens on LD/ST-with-update
 908             srcdest, idx, duplicate = extra_idx
 909             if duplicate:  # skip adding to v3.0b fields, already added
 910                 continue
 911             if ldst_imm:
 912                 v30b_newfields.append(("%s(%s)" % (immed, str(field))))
 913             else:
 914                 v30b_newfields.append(str(field))
 915
 916         log("new v3.0B fields", v30b_op, v30b_newfields)
 917         log("extras", extras)
 918
 919         # rright. now we have all the info. start creating SVP64 instruction.
 920         db = Database(find_wiki_dir())
 921         svp64_insn = SVP64Instruction.pair(prefix=0, suffix=0)
 922         svp64_prefix = svp64_insn.prefix
 923         svp64_rm = svp64_insn.prefix.rm
 924
 925         # begin with EXTRA fields
 926         for idx, sv_extra in extras.items():
 927             log(idx)
 928             if idx is None:
 929                 continue
 930             if idx[0] == 'imm':
 931                 continue
 932             srcdest, idx, duplicate = idx
 933             if etype == 'EXTRA2':
 934                 svp64_rm.extra2[idx] = sv_extra
 935             else:
 936                 svp64_rm.extra3[idx] = sv_extra
 937
 938         # identify if the op is a LD/ST. the "blegh" way. copied
 939         # from power_enums.  TODO, split the list _insns down.
 940         is_ld = v30b_op in [
 941             "lbarx", "lbz", "lbzu", "lbzux", "lbzx",            # load byte
 942             "ld", "ldarx", "ldbrx", "ldu", "ldux", "ldx",       # load double
 943             "lfs", "lfsx", "lfsu", "lfsux",                     # FP load single
 944             "lfd", "lfdx", "lfdu", "lfdux", "lfiwzx", "lfiwax",  # FP load dbl
 945             "lha", "lharx", "lhau", "lhaux", "lhax",            # load half
 946             "lhbrx", "lhz", "lhzu", "lhzux", "lhzx",            # more load half
 947             "lwa", "lwarx", "lwaux", "lwax", "lwbrx",           # load word
 948             "lwz", "lwzcix", "lwzu", "lwzux", "lwzx",           # more load word
 949         ]
 950         is_st = v30b_op in [
 951             "stb", "stbcix", "stbcx", "stbu", "stbux", "stbx",
 952             "std", "stdbrx", "stdcx", "stdu", "stdux", "stdx",
 953             "stfs", "stfsx", "stfsu", "stfux",                  # FP store sgl
 954             "stfd", "stfdx", "stfdu", "stfdux", "stfiwx",       # FP store dbl
 955             "sth", "sthbrx", "sthcx", "sthu", "sthux", "sthx",
 956             "stw", "stwbrx", "stwcx", "stwu", "stwux", "stwx",
 957         ]
 958         # use this to determine if the SVP64 RM format is different.
 959         # see https://libre-soc.org/openpower/sv/ldst/
 960         is_ldst = is_ld or is_st
 961
 962         # branch-conditional detection
 963         is_bc = v30b_op in [
 964             "bc", "bclr",
 965         ]
 966
 967         # parts of svp64_rm
 968         mmode = 0  # bit 0
 969         pmask = 0  # bits 1-3
 970         destwid = 0  # bits 4-5
 971         srcwid = 0  # bits 6-7
 972         subvl = 0   # bits 8-9
 973         smask = 0  # bits 16-18 but only for twin-predication
 974         mode = 0  # bits 19-23
 975
 976         mask_m_specified = False
 977         has_pmask = False
 978         has_smask = False
 979
 980         saturation = None
 981         src_zero = 0
 982         dst_zero = 0
 983         sv_mode = None
 984
 985         parallel = False
 986         mapreduce = False
 987         reverse_gear = False
 988         mapreduce_crm = False
 989         mapreduce_svm = False
 990
 991         predresult = False
 992         failfirst = False
 993         ldst_elstride = 0
 994
 995         # branch-conditional bits
 996         bc_all = 0
 997         bc_lru = 0
 998         bc_brc = 0
 999         bc_svstep = 0
1000         bc_vsb = 0
1001         bc_vlset = 0
1002         bc_vli = 0
1003         bc_snz = 0
1004
1005         # ok let's start identifying opcode augmentation fields
1006         for encmode in opmodes:
1007             # predicate mask (src and dest)
1008             if encmode.startswith("m="):
1009                 pme = encmode
1010                 pmmode, pmask = decode_predicate(encmode[2:])
1011                 smmode, smask = pmmode, pmask
1012                 mmode = pmmode
1013                 mask_m_specified = True
1014             # predicate mask (dest)
1015             elif encmode.startswith("dm="):
1016                 pme = encmode
1017                 pmmode, pmask = decode_predicate(encmode[3:])
1018                 mmode = pmmode
1019                 has_pmask = True
1020             # predicate mask (src, twin-pred)
1021             elif encmode.startswith("sm="):
1022                 sme = encmode
1023                 smmode, smask = decode_predicate(encmode[3:])
1024                 mmode = smmode
1025                 has_smask = True
1026             # vec2/3/4
1027             elif encmode.startswith("vec"):
1028                 subvl = decode_subvl(encmode[3:])
1029             # elwidth
1030             elif encmode.startswith("ew="):
1031                 destwid = decode_elwidth(encmode[3:])
1032             elif encmode.startswith("sw="):
1033                 srcwid = decode_elwidth(encmode[3:])
1034             # element-strided LD/ST
1035             elif encmode == 'els':
1036                 ldst_elstride = 1
1037             # saturation
1038             elif encmode == 'sats':
1039                 assert sv_mode is None
1040                 saturation = 1
1041                 sv_mode = 0b10
1042             elif encmode == 'satu':
1043                 assert sv_mode is None
1044                 sv_mode = 0b10
1045                 saturation = 0
1046             # predicate zeroing
1047             elif encmode == 'sz':
1048                 src_zero = 1
1049             elif encmode == 'dz':
1050                 dst_zero = 1
1051             # failfirst
1052             elif encmode.startswith("ff="):
1053                 assert sv_mode is None
1054                 sv_mode = 0b01
1055                 failfirst = decode_ffirst(encmode[3:])
1056             # predicate-result, interestingly same as fail-first
1057             elif encmode.startswith("pr="):
1058                 assert sv_mode is None
1059                 sv_mode = 0b11
1060                 predresult = decode_ffirst(encmode[3:])
1061             # map-reduce mode, reverse-gear
1062             elif encmode == 'mrr':
1063                 assert sv_mode is None
1064                 sv_mode = 0b00
1065                 mapreduce = True
1066                 reverse_gear = True
1067             # map-reduce mode
1068             elif encmode == 'mr':
1069                 assert sv_mode is None
1070                 sv_mode = 0b00
1071                 mapreduce = True
1072             # parallel prefix mode
1073             elif encmode == 'pp':
1074                 assert sv_mode is None
1075                 sv_mode = 0b00
1076                 parallel = True
1077             elif encmode == 'crm':  # CR on map-reduce
1078                 assert sv_mode is None
1079                 sv_mode = 0b00
1080                 mapreduce_crm = True
1081             elif encmode == 'svm':  # sub-vector mode
1082                 mapreduce_svm = True
1083             elif is_bc:
1084                 if encmode == 'all':
1085                     bc_all = 1
1086                 elif encmode == 'st':  # svstep mode
1087                     bc_step = 1
1088                 elif encmode == 'sr':  # svstep BRc mode
1089                     bc_step = 1
1090                     bc_brc = 1
1091                 elif encmode == 'vs':  # VLSET mode
1092                     bc_vlset = 1
1093                 elif encmode == 'vsi':  # VLSET mode with VLI (VL inclusives)
1094                     bc_vlset = 1
1095                     bc_vli = 1
1096                 elif encmode == 'vsb':  # VLSET mode with VSb
1097                     bc_vlset = 1
1098                     bc_vsb = 1
1099                 elif encmode == 'vsbi':  # VLSET mode with VLI and VSb
1100                     bc_vlset = 1
1101                     bc_vli = 1
1102                     bc_vsb = 1
1103                 elif encmode == 'snz':  # sz (only) already set above
1104                     src_zero = 1
1105                     bc_snz = 1
1106                 elif encmode == 'lu':  # LR update mode
1107                     bc_lru = 1
1108                 else:
1109                     raise AssertionError("unknown encmode %s" % encmode)
1110             else:
1111                 raise AssertionError("unknown encmode %s" % encmode)
1112
1113         if ptype == '2P':
1114             # since m=xx takes precedence (overrides) sm=xx and dm=xx,
1115             # treat them as mutually exclusive
1116             if mask_m_specified:
1117                 assert not has_smask,\
1118                     "cannot have both source-mask and predicate mask"
1119                 assert not has_pmask,\
1120                     "cannot have both dest-mask and predicate mask"
1121             # since the default is INT predication (ALWAYS), if you
1122             # specify one CR mask, you must specify both, to avoid
1123             # mixing INT and CR reg types
1124             if has_pmask and pmmode == 1:
1125                 assert has_smask, \
1126                     "need explicit source-mask in CR twin predication"
1127             if has_smask and smmode == 1:
1128                 assert has_pmask, \
1129                     "need explicit dest-mask in CR twin predication"
1130             # sanity-check that 2Pred mask is same mode
1131             if has_pmask and has_smask:
1132                 assert smmode == pmmode, \
1133                     "predicate masks %s and %s must be same reg type" % \
1134                     (pme, sme)
1135
1136         # sanity-check that twin-predication mask only specified in 2P mode
1137         if ptype == '1P':
1138             assert not has_smask, \
1139                 "source-mask can only be specified on Twin-predicate ops"
1140             assert not has_pmask, \
1141                 "dest-mask can only be specified on Twin-predicate ops"
1142
1143         # construct the mode field, doing sanity-checking along the way
1144         if mapreduce_svm:
1145             assert sv_mode == 0b00, "sub-vector mode in mapreduce only"
1146             assert subvl != 0, "sub-vector mode not possible on SUBVL=1"
1147
1148         if src_zero:
1149             assert has_smask or mask_m_specified, \
1150                 "src zeroing requires a source predicate"
1151         if dst_zero:
1152             assert has_pmask or mask_m_specified, \
1153                 "dest zeroing requires a dest predicate"
1154
1155         # okaaay, so there are 4 different modes, here, which will be
1156         # partly-merged-in: is_ldst is merged in with "normal", but
1157         # is_bc is so different it's done separately.  likewise is_cr
1158         # (when it is done).  here are the maps:
1159
1160         # for "normal" arithmetic: https://libre-soc.org/openpower/sv/normal/
1161         """
1162             | 0-1 |  2  |  3   4  |  description              |
1163             | --- | --- |---------|-------------------------- |
1164             | 00  |   0 |  dz  sz | simple mode                      |
1165             | 00  |   1 | 0  RG   | scalar reduce mode (mapreduce), SUBVL=1 |
1166             | 00  |   1 | 1  /    | parallel reduce mode (mapreduce), SUBVL=1 |
1167             | 00  |   1 | SVM 0   | subvector reduce mode, SUBVL>1   |
1168             | 00  |   1 | SVM 1   | Pack/Unpack mode, SUBVL>1   |
1169             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel              |
1170             | 01  | inv | VLi RC1 |  Rc=0: ffirst z/nonz |
1171             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s, SUBVL=1 |
1172             | 10  |   N | zz   0  |  sat mode: N=0/1 u/s, SUBVL>1 |
1173             | 10  |   N | zz   1  |  Pack/Unpack sat mode: N=0/1 u/s, SUBVL>1 |
1174             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1175             | 11  | inv | zz  RC1 |  Rc=0: pred-result z/nonz |
1176         """
1177
1178         # https://libre-soc.org/openpower/sv/ldst/
1179         # for LD/ST-immediate:
1180         """
1181             | 0-1 |  2  |  3   4  |  description               |
1182             | --- | --- |---------|--------------------------- |
1183             | 00  | 0   |  dz els | normal mode                |
1184             | 00  | 1   |  dz shf | shift mode                 |
1185             | 01  | inv | CR-bit  | Rc=1: ffirst CR sel        |
1186             | 01  | inv | els RC1 |  Rc=0: ffirst z/nonz       |
1187             | 10  |   N | dz  els |  sat mode: N=0/1 u/s       |
1188             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel  |
1189             | 11  | inv | els RC1 |  Rc=0: pred-result z/nonz  |
1190         """
1191
1192         # for LD/ST-indexed (RA+RB):
1193         """
1194             | 0-1 |  2  |  3   4  |  description              |
1195             | --- | --- |---------|-------------------------- |
1196             | 00  | SEA |  dz  sz | normal mode        |
1197             | 01  | SEA | dz sz  | Strided (scalar only source)   |
1198             | 10  |   N | dz   sz |  sat mode: N=0/1 u/s |
1199             | 11  | inv | CR-bit  |  Rc=1: pred-result CR sel |
1200             | 11  | inv | dz  RC1 |  Rc=0: pred-result z/nonz |
1201         """
1202
1203         # and leaving out branches and cr_ops for now because they're
1204         # under development
1205         """ TODO branches and cr_ops
1206         """
1207
1208         # now create mode and (overridden) src/dst widths
1209         # XXX TODO: sanity-check bc modes
1210         if is_bc:
1211             sv_mode = ((bc_svstep << SVP64MODE.MOD2_MSB) |
1212                        (bc_vlset << SVP64MODE.MOD2_LSB) |
1213                        (bc_snz << SVP64MODE.BC_SNZ))
1214             srcwid = (bc_vsb << 1) | bc_lru
1215             destwid = (bc_lru << 1) | bc_all
1216
1217         else:
1218
1219             ######################################
1220             # "normal" mode
1221             if sv_mode is None:
1222                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1223                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1224                 if is_ldst:
1225                     # TODO: for now, LD/ST-indexed is ignored.
1226                     mode |= ldst_elstride << SVP64MODE.ELS_NORMAL  # el-strided
1227                 else:
1228                     # TODO, reduce and subvector mode
1229                     # 00  1   dz CRM  reduce mode (mapreduce), SUBVL=1
1230                     # 00  1   SVM CRM subvector reduce mode, SUBVL>1
1231                     pass
1232                 sv_mode = 0b00
1233
1234             ######################################
1235             # "mapreduce" modes
1236             elif sv_mode == 0b00:
1237                 if parallel:
1238                     mode |= (0b1 << SVP64MODE.PTREDUCE)  # sets parallel reduce
1239                     assert subvl == 0, "TODO sub-vector parallel reduce"
1240                 else:
1241                     mode |= (0b1 << SVP64MODE.REDUCE)  # sets mapreduce
1242                 assert dst_zero == 0, "dest-zero not allowed in mapreduce mode"
1243                 if reverse_gear:
1244                     mode |= (0b1 << SVP64MODE.RG)  # sets Reverse-gear mode
1245                 if mapreduce_crm:
1246                     mode |= (0b1 << SVP64MODE.CRM)  # sets CRM mode
1247                     assert rc_mode, "CRM only allowed when Rc=1"
1248                 # bit of weird encoding to jam zero-pred or SVM mode in.
1249                 # SVM mode can be enabled only when SUBVL=2/3/4 (vec2/3/4)
1250                 if subvl == 0:
1251                     mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1252                 elif mapreduce_svm:
1253                     mode |= (0b1 << SVP64MODE.SVM)  # sets SVM mode
1254
1255             ######################################
1256             # "failfirst" modes
1257             elif sv_mode == 0b01:
1258                 assert src_zero == 0, "dest-zero not allowed in failfirst mode"
1259                 if failfirst == 'RC1':
1260                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1261                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1262                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1263                 elif failfirst == '~RC1':
1264                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1265                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1266                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1267                     assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
1268                 else:
1269                     assert dst_zero == 0, "dst-zero not allowed in ffirst BO"
1270                     assert rc_mode, "ffirst BO only possible when Rc=1"
1271                     mode |= (failfirst << SVP64MODE.BO_LSB)  # set BO
1272
1273             ######################################
1274             # "saturation" modes
1275             elif sv_mode == 0b10:
1276                 mode |= src_zero << SVP64MODE.SZ  # predicate zeroing
1277                 mode |= dst_zero << SVP64MODE.DZ  # predicate zeroing
1278                 mode |= (saturation << SVP64MODE.N)  # signed/us saturation
1279
1280             ######################################
1281             # "predicate-result" modes.  err... code-duplication from ffirst
1282             elif sv_mode == 0b11:
1283                 assert src_zero == 0, "dest-zero not allowed in predresult mode"
1284                 if predresult == 'RC1':
1285                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1286                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1287                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1288                 elif predresult == '~RC1':
1289                     mode |= (0b1 << SVP64MODE.RC1)  # sets RC1 mode
1290                     mode |= (dst_zero << SVP64MODE.DZ)  # predicate dst-zeroing
1291                     mode |= (0b1 << SVP64MODE.INV)  # ... with inversion
1292                     assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
1293                 else:
1294                     assert dst_zero == 0, "dst-zero not allowed in pr-mode BO"
1295                     assert rc_mode, "pr-mode BO only possible when Rc=1"
1296                     mode |= (predresult << SVP64MODE.BO_LSB)  # set BO
1297
1298         # whewww.... modes all done :)
1299         # now put into svp64_rm
1300         mode |= sv_mode
1301         # mode: bits 19-23
1302         svp64_rm.mode = mode
1303
1304         # put in predicate masks into svp64_rm
1305         if ptype == '2P':
1306             # source pred: bits 16-18
1307             svp64_rm.smask = smask
1308         # mask mode: bit 0
1309         svp64_rm.mmode = mmode
1310         # 1-pred: bits 1-3
1311         svp64_rm.mask = pmask
1312
1313         # and subvl: bits 8-9
1314         svp64_rm.subvl = subvl
1315
1316         # put in elwidths
1317         # srcwid: bits 6-7
1318         svp64_rm.ewsrc = srcwid
1319         # destwid: bits 4-5
1320         svp64_rm.elwidth = destwid
1321
1322         # nice debug printout. (and now for something completely different)
1323         # https://youtu.be/u0WOIwlXE9g?t=146
1324         svp64_rm_value = int(svp64_rm)
1325         log("svp64_rm", hex(svp64_rm_value), bin(svp64_rm_value))
1326         log("    mmode  0    :", bin(mmode))
1327         log("    pmask  1-3  :", bin(pmask))
1328         log("    dstwid 4-5  :", bin(destwid))
1329         log("    srcwid 6-7  :", bin(srcwid))
1330         log("    subvl  8-9  :", bin(subvl))
1331         log("    mode   19-23:", bin(mode))
1332         offs = 2 if etype == 'EXTRA2' else 3  # 2 or 3 bits
1333         for idx, sv_extra in extras.items():
1334             if idx is None:
1335                 continue
1336             if idx[0] == 'imm':
1337                 continue
1338             srcdest, idx, duplicate = idx
1339             start = (10+idx*offs)
1340             end = start + offs-1
1341             log("    extra%d %2d-%2d:" % (idx, start, end),
1342                 bin(sv_extra))
1343         if ptype == '2P':
1344             log("    smask  16-17:", bin(smask))
1345         log()
1346
1347         # update prefix PO and ID (aka PID)
1348         svp64_prefix.po = 0x1
1349         svp64_prefix.id = 0b11
1350
1351         # fiinally yield the svp64 prefix and the thingy.  v3.0b opcode
1352         rc = '.' if rc_mode else ''
1353         yield ".long 0x%08x" % int(svp64_prefix)
1354         log(v30b_op, v30b_newfields)
1355
1356         v30b_op_rc = v30b_op
1357         if not v30b_op.endswith('.'):
1358             v30b_op_rc += rc
1359
1360         # svstep is weird
1361         # FIXME(lkcl): should sv.svstep be like svstep?
1362         if v30b_op_rc in ("svstep", "svstep."):
1363             # compensate for `SVi -= 1` in svstep()
1364             v30b_newfields[1] = str(int(v30b_newfields[1]) + 1)
1365
1366         custom_insn_hook = CUSTOM_INSNS.get(v30b_op_rc)
1367         if custom_insn_hook is not None:
1368             fields = tuple(map(to_number, v30b_newfields))
1369             insn_num = custom_insn_hook(fields)
1370             log(opcode, bin(insn_num))
1371             yield ".long 0x%X # %s" % (insn_num, insn)
1372             return
1373         # argh, sv.fmadds etc. need to be done manually
1374         elif v30b_op == 'ffmadds':
1375             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1376             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1377             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1378             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1379             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1380             opcode |= 0b00101 << (32-31)   # bits 26-30
1381             if rc:
1382                 opcode |= 1  # Rc, bit 31.
1383             yield ".long 0x%x" % opcode
1384         # argh, sv.fdmadds need to be done manually
1385         elif v30b_op == 'fdmadds':
1386             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1387             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1388             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1389             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1390             opcode |= int(v30b_newfields[3]) << (32-26)  # FRC
1391             opcode |= 0b11011 << (32-31)   # bits 26-30
1392             if rc:
1393                 opcode |= 1  # Rc, bit 31.
1394             yield ".long 0x%x" % opcode
1395         # argh, sv.ffadds etc. need to be done manually
1396         elif v30b_op == 'ffadds':
1397             opcode = 59 << (32-6)    # bits 0..6 (MSB0)
1398             opcode |= int(v30b_newfields[0]) << (32-11)  # FRT
1399             opcode |= int(v30b_newfields[1]) << (32-16)  # FRA
1400             opcode |= int(v30b_newfields[2]) << (32-21)  # FRB
1401             opcode |= 0b1000001100 << (32-31)   # bits 21-30
1402             if rc:
1403                 opcode |= 1  # Rc, bit 31.
1404             yield ".long 0x%x" % opcode
1405         else:
1406             if not v30b_op.endswith('.'):
1407                 v30b_op += rc
1408             yield "%s %s" % (v30b_op, ", ".join(v30b_newfields))
1409         log("new v3.0B fields", v30b_op, v30b_newfields)
1410
1411     def translate(self, lst):
1412         for insn in lst:
1413             yield from self.translate_one(insn)
1414
1415
def macro_subst(macros, txt):
    """Expand macros in a single operand string.

    macros maps a macro name to its replacement text; txt is the operand
    to expand.  For each macro, the first matching form below is expanded
    (at most one form per macro per pass):

    * txt is exactly the macro name
    * txt is the macro name followed by ".s" or ".v" (suffix is kept)
    * "(name)" occurs anywhere in txt, e.g. the register of "16(name)"

    Passes over all macros are repeated until a pass expands nothing, so
    a macro may expand to another macro.

    Returns the fully expanded text.  A macro that expands to itself is
    left as-is.  Raises ValueError when the macros expand in a cycle
    (e.g. a -> b -> a): the text at a pass boundary would repeat, and
    without this check the loop would never terminate.
    """
    log("subst", txt, macros)
    seen = {txt}  # text at each pass boundary, used for cycle detection
    again = True
    while again:
        again = False
        for macro, value in macros.items():
            # whole-operand forms first, in the order: bare, ".s", ".v"
            for toreplace, new in ((macro, value),
                                   ("%s.s" % macro, "%s.s" % value),
                                   ("%s.v" % macro, "%s.v" % value)):
                if toreplace == txt:
                    break
            else:
                # otherwise look for "(macro)" embedded inside the operand
                toreplace, new = "(%s)" % macro, "(%s)" % value
                if toreplace not in txt:
                    continue
            replaced = txt.replace(toreplace, new)
            log("macro", txt, "replaced", replaced, toreplace, value)
            # only a real change warrants another pass: re-scanning
            # identical text can never make progress
            if replaced != txt:
                again = True
                txt = replaced
        if again:
            # a pass is a pure function of txt, so seeing the same text
            # twice at a pass boundary means expansion would loop forever
            if txt in seen:
                raise ValueError("recursive macro definition, "
                                 "cannot expand %r" % txt)
            seen.add(txt)
    log("    processed", txt)
    return txt
1451
1452
def get_ws(line):
    """Split line into (leading whitespace, remainder of the line)."""
    # whatever lstrip() removes from the front is the leading whitespace
    remainder = line.lstrip()
    indent = line[:len(line) - len(remainder)]
    return indent, remainder
1462
1463
def asm_process():
    """Filter an assembly listing, expanding sv.* and custom instructions.

    Arguments are taken from sys.argv:

    * no arguments: read stdin, write stdout
    * two arguments, infile and outfile: either may be "-" (as the usage
      text says) or the previously-accepted "--" to select stdin / stdout

    Any other argument count prints the usage line to stderr and exits
    with a non-zero status.

    The whole input is read (and the input file closed) before the output
    is opened, so infile and outfile may name the same file.

    Per input line, with op being the text before any "#", stripped:

    * ".set name, value" records a macro (the line is still copied out)
    * if op starts with "sv." or with a CUSTOM_INSNS name, the line is
      replaced by the translate_one() results joined with "; ", keeping
      the original indentation and appending op as a trailing comment
    * anything else is copied to the output unchanged
    """
    # get an input file and an output file
    args = sys.argv[1:]
    if len(args) not in (0, 2):
        print("pysvp64asm [infile | -] [outfile | -]", file=sys.stderr)
        # usage error: report failure to the caller
        sys.exit(1)
    in_name, out_name = args if args else ('-', '-')
    stdio_names = ('-', '--')  # documented form plus the legacy one

    # read the whole lot in advance in case of in-place overwrite
    if in_name in stdio_names:
        lines = sys.stdin.readlines()
    else:
        with open(in_name, "r") as infile:
            lines = infile.readlines()

    if out_name in stdio_names:
        outfile = sys.stdout
    else:
        outfile = open(out_name, "w")

    try:
        # read the line, look for custom insn, process it
        macros = {}  # macros which start ".set"
        isa = SVP64Asm([])
        custom_prefixes = tuple(CUSTOM_INSNS)
        for line in lines:
            op = line.split("#")[0].strip()
            # identify macros
            if op.startswith(".set"):
                macro = op[4:].split(",")
                (macro, value) = map(str.strip, macro)
                macros[macro] = value
            if not op.startswith('sv.') and not op.startswith(custom_prefixes):
                outfile.write(line)
                continue

            ws = get_ws(line)[0]  # keep the original indentation
            lst = isa.translate_one(op, macros)
            lst = '; '.join(lst)
            outfile.write("%s%s # %s\n" % (ws, lst, op))
    finally:
        # never close stdout, but make sure a real file reaches the disk
        if outfile is sys.stdout:
            outfile.flush()
        else:
            outfile.close()
1506
1507
# Developer scratchpad, run only when this file is executed directly.
# Note that `lst` is reassigned repeatedly below: each `lst = [...]`
# discards the previous contents, so only the final list is handed to
# SVP64Asm.  The earlier lists are kept as ready-to-use test inputs.
if __name__ == '__main__':
    lst = ['slw 3, 1, 4',
           'extsw 5, 3',
           'sv.extsw 5, 3',
           'sv.cmpi 5, 1, 3, 2',
           'sv.setb 5, 31',
           'sv.isel 64.v, 3, 2, 65.v',
           'sv.setb/dm=r3/sm=1<<r3 5, 31',
           'sv.setb/m=r3 5, 31',
           'sv.setb/vec2 5, 31',
           'sv.setb/sw=8/ew=16 5, 31',
           'sv.extsw./ff=eq 5, 31',
           'sv.extsw./satu/sz/dz/sm=r3/dm=r3 5, 31',
           'sv.extsw./pr=eq 5.v, 31',
           'sv.add. 5.v, 2.v, 1.v',
           'sv.add./m=r3 5.v, 2.v, 1.v',
           ]
    lst += [
        'sv.stw 5.v, 4(1.v)',
        'sv.ld 5.v, 4(1.v)',
        'setvl. 2, 3, 4, 0, 1, 1',
        'sv.setvl. 2, 3, 4, 0, 1, 1',
    ]
    # everything assigned to lst above is discarded by this reassignment
    lst = [
        "sv.stfsu 0.v, 16(4.v)",
    ]
    lst = [
        "sv.stfsu/els 0.v, 16(4)",
    ]
    lst = [
        'sv.add./mr 5.v, 2.v, 1.v',
    ]
    # macros is assigned once here and passed to SVP64Asm at the bottom
    macros = {'win2': '50', 'win': '60'}
    lst = [
        'sv.addi win2.v, win.v, -1',
        'sv.add./mrr 5.v, 2.v, 1.v',
        #'sv.lhzsh 5.v, 11(9.v), 15',
        #'sv.lwzsh 5.v, 11(9.v), 15',
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',
    ]
    lst = [
        #'sv.fmadds 0.v, 8.v, 16.v, 4.v',
        #'sv.ffadds 0.v, 8.v, 4.v',
        'svremap 11, 0, 1, 2, 3, 2, 1',
        'svshape 8, 1, 1, 1, 0',
        'svshape 8, 1, 1, 1, 1',
    ]
    lst = [
        #'sv.lfssh 4.v, 11(8.v), 15',
        #'sv.lwzsh 4.v, 11(8.v), 15',
        #'sv.svstep. 2.v, 4, 0',
        #'sv.fcfids. 48.v, 64.v',
        'sv.fcoss. 80.v, 0.v',
        'sv.fcoss. 20.v, 0.v',
    ]
    lst = [
        'sv.bc/all 3,12,192',
        'sv.bclr/vsbi 3,81.v,192',
        'sv.ld 5.v, 4(1.v)',
        'sv.svstep. 2.v, 4, 0',
    ]
    lst = [
        'maxs 3,12,5',
        'maxs. 3,12,5',
        'avgadd 3,12,5',
        'absdu 3,12,5',
        'absds 3,12,5',
        'absdacu 3,12,5',
        'absdacs 3,12,5',
        'cprop 3,12,5',
        'svindex 0,0,1,0,0,0,0',
    ]
    lst = [
        'sv.svstep./m=r3 2.v, 4, 0',
        'ternlogi 0,0,0,0x5',
        'fmvis 5,65535',
        'fmvis 5,1',
        'fmvis 5,2',
        'fmvis 5,4',
        'fmvis 5,8',
        'fmvis 5,16',
        'fmvis 5,32',
        'fmvis 5,64',
        'fmvis 5,32768',
    ]
    lst = [
        'sv.andi. *80, *80, 1',
        'sv.ffmadds. 6.v, 2.v, 4.v, 6.v',  # incorrectly inserted 32-bit op
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',  # correctly converted to .long
        'svshape2 8, 1, 31, 7, 1, 1',
    ]
    # this final assignment is the list that actually gets translated
    lst = [
        'sv.add./pp 5.v, 2.v, 1.v',
    ]
    isa = SVP64Asm(lst, macros=macros)
    # iterating isa yields strings (they are joined here) which are logged;
    # presumably these are the translated output lines - confirm against
    # SVP64Asm.__iter__
    log("list:\n", "\n\t".join(list(isa)))
    # running svp64.py is designed to test hard-coded lists
    # (above) - which strictly speaking should all be unit tests.
    # if you need to actually do assembler translation at the
    # commandline use "pysvp64asm" - see setup.py
    # XXX NO. asm_process()