lxo/532/comp16-v1alt-skel.py

   1 #! /bin/env python3
   2 # see https://bugs.libre-soc.org/show_bug.cgi?id=532
   3
   4 # Estimate ppc code compression with Libre-SOC encoding attempt v1alt.
   5
   6
   7 # Copyright 2020 Alexandre Oliva
   8
   9 # This script is free software; you can redistribute it and/or modify
  10 # it under the terms of the GNU General Public License as published by
  11 # the Free Software Foundation; either version 3, or (at your option)
  12 # any later version.
  13
  14 # This script is distributed in the hope that it will be useful, but
  15 # WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # General Public License for more details.
  18
  19 # You should have received a copy of the GNU General Public License
  20 # along with this script; see the file COPYING3.  If not see
  21 # <http://www.gnu.org/licenses/>.
  22
  23 # Skeleton originally by Alexandre Oliva <oliva@gnu.org>.
  24
  25
  26 # Feed this script the output of objdump -M raw --no-show-raw-insn ppc-prog
  27
  28 # It will look for insns that can be represented in compressed mode,
  29 # according to the encoding rules in the copcond dictionary below.
  30
  31 # Nothing is assumed as to the actual bit-encoding of the insns, this
  32 # is just to experiment with insn selection and get a quick feedback
  33 # loop for the encoding options in compressed mode.
  34
  35 # This script is intended to compare the compression ratio between v1,
  36 # and an alternate mode-switching strategy that does away with 10-bit
  37 # insns to enter compressed mode, and instead uses a major 6-bit
  38 # opcode of a 32-bit insn to signal the insn encodes 10 mode bits, and
  39 # a compressed insn in the remaining 16 bits.  These 10 bits each
  40 # correspond to an upcoming insn, telling whether or not it's
  41 # compressed, so that any compressible insns among the 10 subsequent
  42 # insns can be encoded as such without any further overhead.
  43
  44 # This would enable us to use the mode-switching bits in 16-bit insns
  45 # for other purposes, but this script does not attempt to do so, so as
  46 # to make for a simpler, more direct comparison.
  47
  48
  49 # At (visible) entry points, mode is forced to return to uncompressed
  50 # mode.  Every branch target must be in uncompressed mode as well, but
  51 # this script does not enforce that.  In this model, the mode bits are
  52 # cleared when branches are taken: they are static, but they shall not
  53 # carry over across branch targets.  Mode-switching insns can only
  54 # appear in uncompressed mode, and they reset the mode bits for
  55 # upcoming insns, rather than appending.
  56
  57 # The entire code stream is printed, without any attempt to modify the
  58 # addresses that go along with or in them; we only insert markers for
  59 # the transition points, and for the compressed instructions.
  60
  61 # The really useful information is printed at the end: a summary of
  62 # transition and compressed-insn counts, and the achieved compression
  63 # rate.
  64
  65 import sys
  66 import re
  67
  68 modebits = 10
  69
  70 insn = re.compile('\s+(?P<addr>[0-9a-f]+):\s+(?P<opcode>[^ ]+) *(?P<operands>.*)')
  71
  72 # reg is a regkind (r, cr, fr) followed by a regnum
  73 xreg = '(?P<reg>(?P<regkind>[cf]?r)(?P<regnum>[0-9]+))'
  74
  75 # immediate is a sequence of digits, possibly preceded by a negative sign
  76 ximm = '(?P<immediate>-?[0-9]+)'
  77
  78 # branch is a branch target address; ignore an angle-bracketed label after it
  79 xbrt = '(?P<branch>[0-9a-f]+)(?: <.*>)?'
  80
  81 # offset is like immediate, but followed by a parenthesized basereg
  82 xoff = '(?P<offset>-?[0-9]+)\((?P<basereg>r[0-9]+)\)'
  83
  84 # creg is the cr, cond names one of its bits
  85 crbit = '(?:4\*(?P<creg>cr[0-7])\+)?(?P<cond>gt|lt|eq|so)'
  86
  87 # Combine the above into alternatives, to easily classify operands by
  88 # pattern matching.
  89 opkind = re.compile('|'.join([xreg, ximm, xbrt, xoff, crbit]))
  90
  91 # Pre-parse and classify op into a mop, short for mapped op.
  92 def mapop(op):
  93     match = opkind.fullmatch(op)
  94
  95     if match is None:
  96         op = ('other', op)
  97     elif match['reg'] is not None:
  98         op = (match['regkind'], int(match['regnum']), op)
  99     elif match['immediate'] is not None:
 100         op = ('imm', int (op).bit_length (), op)
 101     elif match['branch'] is not None:
 102         op = ('pcoff', (int (match['branch'], 16)
 103                         - int (addr, 16)).bit_length (), op, addr)
 104     elif match['offset'] is not None:
 105         op = ('ofst', mapop(match['offset']), mapop(match['basereg']), op)
 106     elif match['cond'] is not None:
 107         if match['creg'] is None:
 108             creg = 'cr0'
 109         else:
 110             creg = match['creg']
 111         op = ('crbit', mapop(creg), ('cond', match['cond']), op)
 112     else:
 113         raise "unrecognized operand kind"
 114
 115     return op
 116
 117 # Accessor to enable the mapop representation to change easily.
 118 def opclass(mop):
 119     return mop[0]
 120
 121 # Some opclass predicates, for the same reason.
 122 def regp(mop):
 123     return opclass(mop) in { 'r', 'fr', 'cr' } \
 124         or (opclass(mop) is  'imm' and mop[1] is 0)
 125 def immp(mop):
 126     return opclass(mop) in { 'imm', 'pcoff' }
 127 def rofp(mop):
 128     return opclass(mop) is   'ofst'
 129 def crbt(mop):
 130     return opclass(mop) is   'crbit'
 131
 132 # Some more accessors.
 133
 134 # Return the reg number if mop fits regp.
 135 def regno(mop):
 136     if regp(mop) \
 137        or (immp(mop) and mop[1] is 0):
 138         return mop[1]
 139     raise "operand is not a register"
 140
 141 def immval(mop):
 142     if immp(mop):
 143         return int(mop[2])
 144     raise "operand is not an immediate"
 145
 146 # Return the immediate length if mop fits immp.
 147 def immbits(mop):
 148     if immp(mop):
 149         return mop[1]
 150     raise "operand is not an immediate"
 151
 152 # Return the register sub-mop if mop fits rofp.
 153 def rofreg(mop):
 154     if rofp(mop):
 155         return mop[2]
 156     raise "operand is not an offset"
 157
# Return the offset sub-mop if mop fits rofp.
 159 def rofset(mop):
 160     if rofp(mop):
 161         return mop[1]
 162     raise "operand is not an offset"
 163
 164 # Return the register sub-mop if mop fits crbit.
 165 def crbtreg(mop):
 166     if crbt(mop):
 167         return mop[1]
 168     raise "operand is not a condition register bit"
 169
 170 # Return the cond bit name if mop fits crbit.
 171 def crbtcnd(mop):
 172     if crbt(mop):
 173         return mop[2]
 174     raise "operand is not a condition register bit"
 175
 176 # Following are predicates to be used in copcond, to tell the mode in
 177 # which opcode with ops as operands is to be represented.
 178
 179 # TODO: use insn_histogram.py to show the best targets
 180 # (remember to exclude nop - ori r0,r0,0 as this skews numbers)
 181 # Registers representable in a made-up 3-bit mapping.
 182 # It must contain 0 for proper working of at least storex.
 183 #cregs3 = { 0, 31, 1, 2, 3, 4, 5, 6, 7 }
 184 cregs3 = { 0, 9, 3, 1, 2, 31, 10, 30, 4 }
 185 # Ditto in a 2-bit mapping.  It needs not contain 0, but it must be a
 186 # subset of cregs3 for proper working of at least storex.
 187 cregs2 = { 9, 3, 1, 2 }
 188 # Use the same sets for FP for now.
 189 cfregs3 = cregs3
 190 cfregs2 = cregs2
 191 ccregs2 = { 0, 1, 2, 3 }
 192
 193 # Return true iff mop is a regular register present in cregs2
 194 def rcregs2(mop):
 195     return opclass(mop) in { 'r', 'imm' } and regno(mop) in cregs2
 196
 197 # Return true iff mop is a regular register present in cregs3
 198 def rcregs3(mop):
 199     return opclass(mop) in { 'r', 'imm' } and regno(mop) in cregs3
 200
 201 # Return true iff mop is a floating-point register present in cfregs2
 202 def rcfregs2(mop):
 203     return opclass(mop) is 'fr' and regno(mop) in cfregs2
 204
 205 # Return true iff mop is a floating-point register present in cfregs3
 206 def rcfregs3(mop):
 207     return opclass(mop) is 'fr' and regno(mop) in cfregs3
 208
 209 # Return true iff mop is a condition register present in ccregs2
 210 def rccregs2(mop):
 211     return opclass(mop) is 'cr' and regno(mop) in ccregs2
 212
 213 # Return true iff mop is an immediate of at most 8 bits.
 214 def imm8(mop):
 215     return immp(mop) and immbits(mop) <= 8
 216
 217 # Return true iff mop is an immediate of at most 12 bits.
 218 def imm12(mop):
 219     return immp(mop) and immbits(mop) <= 12
 220
 221 # Compress binary opcodes iff the first two operands (output and first
 222 # input operand) are registers representable in 3 bits in compressed
 223 # mode, and the immediate operand can be represented in 8 bits.
 224 def bin2regs3imm8(opcode, ops):
 225     if rcregs3(ops[0]) and rcregs3(ops[1]) and imm8(ops[2]):
 226         return 1
 227     return 0
 228
 229 # Recognize do-nothing insns, particularly ori r0,r0,0.
 230 def maybenop(opcode, ops):
 231     if opcode in ['ori', 'addi'] and regno(ops[0]) is regno(ops[1]) \
 232        and opclass(ops[0]) is 'r' and regno(ops[0]) is 0 \
 233        and imm8(ops[2]) and immbits(ops[2]) is 0:
 234         return 3
 235     return 0
 236
 237 # Recognize an unconditional branch, that can be represented with a
# 6-bit operand in 10-bit mode, and an additional 4 bits in 16-bit
 239 # mode.  In both cases, the offset is shifted left by 2 bits.
 240 def uncondbranch(opcode, ops):
 241     if imm8(ops[0]):
 242         return 3
 243     if imm12(ops[0]):
 244         return 1
 245     return 0
 246
 247 # 2 bits for RT and RA.  RB is r0 in 10-bit, and 3 bits in 16-bit ???
 248 # there's a general assumption that, if an insn can be represented in
 249 # 10-bits, then it can also be represented in 16 bits.  This will not
 250 # be the case if cregs3 can't represent register 0.  For
 251 # register+offset addresses, we support 16-imm stdi, fstdi, with 3-bit
 252 # immediates left-shifted by 3; stwi, fstsi, with 2-bit immediates
 253 # left-shifted by 2; stdspi for 6-bit immediate left-shifted by 3
 254 # biased by -256, and stwspi for 6-bit immediate left-shifted by 2
 255 # also biased by -256.  fstdi and fstsi store in memory a
 256 # floating-point register, the others do a general-purpose register.
 257 def storexaddr(opcode, ops):
 258     # Canonicalize offset in ops[1] to reg, imm
 259     if rofp(ops[1]):
 260         ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
 261         shift = memshifts[opcode[-1]]
 262         if immval(ops[2]) & ((1 << shift) - 1) is not 0:
 263             return 0
 264         if rcregs3(ops[1]) and immbits(ops[2]) <= shift + 3:
 265             return 2
 266         if regno(ops[1]) is 1 and opclass(ops[0]) is not 'fr' \
 267            and (immval(ops[2]) - 256).bit_length() <= shift + 6:
 268             return 2
 269         # Require offset 0 for compression of non-indexed form.
 270         if not regp(ops[2]):
 271             return 0
 272     # If any of the registers is zero, and the other fits in cregs2,
 273     # it fits in 10-bit.
 274     if (rcregs2(ops[1]) and regno(ops[2]) is 0) \
 275        or (regno(ops[1]) is 0 and rcregs2(ops[2])):
 276         return 3
 277     # For 16-bit one must fit rcregs2 and the other rcregs3.
 278     if (rcregs2(ops[1]) and rcregs3(ops[2])) \
 279        or (rcregs3(ops[1]) and rcregs2(ops[2])):
 280         return 1
 281     return 0
 282 def rstorex(opcode, ops):
 283     if rcregs2(ops[0]):
 284         return storexaddr(opcode, ops)
 285     return 0
 286 def frstorex(opcode, ops):
 287     if rcfregs2(ops[0]):
 288         return storexaddr(opcode, ops)
 289     return 0
 290
 291 memshifts = { 'd': 3, 'w': 2, 'z': 2, 's': 2 }
 292
 293 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
 294 # RB and RT must match.  ??? It's not clear what that means WRT
 295 # register mapping of different kinds of registers, e.g. when RT is a
 296 # floating-point register..
 297 # For register+offset addresses, we support 16-imm ldi, fldi, with
 298 # 3-bit immediates left-shifted by 3; lwi, flsi, with 2-bit immediates
 299 # left-shifted by 2; ldspi for 6-bit immediate left-shifted by 3
 300 # biased by -256, and lwspi for 6-bit immediate left-shifted by 2 also
 301 # biased by -256.  fldi and flsi load to floating-point registers, the
 302 # others load to general-purpose registers.
 303 def loadxaddr(opcode, ops):
 304     if rofp(ops[1]):
 305         ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
 306         shift = memshifts[opcode[-1]]
 307         if immval(ops[2]) & ((1 << shift) - 1) is not 0:
 308             return 0
 309         if rcregs3(ops[1]) and immbits(ops[2]) <= shift + 3:
 310             return 2
 311         if regno(ops[1]) is 1 and opclass(ops[0]) is not 'fr' \
 312            and (immval(ops[2]) - 256).bit_length() <= shift + 6:
 313             return 2
 314         # Otherwise require offset 0 for compression of non-indexed form.
 315         if not regp(ops[2]):
 316             return 0
 317     if rcregs3(ops[1]) and rcregs3(ops[2]):
 318         if regno(ops[0]) in { regno(ops[1]), regno(ops[2]) }:
 319             return 3
 320         return 1
 321     return 0
 322 def rloadx(opcode, ops):
 323     if rcregs3(ops[0]):
 324         return loadxaddr(opcode, ops)
 325     return 0
 326 def frloadx(opcode, ops):
 327     if rcfregs3(ops[0]):
 328         return loadxaddr(opcode, ops)
 329     return 0
 330
 331 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
 332 # RB and RT must match.  RA must not be zero, but in 16-bit mode we
 333 # can swap RA and RB to make it fit.
 334 def addop(opcode, ops):
 335     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
 336         if regno(ops[0]) in { regno(ops[1]), regno(ops[2]) }:
 337             return 3
 338         if regno(ops[1]) is not 0 or regno(ops[2]) is not 0:
 339             return 1
 340     return 0
 341
 342 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
 343 # RA and RT must match.  ??? The spec says RB, but the actual opcode
 344 # is subf., subtract from, and it subtracts RA from RB.  'neg.' would
 345 # make no sense as described there if we didn't use RA.
 346 def subfop(opcode, ops):
 347     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
 348         if regno(ops[0]) is regno(ops[1]):
 349             return 3
 350         return 1
 351     return 0
 352 def negop(opcode, ops):
 353     if rcregs3(ops[0]) and rcregs3(ops[1]):
 354         return 3
 355     return 0
 356
 357 # 3 bits for RA and 3 bits for RB.  L (op1) must be 1 for 10-bit.
 358 # op0 is a cr, must be zero for 10-bit.
 359 def cmpop(opcode, ops):
 360     if rcregs3(ops[2]) and rcregs3(ops[3]):
 361         if regno(ops[0]) is 0 and immval(ops[1]) is 1:
 362             return 3
 363         return 1
 364     return 0
 365
 366 # 3 bits for RS, 3 bits for RB, 3 bits for RS, 16-bit only.
 367 def sldop(opcode, ops):
 368     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
 369         return 1
 370     return 0
 371 # same as sld, except RS must be nonzero.
 372 def srdop(opcode, ops):
 373     if regno(ops[1]) is not 0:
 374         return sldop(opcode, ops)
 375     return 0
 376 # same as sld, except RS is given by RA, so they must be the same.
 377 def sradop(opcode, ops):
 378     if regno(ops[0]) is regno(ops[1]):
 379         return sldop(opcode, ops)
 380     return 0
 381
 382 # binary logical ops: and, nand, or, nor.
 383 # 3 bits for RA (nonzero), 3 bits for RB, 3 bits for RT in 16-bit mode.
 384 # RT is implicitly RB in 10-bit mode.
 385 def binlog1016ops(opcode, ops):
 386     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
 387        and regno(ops[1]) is not 0:
 388         # mr RT, RB AKA or RT, RB, RB takes the 10-bit encoding
 389         # of the 16-bit nor; we've already ruled out r0 as RB above.
 390         if regno(ops[0]) is regno(ops[2]) and opcode is not 'nor':
 391             return 3
 392         # or and and, with two identical inputs, stand for mr.
 393         # nor and nand, likewise, stand for not, that has its
 394         # own unary 10-bit encoding.
 395         if regno(ops[1]) is regno(ops[2]):
 396             return 3
 397         return 1
 398     return 0
 399 # 3 bits for RB, 3 bits for RT in 16-bit mode.
 400 # RT is implicitly RB in 10-bit mode.
 401 def unlog1016ops(opcode, ops):
 402     if rcregs3(ops[0]) and rcregs3(ops[1]):
 403         if regno(ops[0]) is regno(ops[1]):
 404             return 3
 405         return 1
 406     return 0
 407 # 16-bit only logical ops; no 10-bit encoding available
 408 # same constraints as the 1016 ones above.
 409 def binlog16ops(opcode, ops):
 410     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
 411        and regno(ops[1]) is not 0:
 412         return 1
 413     return 0
 414 def unlog16ops(opcode, ops):
 415     if rcregs3(ops[0]) and rcregs3(ops[1]):
 416         return 1
 417     return 0
 418
 419 # binary floating-point ops
 420 # 3 bits for FRA (nonzero), 3 bits for FRB, 3 bits for FRT in 16-bit mode.
 421 # FRT is implicitly FRB in 10-bit mode.
 422 def binfp1016ops(opcode, ops):
 423     if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
 424        and regno(ops[1]) is not 0:
 425         if regno(ops[0]) is regno(ops[2]):
 426             return 3
 427         return 1
 428     return 0
 429 def unfp1016ops(opcode, ops):
 430     if rcfregs3(ops[0]) and rcfregs3(ops[1]):
 431         if regno(ops[0]) is regno(ops[1]):
 432             return 3
 433         return 1
 434     return 0
 435 def binfp16ops(opcode, ops):
 436     if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
 437        and regno(ops[1]) is not 0:
 438         return 1
 439     return 0
 440 def unfp16ops(opcode, ops):
 441     if rcfregs3(ops[0]) and rcfregs3(ops[1]):
 442         return 1
 443     return 0
 444
 445 def cnvfp16ops(opcode, ops):
 446     if rcfregs2(ops[0]) and rcfregs2(ops[1]):
 447         return 1
 448     return 0
 449
 450 # Move between CRs.  3 bits for destination, 3 bits for source in
 451 # 16-bit mode.  That covers all possibilities.  For 10-bit mode, only
 452 # 2 bits for destination.
 453 def mcrfop(opcode, ops):
 454     if rccregs2(ops[0]):
 455         return 3
 456     return 1
 457 # Logical ops between two CRs into one.  2 bits for destination, that
 458 # must coincide with one of the inputs, 3 bits for the other input.
 459 # 16-bit only.
 460 def crops(opcode, ops):
 461     if rccregs2(ops[0]) and regno(ops[0]) is regno(ops[1]):
 462         return 1
 463     return 0
 464
 465 # 3 bits for general-purpose register; immediate identifies the
 466 # special purpose register to move to: 8 for lr, 9 for ctr.  16-bit
 467 # only.  mtspr imm,rN moves from rN to the spr; mfspr rN,imm moves
 468 # from spr to rN.
 469 def mtsprops(opcode, ops):
 470     if immval(ops[0]) in (8, 9) and rcregs3(ops[1]):
 471         return 1
 472     return 0
 473 def mfsprops(opcode, ops):
 474     if immval(ops[1]) in (8, 9) and rcregs3(ops[0]):
 475         return 1
 476     return 0
 477
 478 # 3 bits for nonzero general-purpose register; the immediate is a
 479 # per-CR mask (8-bits).  mtcr rN is mtcrf 0xFF, rN.  mfcr rN is a raw
 480 # opcode, not an alias.
 481 def mtcrfops(opcode, ops):
 482     if immval(ops[0]) is 255 and rcregs3(ops[1]) and regno(ops[1]) is not 0:
 483         return 1
 484     return 0
 485 def mfcrops(opcode, ops):
 486     if rcregs3(ops[0]) and regno(ops[0]) is not 0:
 487         return 1
 488     return 0
 489
 490 # 3 bits for destination and source register, must be the same.  Full
 491 # shift range fits.  16-imm format.
 492 def shiftops(opcode, ops):
 493     if rcregs3(ops[0]) and regno(ops[0]) is regno(ops[1]):
 494         return 2
 495     return 0
 496
 497 # For 16-imm 'addis' and 'addi', we have 3 bits (nonzero) for the
 498 # destination register, source register is implied 0, the immediate
 499 # must either fit in signed 5-bit, left-shifted by 3, or in signed
 500 # 7-bit without shift.  ??? That seems backwards.
 501 def addiops(opcode, ops):
 502     if rcregs3(ops[0]) and regno(ops[0]) is not 0 \
 503        and regno(ops[1]) is 0 and imm8(ops[2]) \
 504        and immbits(ops[2]) <= 8 \
 505        and ((immval(ops[2]) & 7) is 0 or immbits(ops[2]) <= 7):
 506         return 2
 507     return maybenop(opcode, ops)
 508
 509 # cmpdi and cmpwi are aliases to uncompressed cmp CR#, L, RA, imm16,
 510 # CR# being the target condition register, L being set for d rather
 511 # than w.  In 16-imm, CR# must be zero, RA must fit in 3 bits, and the
 512 # immediate must be 6 bits signed.
 513 def cmpiops(opcode, ops):
 514     if regno(ops[0]) is 0 and immval(ops[1]) in (0,1) \
 515        and rcregs3(ops[2]) and immbits(ops[3]) <= 6:
 516         return 2
 517     return 0
 518
 519 # 16-imm bc, with or without LK, uses 3 bits for BI (CR0 and CR1 only),
 520 # and 1 bit for BO1 (to tell BO 12 from negated 4).
 521 def bcops(opcode, ops):
 522     if immval(ops[0]) in (4,12) and regno(crbtreg(ops[1])) <= 1 \
 523        and immbits(ops[2]) <= 8:
 524         return 2
 525     return 0
 526
 527 # 2 bits for BI and 3 bits for BO in 10-bit encoding; one extra bit
 528 # for each in 16-bit.
 529 def bclrops(opcode, ops):
 530     if immval(ops[0]) <= 15 and regno(crbtreg(ops[1])) <= 1 \
 531        and immbits(ops[2]) is 0:
 532         if immval(ops[0]) <= 7 and regno(crbtreg(ops[1])) is 0:
 533             return 3
 534         return 1
 535     return 0
 536
 537 # Map opcodes that might be compressed to a function that returns the
 538 # best potential encoding kind for the insn, per the numeric coding
 539 # below.
 540 copcond = {
 541     'ori': maybenop,
 542     # 'attn': binutils won't ever print this
 543     'b': uncondbranch, 'bl': uncondbranch,
 544     'bc': bcops, 'bcl': bcops,
 545     'bclr': bclrops, 'bclrl': bclrops,
 546     # Stores and loads, including 16-imm ones
 547     'stdx': rstorex, 'stwx': rstorex,
 548     'std': rstorex, 'stw': rstorex, # only offset zero
 549     'stfdx': frstorex, 'stfsx': frstorex,
 550     'stfd': frstorex, 'stfs': frstorex, # only offset zero
 551     # Assuming lwz* rather than lwa*.
 552     'ldx': rloadx, 'lwzx': rloadx,
 553     'ld': rloadx, 'lwz': rloadx, # only offset zero
 554     'lfdx': rloadx, 'lfsx': rloadx,
 555     'lfd': rloadx, 'lfs': rloadx, # only offset zero
 556     'add': addop,
 557     'subf.': subfop, 'neg.': negop,
 558     # Assuming cmpl stands for cmpd, i.e., cmp with L=1.
 559     # cmpw is cmp with L=0, 16-bit only.
 560     'cmp': cmpop,
 561     'sld.': sldop, 'srd.': srdop, 'srad.': sradop,
 562     'and': binlog1016ops, 'nand': binlog1016ops,
 563     'or': binlog1016ops, 'nor': binlog1016ops,
 564     # assuming popcnt and cntlz mean the *d opcodes.
 565     'popcntd': unlog1016ops, 'cntlzd': unlog1016ops, 'extsw': unlog1016ops,
 566     # not RT, RB is mapped to nand/nor RT, RB, RB.
 567     'xor': binlog16ops, 'eqv': binlog16ops,
 568     # 'setvl.': unlog16ops, # ??? What's 'setvl.'?
 569     # assuming cnttz mean the *d opcode.
 570     'cnttzd': unlog16ops, 'extsb': unlog16ops, 'extsh': unlog16ops,
 571     'fsub.': binfp1016ops, 'fadd': binfp1016ops, 'fmul': binfp1016ops,
 572     'fneg.': unfp1016ops,
 573     'fdiv': binfp16ops,
 574     'fabs.': unfp16ops, 'fmr.': unfp16ops,
 575     # ??? are these the intended fp2int and int2fp, for all
 576     # combinations of signed/unsigned float/double?
 577     'fcfid': cnvfp16ops, 'fctidz': cnvfp16ops,
 578     'fcfidu': cnvfp16ops, 'fctiduz': cnvfp16ops,
 579     'fcfids': cnvfp16ops, 'fctiwz': cnvfp16ops,
 580     'fcfidus': cnvfp16ops, 'fctiwuz': cnvfp16ops,
 581     # Condition register opcodes.
 582     'mcrf': mcrfop,
 583     'crnor': crops,
 584     'crandc': crops,
 585     'crxor': crops,
 586     'crnand': crops,
 587     'crand': crops,
 588     'creqv': crops,
 589     'crorc': crops,
 590     'cror': crops,
 591     # System opcodes.
 592     # 'cbank' is not a ppc opcode, not handled
 593     'mtspr': mtsprops, # raw opcode for 'mtlr', 'mtctr'
 594     'mfspr': mfsprops, # raw opcode for 'mflr', 'mfctr'
 595     'mtcrf': mtcrfops, # raw opcode for 'mtcr'
 596     'mfcr': mfcrops,
 597     # 16-imm opcodes.
 598     'sradi.': shiftops, 'srawi.': shiftops,
 599     'addi': addiops,
 600     'cmpi': cmpiops, # raw opcode for 'cmpwi', 'cmpdi'
 601     # 'setvli', 'setmvli' are not ppc opcodes, not handled.
 602 }
 603
 604 # v1 has 4 kinds of insns:
 605
 606 # 0: uncompressed; leave input insn unchanged
 607 # 1: 16-bit compressed, only in compressed mode
 608 # 2: 16-imm, i.e., compressed insn that can't switch-out of compressed mode
 609 # 3: 10-bit compressed, may switch to compressed mode
 610
 611 # In v1alt, we map 1, 2 and 3 to compressed (count[1]).  If we have a
 612 # compressing insn, and we've run out of bits from the latest
 613 # mode-switch insn, we output another (count[2]).
 614
 615 count = [0,0,0]
 616 # Default comments for the insn kinds above.
 617 comments = ['', '\t; 16-bit', '\t; 6+10-bit mode']
 618
 619 # This counts the remaining bits to use from the latest mode-switching
 620 # insn.
 621 remobits = 0
 622
 623 for line in sys.stdin:
 624     if line[-1] is '\n':
 625         line = line[:-1]
 626
 627     match = insn.fullmatch(line)
 628     if match is None:
 629         print(line)
 630         # Switch to uncompressed mode at function boundaries
 631         remobits = 0
 632         continue
 633
 634     addr = match['addr']
 635     opcode = match['opcode']
 636     operands = match['operands']
 637
 638     if opcode in copcond:
 639         nexti = copcond[opcode](opcode,
 640                                [mapop(op) for op in operands.split(',')])
 641     else:
 642         nexti = 0
 643
 644     comment = None
 645
 646     if nexti is not 0:
 647         nexti = 1
 648         if remobits is 0:
 649             remobits = modebits + 1
 650             print('\t\th.nop\t\t; 16-bit mode-switching prefix')
 651             count[2] += 1
 652
 653     count[nexti] += 1
 654
 655     if comment is None:
 656         comment = comments[nexti]
 657     else:
 658         comment = '\t; ' + comment
 659
 660     print(line + comment)
 661
 662     if remobits > 0:
 663         remobits -= 1
 664
 665 transition_bytes = 2 * count[2]
 666 compressed_bytes = 2 * count[1]
 667 uncompressed_bytes = 4 * count[0]
 668 total_bytes = transition_bytes + compressed_bytes + uncompressed_bytes
 669 original_bytes = 2 * compressed_bytes + uncompressed_bytes
 670
 671 print()
 672 print('Summary')
 673 print('32-bit uncompressed instructions: %i' % count[0])
 674 print('16-bit compressed instructions: %i' % count[1])
 675 print('16-bit mode-switching nops: %i' % count[2])
 676 print('Compressed size estimate: %i' % total_bytes)
 677 print('Original size: %i' % original_bytes)
 678 print('Compressed/original ratio: %f' % (total_bytes / original_bytes))