openpower/sv/comp16-v1-skel.py

   1 #! /bin/env python3
   2 # see https://bugs.libre-soc.org/show_bug.cgi?id=532
   3
   4 # Estimate ppc code compression with Libre-SOC encoding attempt v1.
   5
   6
   7 # Copyright 2020 Alexandre Oliva
   8
   9 # This script is free software; you can redistribute it and/or modify
  10 # it under the terms of the GNU General Public License as published by
  11 # the Free Software Foundation; either version 3, or (at your option)
  12 # any later version.
  13
  14 # This script is distributed in the hope that it will be useful, but
  15 # WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # General Public License for more details.
  18
  19 # You should have received a copy of the GNU General Public License
  20 # along with this script; see the file COPYING3.  If not see
  21 # <http://www.gnu.org/licenses/>.
  22
  23 # Skeleton originally by Alexandre Oliva <oliva@gnu.org>.
  24
  25
  26 # Feed this script the output of objdump -M raw --no-show-raw-insn ppc-prog
  27
  28 # It will look for insns that can be represented in compressed mode,
  29 # according to the encoding rules in the copcond dictionary below.
  30
  31 # Nothing is assumed as to the actual bit-encoding of the insns, this
  32 # is just to experiment with insn selection and get a quick feedback
  33 # loop for the encoding options in compressed mode.
  34
  35 # In this script, the computations of encoding modes and transitions
  36 # are those for attempt 1 encoding, that encompasses:
  37
  38 # - a 16-bit insn (with 10-bit payload) that may switch to compressed
  39 # mode or return to 32-bit mode;
  40
  41 # - 16-bit insns in compressed mode, each with 2 bits devoted to
  42 # encoding one of the following possibilities:
  43
  44 # -- switch back to uncompressed mode at the next insn
  45
  46 # -- interpret the next insn in uncompressed mode, then return to
  47 # compressed mode
  48
  49 # -- remain in 16-bit mode for the next insn
  50
  51 # - a 16-bit immediate insn in compressed mode, that must be followed
  52 # by another compressed insn
  53
  54 # At (visible) entry points, mode is forced to return to uncompressed
  55 # mode.  Every branch target must be in uncompressed mode as well, but
  56 # this script does not enforce that.
  57
  58 # The entire code stream is printed, without any attempt to modify the
  59 # addresses that go along with or in them; we only insert markers for
  60 # the transition points, and for the compressed instructions.
  61
  62 # The really useful information is printed at the end: a summary of
  63 # transition and compressed-insn counts, and the achieved compression
  64 # rate.
  65
  66 import sys
  67 import re
  68
  69 insn = re.compile('\s+(?P<addr>[0-9a-f]+):\s+(?P<opcode>[^ ]+) *(?P<operands>.*)')
  70
  71 # reg is a regkind (r, cr, fr) followed by a regnum
  72 xreg = '(?P<reg>(?P<regkind>[cf]?r)(?P<regnum>[0-9]+))'
  73
  74 # immediate is a sequence of digits, possibly preceded by a negative sign
  75 ximm = '(?P<immediate>-?[0-9]+)'
  76
  77 # branch is a branch target address; ignore an angle-bracketed label after it
  78 xbrt = '(?P<branch>[0-9a-f]+)(?: <.*>)?'
  79
  80 # offset is like immediate, but followed by a parenthesized basereg
  81 xoff = '(?P<offset>-?[0-9]+)\((?P<basereg>r[0-9]+)\)'
  82
  83 # creg is the cr, cond names one of its bits
  84 crbit = '(?:4\*(?P<creg>cr[0-7])\+)?(?P<cond>gt|lt|eq|so)'
  85
  86 # Combine the above into alternatives, to easily classify operands by
  87 # pattern matching.
  88 opkind = re.compile('|'.join([xreg, ximm, xbrt, xoff, crbit]))
  89
  90 # Pre-parse and classify op into a mop, short for mapped op.
  91 def mapop(op):
  92     match = opkind.fullmatch(op)
  93
  94     if match is None:
  95         op = ('other', op)
  96     elif match['reg'] is not None:
  97         op = (match['regkind'], int(match['regnum']), op)
  98     elif match['immediate'] is not None:
  99         op = ('imm', int (op).bit_length (), op)
 100     elif match['branch'] is not None:
 101         op = ('pcoff', (int (match['branch'], 16)
 102                         - int (addr, 16)).bit_length (), op, addr)
 103     elif match['offset'] is not None:
 104         op = ('ofst', mapop(match['offset']), mapop(match['basereg']), op)
 105     elif match['cond'] is not None:
 106         if match['creg'] is None:
 107             creg = 'cr0'
 108         else:
 109             creg = match['creg']
 110         op = ('crbit', mapop(creg), ('cond', match['cond']), op)
 111     else:
 112         raise "unrecognized operand kind"
 113
 114     return op
 115
 116 # Accessor to enable the mapop representation to change easily.
 117 def opclass(mop):
 118     return mop[0]
 119
 120 # Some opclass predicates, for the same reason.
 121 def regp(mop):
 122     return opclass(mop) in { 'r', 'fr', 'cr' } \
 123         or (opclass(mop) is  'imm' and mop[1] is 0)
 124 def immp(mop):
 125     return opclass(mop) in { 'imm', 'pcoff' }
 126 def rofp(mop):
 127     return opclass(mop) is   'ofst'
 128 def crbt(mop):
 129     return opclass(mop) is   'crbit'
 130
 131 # Some more accessors.
 132
 133 # Return the reg number if mop fits regp.
 134 def regno(mop):
 135     if regp(mop) \
 136        or (immp(mop) and mop[1] is 0):
 137         return mop[1]
 138     raise "operand is not a register"
 139
 140 def immval(mop):
 141     if immp(mop):
 142         return int(mop[2])
 143     raise "operand is not an immediate"
 144
 145 # Return the immediate length if mop fits immp.
 146 def immbits(mop):
 147     if immp(mop):
 148         return mop[1]
 149     raise "operand is not an immediate"
 150
 151 # Return the register sub-mop if mop fits rofp.
 152 def rofreg(mop):
 153     if rofp(mop):
 154         return mop[2]
 155     raise "operand is not an offset"
 156
# Return the offset sub-mop if mop fits rofp.
 158 def rofset(mop):
 159     if rofp(mop):
 160         return mop[1]
 161     raise "operand is not an offset"
 162
 163 # Return the register sub-mop if mop fits crbit.
 164 def crbtreg(mop):
 165     if crbt(mop):
 166         return mop[1]
 167     raise "operand is not a condition register bit"
 168
 169 # Return the cond bit name if mop fits crbit.
 170 def crbtcnd(mop):
 171     if crbt(mop):
 172         return mop[2]
 173     raise "operand is not a condition register bit"
 174
 175 # Following are predicates to be used in copcond, to tell the mode in
 176 # which opcode with ops as operands is to be represented.
 177
 178 # TODO: use insn_histogram.py to show the best targets
 179 # (remember to exclude nop - ori r0,r0,0 as this skews numbers)
 180 # Registers representable in a made-up 3-bit mapping.
 181 # It must contain 0 for proper working of at least storex.
 182 #cregs3 = { 0, 31, 1, 2, 3, 4, 5, 6, 7 }
 183 cregs3 = { 0, 9, 3, 1, 2, 31, 10, 30, 4 }
 184 # Ditto in a 2-bit mapping.  It needs not contain 0, but it must be a
 185 # subset of cregs3 for proper working of at least storex.
 186 cregs2 = { 9, 3, 1, 2 }
 187 # Use the same sets for FP for now.
 188 cfregs3 = cregs3
 189 cfregs2 = cregs2
 190 ccregs2 = { 0, 1, 2, 3 }
 191
 192 # Return true iff mop is a regular register present in cregs2
 193 def rcregs2(mop):
 194     return opclass(mop) in { 'r', 'imm' } and regno(mop) in cregs2
 195
 196 # Return true iff mop is a regular register present in cregs3
 197 def rcregs3(mop):
 198     return opclass(mop) in { 'r', 'imm' } and regno(mop) in cregs3
 199
 200 # Return true iff mop is a floating-point register present in cfregs2
 201 def rcfregs2(mop):
 202     return opclass(mop) is 'fr' and regno(mop) in cfregs2
 203
 204 # Return true iff mop is a floating-point register present in cfregs3
 205 def rcfregs3(mop):
 206     return opclass(mop) is 'fr' and regno(mop) in cfregs3
 207
 208 # Return true iff mop is a condition register present in ccregs2
 209 def rccregs2(mop):
 210     return opclass(mop) is 'cr' and regno(mop) in ccregs2
 211
 212 # Return true iff mop is an immediate of at most 8 bits.
 213 def imm8(mop):
 214     return immp(mop) and immbits(mop) <= 8
 215
 216 # Return true iff mop is an immediate of at most 12 bits.
 217 def imm12(mop):
 218     return immp(mop) and immbits(mop) <= 12
 219
 220 # Compress binary opcodes iff the first two operands (output and first
 221 # input operand) are registers representable in 3 bits in compressed
 222 # mode, and the immediate operand can be represented in 8 bits.
 223 def bin2regs3imm8(opcode, ops):
 224     if rcregs3(ops[0]) and rcregs3(ops[1]) and imm8(ops[2]):
 225         return 1
 226     return 0
 227
 228 # Recognize do-nothing insns, particularly ori r0,r0,0.
 229 def maybenop(opcode, ops):
 230     if opcode in ['ori', 'addi'] and regno(ops[0]) is regno(ops[1]) \
 231        and opclass(ops[0]) is 'r' and regno(ops[0]) is 0 \
 232        and imm8(ops[2]) and immbits(ops[2]) is 0:
 233         return 3
 234     return 0
 235
 236 # Recognize an unconditional branch, that can be represented with a
# 6-bit operand in 10-bit mode, and an additional 4 bits in 16-bit
 238 # mode.  In both cases, the offset is shifted left by 2 bits.
 239 def uncondbranch(opcode, ops):
 240     if imm8(ops[0]):
 241         return 3
 242     if imm12(ops[0]):
 243         return 1
 244     return 0
 245
 246 # 2 bits for RT and RA.  RB is r0 in 10-bit, and 3 bits in 16-bit ???
 247 # there's a general assumption that, if an insn can be represented in
 248 # 10-bits, then it can also be represented in 16 bits.  This will not
 249 # be the case if cregs3 can't represent register 0.  For
 250 # register+offset addresses, we support 16-imm stdi, fstdi, with 3-bit
 251 # immediates left-shifted by 3; stwi, fstsi, with 2-bit immediates
 252 # left-shifted by 2; stdspi for 6-bit immediate left-shifted by 3
 253 # biased by -256, and stwspi for 6-bit immediate left-shifted by 2
 254 # also biased by -256.  fstdi and fstsi store in memory a
 255 # floating-point register, the others do a general-purpose register.
 256 def storexaddr(opcode, ops):
 257     # Canonicalize offset in ops[1] to reg, imm
 258     if rofp(ops[1]):
 259         ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
 260         shift = memshifts[opcode[-1]]
 261         if immval(ops[2]) & ((1 << shift) - 1) is not 0:
 262             return 0
 263         if rcregs3(ops[1]) and immbits(ops[2]) <= shift + 3:
 264             return 2
 265         if regno(ops[1]) is 1 and opclass(ops[0]) is not 'fr' \
 266            and (immval(ops[2]) - 256).bit_length() <= shift + 6:
 267             return 2
 268         # Require offset 0 for compression of non-indexed form.
 269         if not regp(ops[2]):
 270             return 0
 271     # If any of the registers is zero, and the other fits in cregs2,
 272     # it fits in 10-bit.
 273     if (rcregs2(ops[1]) and regno(ops[2]) is 0) \
 274        or (regno(ops[1]) is 0 and rcregs2(ops[2])):
 275         return 3
 276     # For 16-bit one must fit rcregs2 and the other rcregs3.
 277     if (rcregs2(ops[1]) and rcregs3(ops[2])) \
 278        or (rcregs3(ops[1]) and rcregs2(ops[2])):
 279         return 1
 280     return 0
 281 def rstorex(opcode, ops):
 282     if rcregs2(ops[0]):
 283         return storexaddr(opcode, ops)
 284     return 0
 285 def frstorex(opcode, ops):
 286     if rcfregs2(ops[0]):
 287         return storexaddr(opcode, ops)
 288     return 0
 289
 290 memshifts = { 'd': 3, 'w': 2, 'z': 2, 's': 2 }
 291
 292 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
 293 # RB and RT must match.  ??? It's not clear what that means WRT
 294 # register mapping of different kinds of registers, e.g. when RT is a
 295 # floating-point register..
 296 # For register+offset addresses, we support 16-imm ldi, fldi, with
 297 # 3-bit immediates left-shifted by 3; lwi, flsi, with 2-bit immediates
 298 # left-shifted by 2; ldspi for 6-bit immediate left-shifted by 3
 299 # biased by -256, and lwspi for 6-bit immediate left-shifted by 2 also
 300 # biased by -256.  fldi and flsi load to floating-point registers, the
 301 # others load to general-purpose registers.
 302 def loadxaddr(opcode, ops):
 303     if rofp(ops[1]):
 304         ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
 305         shift = memshifts[opcode[-1]]
 306         if immval(ops[2]) & ((1 << shift) - 1) is not 0:
 307             return 0
 308         if rcregs3(ops[1]) and immbits(ops[2]) <= shift + 3:
 309             return 2
 310         if regno(ops[1]) is 1 and opclass(ops[0]) is not 'fr' \
 311            and (immval(ops[2]) - 256).bit_length() <= shift + 6:
 312             return 2
 313         # Otherwise require offset 0 for compression of non-indexed form.
 314         if not regp(ops[2]):
 315             return 0
 316     if rcregs3(ops[1]) and rcregs3(ops[2]):
 317         if regno(ops[0]) in { regno(ops[1]), regno(ops[2]) }:
 318             return 3
 319         return 1
 320     return 0
 321 def rloadx(opcode, ops):
 322     if rcregs3(ops[0]):
 323         return loadxaddr(opcode, ops)
 324     return 0
 325 def frloadx(opcode, ops):
 326     if rcfregs3(ops[0]):
 327         return loadxaddr(opcode, ops)
 328     return 0
 329
 330 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
 331 # RB and RT must match.  RA must not be zero, but in 16-bit mode we
 332 # can swap RA and RB to make it fit.
 333 def addop(opcode, ops):
 334     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
 335         if regno(ops[0]) in { regno(ops[1]), regno(ops[2]) }:
 336             return 3
 337         if regno(ops[1]) is not 0 or regno(ops[2]) is not 0:
 338             return 1
 339     return 0
 340
 341 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
 342 # RA and RT must match.  ??? The spec says RB, but the actual opcode
 343 # is subf., subtract from, and it subtracts RA from RB.  'neg.' would
 344 # make no sense as described there if we didn't use RA.
 345 def subfop(opcode, ops):
 346     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
 347         if regno(ops[0]) is regno(ops[1]):
 348             return 3
 349         return 1
 350     return 0
 351 def negop(opcode, ops):
 352     if rcregs3(ops[0]) and rcregs3(ops[1]):
 353         return 3
 354     return 0
 355
 356 # 3 bits for RA and 3 bits for RB.  L (op1) must be 1 for 10-bit.
 357 # op0 is a cr, must be zero for 10-bit.
 358 def cmpop(opcode, ops):
 359     if rcregs3(ops[2]) and rcregs3(ops[3]):
 360         if regno(ops[0]) is 0 and immval(ops[1]) is 1:
 361             return 3
 362         return 1
 363     return 0
 364
 365 # 3 bits for RS, 3 bits for RB, 3 bits for RS, 16-bit only.
 366 def sldop(opcode, ops):
 367     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
 368         return 1
 369     return 0
 370 # same as sld, except RS must be nonzero.
 371 def srdop(opcode, ops):
 372     if regno(ops[1]) is not 0:
 373         return sldop(opcode, ops)
 374     return 0
 375 # same as sld, except RS is given by RA, so they must be the same.
 376 def sradop(opcode, ops):
 377     if regno(ops[0]) is regno(ops[1]):
 378         return sldop(opcode, ops)
 379     return 0
 380
 381 # binary logical ops: and, nand, or, nor.
 382 # 3 bits for RA (nonzero), 3 bits for RB, 3 bits for RT in 16-bit mode.
 383 # RT is implicitly RB in 10-bit mode.
 384 def binlog1016ops(opcode, ops):
 385     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
 386        and regno(ops[1]) is not 0:
 387         # mr RT, RB AKA or RT, RB, RB takes the 10-bit encoding
 388         # of the 16-bit nor; we've already ruled out r0 as RB above.
 389         if regno(ops[0]) is regno(ops[2]) and opcode is not 'nor':
 390             return 3
 391         # or and and, with two identical inputs, stand for mr.
 392         # nor and nand, likewise, stand for not, that has its
 393         # own unary 10-bit encoding.
 394         if regno(ops[1]) is regno(ops[2]):
 395             return 3
 396         return 1
 397     return 0
 398 # 3 bits for RB, 3 bits for RT in 16-bit mode.
 399 # RT is implicitly RB in 10-bit mode.
 400 def unlog1016ops(opcode, ops):
 401     if rcregs3(ops[0]) and rcregs3(ops[1]):
 402         if regno(ops[0]) is regno(ops[1]):
 403             return 3
 404         return 1
 405     return 0
 406 # 16-bit only logical ops; no 10-bit encoding available
 407 # same constraints as the 1016 ones above.
 408 def binlog16ops(opcode, ops):
 409     if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
 410        and regno(ops[1]) is not 0:
 411         return 1
 412     return 0
 413 def unlog16ops(opcode, ops):
 414     if rcregs3(ops[0]) and rcregs3(ops[1]):
 415         return 1
 416     return 0
 417
 418 # binary floating-point ops
 419 # 3 bits for FRA (nonzero), 3 bits for FRB, 3 bits for FRT in 16-bit mode.
 420 # FRT is implicitly FRB in 10-bit mode.
 421 def binfp1016ops(opcode, ops):
 422     if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
 423        and regno(ops[1]) is not 0:
 424         if regno(ops[0]) is regno(ops[2]):
 425             return 3
 426         return 1
 427     return 0
 428 def unfp1016ops(opcode, ops):
 429     if rcfregs3(ops[0]) and rcfregs3(ops[1]):
 430         if regno(ops[0]) is regno(ops[1]):
 431             return 3
 432         return 1
 433     return 0
 434 def binfp16ops(opcode, ops):
 435     if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
 436        and regno(ops[1]) is not 0:
 437         return 1
 438     return 0
 439 def unfp16ops(opcode, ops):
 440     if rcfregs3(ops[0]) and rcfregs3(ops[1]):
 441         return 1
 442     return 0
 443
 444 def cnvfp16ops(opcode, ops):
 445     if rcfregs2(ops[0]) and rcfregs2(ops[1]):
 446         return 1
 447     return 0
 448
 449 # Move between CRs.  3 bits for destination, 3 bits for source in
 450 # 16-bit mode.  That covers all possibilities.  For 10-bit mode, only
 451 # 2 bits for destination.
 452 def mcrfop(opcode, ops):
 453     if rccregs2(ops[0]):
 454         return 3
 455     return 1
 456 # Logical ops between two CRs into one.  2 bits for destination, that
 457 # must coincide with one of the inputs, 3 bits for the other input.
 458 # 16-bit only.
 459 def crops(opcode, ops):
 460     if rccregs2(ops[0]) and regno(ops[0]) is regno(ops[1]):
 461         return 1
 462     return 0
 463
 464 # 3 bits for general-purpose register; immediate identifies the
 465 # special purpose register to move to: 8 for lr, 9 for ctr.  16-bit
 466 # only.  mtspr imm,rN moves from rN to the spr; mfspr rN,imm moves
 467 # from spr to rN.
 468 def mtsprops(opcode, ops):
 469     if immval(ops[0]) in (8, 9) and rcregs3(ops[1]):
 470         return 1
 471     return 0
 472 def mfsprops(opcode, ops):
 473     if immval(ops[1]) in (8, 9) and rcregs3(ops[0]):
 474         return 1
 475     return 0
 476
 477 # 3 bits for nonzero general-purpose register; the immediate is a
 478 # per-CR mask (8-bits).  mtcr rN is mtcrf 0xFF, rN.  mfcr rN is a raw
 479 # opcode, not an alias.
 480 def mtcrfops(opcode, ops):
 481     if immval(ops[0]) is 255 and rcregs3(ops[1]) and regno(ops[1]) is not 0:
 482         return 1
 483     return 0
 484 def mfcrops(opcode, ops):
 485     if rcregs3(ops[0]) and regno(ops[0]) is not 0:
 486         return 1
 487     return 0
 488
 489 # 3 bits for destination and source register, must be the same.  Full
 490 # shift range fits.  16-imm format.
 491 def shiftops(opcode, ops):
 492     if rcregs3(ops[0]) and regno(ops[0]) is regno(ops[1]):
 493         return 2
 494     return 0
 495
 496 # For 16-imm 'addis' and 'addi', we have 3 bits (nonzero) for the
 497 # destination register, source register is implied 0, the immediate
 498 # must either fit in signed 5-bit, left-shifted by 3, or in signed
 499 # 7-bit without shift.  ??? That seems backwards.
 500 def addiops(opcode, ops):
 501     if rcregs3(ops[0]) and regno(ops[0]) is not 0 \
 502        and regno(ops[1]) is 0 and imm8(ops[2]) \
 503        and immbits(ops[2]) <= 8 \
 504        and ((immval(ops[2]) & 7) is 0 or immbits(ops[2]) <= 7):
 505         return 2
 506     return maybenop(opcode, ops)
 507
 508 # cmpdi and cmpwi are aliases to uncompressed cmp CR#, L, RA, imm16,
 509 # CR# being the target condition register, L being set for d rather
 510 # than w.  In 16-imm, CR# must be zero, RA must fit in 3 bits, and the
 511 # immediate must be 6 bits signed.
 512 def cmpiops(opcode, ops):
 513     if regno(ops[0]) is 0 and immval(ops[1]) in (0,1) \
 514        and rcregs3(ops[2]) and immbits(ops[3]) <= 6:
 515         return 2
 516     return 0
 517
 518 # 16-imm bc, with or without LK, uses 3 bits for BI (CR0 and CR1 only),
 519 # and 1 bit for BO1 (to tell BO 12 from negated 4).
 520 def bcops(opcode, ops):
 521     if immval(ops[0]) in (4,12) and regno(crbtreg(ops[1])) <= 1 \
 522        and immbits(ops[2]) <= 8:
 523         return 2
 524     return 0
 525
 526 # 2 bits for BI and 3 bits for BO in 10-bit encoding; one extra bit
 527 # for each in 16-bit.
 528 def bclrops(opcode, ops):
 529     if immval(ops[0]) <= 15 and regno(crbtreg(ops[1])) <= 1 \
 530        and immbits(ops[2]) is 0:
 531         if immval(ops[0]) <= 7 and regno(crbtreg(ops[1])) is 0:
 532             return 3
 533         return 1
 534     return 0
 535
 536 # Map opcodes that might be compressed to a function that returns the
 537 # best potential encoding kind for the insn, per the numeric coding
 538 # below.
 539 copcond = {
 540     'ori': maybenop,
 541     # 'attn': binutils won't ever print this
 542     'b': uncondbranch, 'bl': uncondbranch,
 543     'bc': bcops, 'bcl': bcops,
 544     'bclr': bclrops, 'bclrl': bclrops,
 545     # Stores and loads, including 16-imm ones
 546     'stdx': rstorex, 'stwx': rstorex,
 547     'std': rstorex, 'stw': rstorex, # only offset zero
 548     'stfdx': frstorex, 'stfsx': frstorex,
 549     'stfd': frstorex, 'stfs': frstorex, # only offset zero
 550     # Assuming lwz* rather than lwa*.
 551     'ldx': rloadx, 'lwzx': rloadx,
 552     'ld': rloadx, 'lwz': rloadx, # only offset zero
 553     'lfdx': rloadx, 'lfsx': rloadx,
 554     'lfd': rloadx, 'lfs': rloadx, # only offset zero
 555     'add': addop,
 556     'subf.': subfop, 'neg.': negop,
 557     # Assuming cmpl stands for cmpd, i.e., cmp with L=1.
 558     # cmpw is cmp with L=0, 16-bit only.
 559     'cmp': cmpop,
 560     'sld.': sldop, 'srd.': srdop, 'srad.': sradop,
 561     'and': binlog1016ops, 'nand': binlog1016ops,
 562     'or': binlog1016ops, 'nor': binlog1016ops,
 563     # assuming popcnt and cntlz mean the *d opcodes.
 564     'popcntd': unlog1016ops, 'cntlzd': unlog1016ops, 'extsw': unlog1016ops,
 565     # not RT, RB is mapped to nand/nor RT, RB, RB.
 566     'xor': binlog16ops, 'eqv': binlog16ops,
 567     # 'setvl.': unlog16ops, # ??? What's 'setvl.'?
 568     # assuming cnttz mean the *d opcode.
 569     'cnttzd': unlog16ops, 'extsb': unlog16ops, 'extsh': unlog16ops,
 570     'fsub.': binfp1016ops, 'fadd': binfp1016ops, 'fmul': binfp1016ops,
 571     'fneg.': unfp1016ops,
 572     'fdiv': binfp16ops,
 573     'fabs.': unfp16ops, 'fmr.': unfp16ops,
 574     # ??? are these the intended fp2int and int2fp, for all
 575     # combinations of signed/unsigned float/double?
 576     'fcfid': cnvfp16ops, 'fctidz': cnvfp16ops,
 577     'fcfidu': cnvfp16ops, 'fctiduz': cnvfp16ops,
 578     'fcfids': cnvfp16ops, 'fctiwz': cnvfp16ops,
 579     'fcfidus': cnvfp16ops, 'fctiwuz': cnvfp16ops,
 580     # Condition register opcodes.
 581     'mcrf': mcrfop,
 582     'crnor': crops,
 583     'crandc': crops,
 584     'crxor': crops,
 585     'crnand': crops,
 586     'crand': crops,
 587     'creqv': crops,
 588     'crorc': crops,
 589     'cror': crops,
 590     # System opcodes.
 591     # 'cbank' is not a ppc opcode, not handled
 592     'mtspr': mtsprops, # raw opcode for 'mtlr', 'mtctr'
 593     'mfspr': mfsprops, # raw opcode for 'mflr', 'mfctr'
 594     'mtcrf': mtcrfops, # raw opcode for 'mtcr'
 595     'mfcr': mfcrops,
 596     # 16-imm opcodes.
 597     'sradi.': shiftops, 'srawi.': shiftops,
 598     'addi': addiops,
 599     'cmpi': cmpiops, # raw opcode for 'cmpwi', 'cmpdi'
 600     # 'setvli', 'setmvli' are not ppc opcodes, not handled.
 601 }
 602
 603 # We have 4 kinds of insns:
 604
 605 # 0: uncompressed; leave input insn unchanged
 606 # 1: 16-bit compressed, only in compressed mode
 607 # 2: 16-imm, i.e., compressed insn that can't switch-out of compressed mode
 608 # 3: 10-bit compressed, may switch to compressed mode
 609
 610 # count[0:3] count the occurrences of the base kinds.
 611 # count[4] counts extra 10-bit nop-switches to compressed mode,
 612 #   tentatively introduced before insns that can be 16-bit encoded.
 613 # count[5] counts extra 10-bit nop-switches to compressed mode,
 614 #   tentatively introduced before insns that can be 16-imm encoded.
 615 # count[6] counts extra 16-bit nop-switches back to uncompressed,
 616 #   introduced after a 16-imm insn.
 617 # count[7] counts pairs of 10-bit nop-switches and 16-imm insns
 618 #   that turned out to be followed by 32-bit insns.  We assume
#   a compressor would backtrack the pair into a single 32-bit
 620 #   insn, so as to avoid a switch-back nop.  The nop and 16-imm
 621 #   insns remain counted as such, so we count these occurrences
 622 #   here.
 623 count = [0,0,0,0,0,0,0,0]
 624 # Default comments for the insn kinds above.
 625 comments = ['', '\t; 16-bit', '\t; 16-imm', '\t; 10-bit']
 626
 627 # curi stands for the insn kind that we read and processed in the
 628 # previous iteration of the loop, and previ is the one before it.  the
 629 # one we're processing in the current iteration will be stored in
 630 # nexti until we make it curi at the very end of the loop.
 631 previ = curi = 0
 632
 633 for line in sys.stdin:
 634     if line[-1] is '\n':
 635         line = line[:-1]
 636
 637     match = insn.fullmatch(line)
 638     if match is None:
 639         print(line)
 640         # Switch to uncompressed mode at function boundaries
 641         previ = curi = 0
 642         continue
 643
 644     addr = match['addr']
 645     opcode = match['opcode']
 646     operands = match['operands']
 647
 648     if opcode in copcond:
 649         nexti = copcond[opcode](opcode,
 650                                [mapop(op) for op in operands.split(',')])
 651     else:
 652         nexti = 0
 653
 654     comment = None
 655
 656     if curi is 0:
 657         if nexti is 0:
 658             True # Uncompressed mode for good.
 659         elif nexti is 1:
 660             # If curi was not a single uncompressed mode insn,
 661             # tentatively encode a 10-bit nop to enter compressed
 662             # mode, and then 16-bit.  It takes as much space as
 663             # encoding as 32-bit, but offers more possibilities for
 664             # subsequent compressed encodings.  A compressor proper
 665             # would have to go back and change the encoding
 666             # afterwards, but wé re just counting.
 667             if previ is not 1:
 668                 print('\t\th.nop\t\t; 10-bit (tentative)')
 669                 count[4] += 1
 670                 comment = '16-bit (tentative)'
 671             else:
 672                 comment = '16-bit auto-back'
 673         elif nexti is 2:
 674             # We can use compressed encoding for the 16-imm nexti
 675             # after an uncompressed insn without penalty if it's the
 676             # single-insn uncompressed mode slot.  For other
 677             # configurations, we can either remain in uncompressed
 678             # mode, or switch to compressed mode with a 10-bit nop.
 679             if previ is not 1:
 680                 print('\t\th.nop\t\t; 10-bit (tentative)')
 681                 count[5] += 1
 682                 comment = '16-imm (tentative), vs uncompressed'
 683             else:
 684                 comment = '16-imm auto-back'
 685         elif nexti is 3:
 686             # If previ was 16-bit compressed, curi would be in the
 687             # single-insn uncompressed slot, so nexti could be encoded
 688             # as 16-bit, enabling another 1-insn uncompressed slot
 689             # after nexti that a 10-bit insn wouldn't, so make it so.
 690             if previ is 1:
 691                 nexti = 1
 692                 comment = '16-bit auto-back, vs 10-bit'
 693     elif curi is 1:
 694         # After a 16-bit insn, anything goes.  If it remains in 16-bit
 695         # mode, we can have 1 or 2 as nexti; if it returns to 32-bit
 696         # mode, we can have 0 or 3.  Using 1 instead of 3 makes room
 697         # for a subsequent single-insn compressed mode, so prefer
 698         # that.
 699         if nexti is 3:
 700             nexti = 1
 701             comment = '16-bit, vs 10-bit'
 702     elif curi is 2:
 703         # After a 16-imm insn, we can only switch back to uncompressed
 704         # mode with a 16-bit nop.
 705         if nexti is 0:
 706             if previ is 0:
 707                 print('\t\t\t\t; backtracking pair above to 32-bit')
 708                 count[7] += 1
 709                 curi = 0
 710             else:
 711                 print('\t\tc.nop\t\t; forced switch back to uncompressed mode')
 712                 count[6] += 1
 713                 previ = curi
 714                 curi = 1
 715         elif nexti is 3:
 716             nexti = 1
 717     elif curi is 3:
 718         # After a 10-bit insn, another insn that could be encoded as
 719         # 10-bit might as well be encoded as 16-bit, to make room for
 720         # a single-insn uncompressed insn afterwards.
 721         if nexti is 3:
 722             nexti = 1
 723             comment = '16-bit, vs 10-bit'
 724     else:
 725         raise "unknown mode for previious insn"
 726
 727     count[nexti] += 1
 728
 729     if comment is None:
 730         comment = comments[nexti]
 731     else:
 732         comment = '\t; ' + comment
 733
 734     print(line + comment)
 735
 736     previ = curi
 737     curi = nexti
 738
 739 transition_bytes = 2 * (count[4] + count[5] + count[6])
 740 compressed_bytes = 2 * (count[1] + count[2] + count[3])
 741 uncompressed_bytes = 4 * count[0]
 742 total_bytes = transition_bytes + compressed_bytes + uncompressed_bytes
 743 original_bytes = 2 * compressed_bytes + uncompressed_bytes
 744
 745 print()
 746 print('Summary')
 747 print('32-bit uncompressed instructions: %i' % count[0])
 748 print('16-bit compressed instructions: %i' % count[1])
 749 print('16-imm compressed-mode instructions: %i' % count[2])
 750 print('10-bit compressed instructions: %i' % count[3])
 751 print('10-bit mode-switching nops: %i' % count[4])
 752 print('10-bit mode-switching nops for imm-16: %i' % count[5])
 753 print('16-bit mode-switching nops after imm-16: %i' % count[6])
 754 print('10-bit nop+16-imm pairs above, backtracked to 32-bit: %i' % count[7])
 755 print('Compressed size estimate: %i' % total_bytes)
 756 print('Original size: %i' % original_bytes)
 757 print('Compressed/original ratio: %f' % (total_bytes / original_bytes))