2 # see https://bugs.libre-soc.org/show_bug.cgi?id=532
4 # Estimate ppc code compression with Libre-SOC encoding attempt v1.
7 # Copyright 2020 Alexandre Oliva
9 # This script is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 3, or (at your option)
14 # This script is distributed in the hope that it will be useful, but
15 # WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this script; see the file COPYING3. If not see
21 # <http://www.gnu.org/licenses/>.
23 # Skeleton originally by Alexandre Oliva <oliva@gnu.org>.
26 # Feed this script the output of objdump -M raw --no-show-raw-insn ppc-prog
28 # It will look for insns that can be represented in compressed mode,
29 # according to the encoding rules in the copcond dictionary below.
31 # Nothing is assumed as to the actual bit-encoding of the insns, this
32 # is just to experiment with insn selection and get a quick feedback
33 # loop for the encoding options in compressed mode.
35 # In this script, the computations of encoding modes and transitions
36 # are those for attempt 1 encoding, that encompasses:
38 # - a 16-bit insn (with 10-bit payload) that may switch to compressed
39 # mode or return to 32-bit mode;
41 # - 16-bit insns in compressed mode, each with 2 bits devoted to
42 # encoding one of the following possibilities:
44 # -- switch back to uncompressed mode at the next insn
46 # -- interpret the next insn in uncompressed mode, then return to
49 # -- remain in 16-bit mode for the next insn
51 # - a 16-bit immediate insn in compressed mode, that must be followed
52 # by another compressed insn
54 # At (visible) entry points, mode is forced to return to uncompressed
55 # mode. Every branch target must be in uncompressed mode as well, but
56 # this script does not enforce that.
58 # The entire code stream is printed, without any attempt to modify the
59 # addresses that go along with or in them; we only insert markers for
60 # the transition points, and for the compressed instructions.
62 # The really useful information is printed at the end: a summary of
63 # transition and compressed-insn counts, and the achieved compression
# Pattern for one disassembled instruction line: leading whitespace, a
# hexadecimal address followed by a colon, more whitespace, the opcode
# (a run of non-space characters), optional spaces, and then whatever
# remains of the line as the operand list.  A raw string is used so that
# the regex escapes (\s) are not treated as (invalid) string escapes.
insn = re.compile(
    r'\s+(?P<addr>[0-9a-f]+):\s+(?P<opcode>[^ ]+) *(?P<operands>.*)')
# Operand sub-patterns.  Each one captures into its own named groups, so
# that after matching the combined pattern the groups that are not None
# tell which kind of operand was seen.  Raw strings keep the regex
# escapes (\(, \), \*, \+) from being read as invalid string escapes.

# reg is a regkind (r, cr, fr) followed by a regnum
xreg = r'(?P<reg>(?P<regkind>[cf]?r)(?P<regnum>[0-9]+))'
# immediate is a sequence of digits, possibly preceded by a negative sign
ximm = r'(?P<immediate>-?[0-9]+)'
# branch is a branch target address; ignore an angle-bracketed label after it
xbrt = r'(?P<branch>[0-9a-f]+)(?: <.*>)?'
# offset is like immediate, but followed by a parenthesized basereg
xoff = r'(?P<offset>-?[0-9]+)\((?P<basereg>r[0-9]+)\)'
# creg is the cr, cond names one of its bits
crbit = r'(?:4\*(?P<creg>cr[0-7])\+)?(?P<cond>gt|lt|eq|so)'

# Combine the above into a single alternation, to easily classify an
# operand by checking which named group matched.
# NOTE(review): alternatives are tried in order, and ximm comes before
# xbrt, so an operand consisting only of decimal digits (with no
# trailing <label>) is reported as an immediate, never as a branch.
opkind = re.compile('|'.join([xreg, ximm, xbrt, xoff, crbit]))
90 # Pre-parse and classify op into a mop, short for mapped op.
92 match
= opkind
.fullmatch(op
)
96 elif match
['reg'] is not None:
97 op
= (match
['regkind'], int(match
['regnum']), op
)
98 elif match
['immediate'] is not None:
99 op
= ('imm', int (op
).bit_length (), op
)
100 elif match
['branch'] is not None:
101 op
= ('pcoff', (int (match
['branch'], 16)
102 - int (addr
, 16)).bit_length (), op
, addr
)
103 elif match
['offset'] is not None:
104 op
= ('ofst', mapop(match
['offset']), mapop(match
['basereg']), op
)
105 elif match
['cond'] is not None:
106 if match
['creg'] is None:
110 op
= ('crbit', mapop(creg
), ('cond', match
['cond']), op
)
112 raise "unrecognized operand kind"
116 # Accessor to enable the mapop representation to change easily.
120 # Some opclass predicates, for the same reason.
122 return opclass(mop
) in { 'r', 'fr', 'cr' } \
123 or (opclass(mop
) is 'imm' and mop
[1] is 0)
125 return opclass(mop
) in { 'imm', 'pcoff' }
127 return opclass(mop
) is 'ofst'
129 return opclass(mop
) is 'crbit'
131 # Some more accessors.
133 # Return the reg number if mop fits regp.
136 or (immp(mop
) and mop
[1] is 0):
138 raise "operand is not a register"
143 raise "operand is not an immediate"
145 # Return the immediate length if mop fits immp.
149 raise "operand is not an immediate"
151 # Return the register sub-mop if mop fits rofp.
155 raise "operand is not an offset"
# Return the offset sub-mop if mop fits rofp.
161 raise "operand is not an offset"
163 # Return the register sub-mop if mop fits crbit.
167 raise "operand is not a condition register bit"
169 # Return the cond bit name if mop fits crbit.
173 raise "operand is not a condition register bit"
175 # Following are predicates to be used in copcond, to tell the mode in
176 # which opcode with ops as operands is to be represented.
178 # TODO: use insn_histogram.py to show the best targets
179 # (remember to exclude nop - ori r0,r0,0 as this skews numbers)
# General-purpose register numbers representable in a made-up 3-bit
# mapping.  0 has to be a member for at least storex to work properly.
# An alternative choice that was tried:
#   cregs3 = { 0, 31, 1, 2, 3, 4, 5, 6, 7 }
# NOTE(review): this set holds 9 register numbers, one more than a
# 3-bit field can distinguish -- confirm whether that is intended.
cregs3 = {0, 1, 2, 3, 4, 9, 10, 30, 31}
# Likewise for a 2-bit mapping.  0 is not required here, but the set
# has to be a subset of cregs3 for at least storex to work properly.
cregs2 = {1, 2, 3, 9}
187 # Use the same sets for FP for now.
# Condition register numbers accepted by the ccregs2 membership test.
ccregs2 = set(range(4))
192 # Return true iff mop is a regular register present in cregs2
194 return opclass(mop
) in { 'r', 'imm' } and regno(mop
) in cregs2
196 # Return true iff mop is a regular register present in cregs3
198 return opclass(mop
) in { 'r', 'imm' } and regno(mop
) in cregs3
200 # Return true iff mop is a floating-point register present in cfregs2
202 return opclass(mop
) is 'fr' and regno(mop
) in cfregs2
204 # Return true iff mop is a floating-point register present in cfregs3
206 return opclass(mop
) is 'fr' and regno(mop
) in cfregs3
208 # Return true iff mop is a condition register present in ccregs2
210 return opclass(mop
) is 'cr' and regno(mop
) in ccregs2
212 # Return true iff mop is an immediate of at most 8 bits.
214 return immp(mop
) and immbits(mop
) <= 8
216 # Return true iff mop is an immediate of at most 12 bits.
218 return immp(mop
) and immbits(mop
) <= 12
220 # Compress binary opcodes iff the first two operands (output and first
221 # input operand) are registers representable in 3 bits in compressed
222 # mode, and the immediate operand can be represented in 8 bits.
223 def bin2regs3imm8(opcode
, ops
):
224 if rcregs3(ops
[0]) and rcregs3(ops
[1]) and imm8(ops
[2]):
228 # Recognize do-nothing insns, particularly ori r0,r0,0.
229 def maybenop(opcode
, ops
):
230 if opcode
in ['ori', 'addi'] and regno(ops
[0]) is regno(ops
[1]) \
231 and opclass(ops
[0]) is 'r' and regno(ops
[0]) is 0 \
232 and imm8(ops
[2]) and immbits(ops
[2]) is 0:
236 # Recognize an unconditional branch, that can be represented with a
# 6-bit operand in 10-bit mode, and an additional 4 bits in 16-bit
238 # mode. In both cases, the offset is shifted left by 2 bits.
239 def uncondbranch(opcode
, ops
):
246 # 2 bits for RT and RA. RB is r0 in 10-bit, and 3 bits in 16-bit ???
247 # there's a general assumption that, if an insn can be represented in
248 # 10-bits, then it can also be represented in 16 bits. This will not
249 # be the case if cregs3 can't represent register 0. For
250 # register+offset addresses, we support 16-imm stdi, fstdi, with 3-bit
251 # immediates left-shifted by 3; stwi, fstsi, with 2-bit immediates
252 # left-shifted by 2; stdspi for 6-bit immediate left-shifted by 3
253 # biased by -256, and stwspi for 6-bit immediate left-shifted by 2
254 # also biased by -256. fstdi and fstsi store in memory a
255 # floating-point register, the others do a general-purpose register.
256 def storexaddr(opcode
, ops
):
257 # Canonicalize offset in ops[1] to reg, imm
259 ops
= (ops
[0], rofreg(ops
[1]), rofset(ops
[1]))
260 shift
= memshifts
[opcode
[-1]]
261 if immval(ops
[2]) & ((1 << shift
) - 1) is not 0:
263 if rcregs3(ops
[1]) and immbits(ops
[2]) <= shift
+ 3:
265 if regno(ops
[1]) is 1 and opclass(ops
[0]) is not 'fr' \
266 and (immval(ops
[2]) - 256).bit_length() <= shift
+ 6:
268 # Require offset 0 for compression of non-indexed form.
271 # If any of the registers is zero, and the other fits in cregs2,
273 if (rcregs2(ops
[1]) and regno(ops
[2]) is 0) \
274 or (regno(ops
[1]) is 0 and rcregs2(ops
[2])):
276 # For 16-bit one must fit rcregs2 and the other rcregs3.
277 if (rcregs2(ops
[1]) and rcregs3(ops
[2])) \
278 or (rcregs3(ops
[1]) and rcregs2(ops
[2])):
281 def rstorex(opcode
, ops
):
283 return storexaddr(opcode
, ops
)
285 def frstorex(opcode
, ops
):
287 return storexaddr(opcode
, ops
)
# Shift count for memory-access offsets, looked up by the last letter
# of the load/store opcode.
memshifts = dict(d=3, w=2, z=2, s=2)
292 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit. for 10-bit,
293 # RB and RT must match. ??? It's not clear what that means WRT
294 # register mapping of different kinds of registers, e.g. when RT is a
295 # floating-point register..
296 # For register+offset addresses, we support 16-imm ldi, fldi, with
297 # 3-bit immediates left-shifted by 3; lwi, flsi, with 2-bit immediates
298 # left-shifted by 2; ldspi for 6-bit immediate left-shifted by 3
299 # biased by -256, and lwspi for 6-bit immediate left-shifted by 2 also
300 # biased by -256. fldi and flsi load to floating-point registers, the
301 # others load to general-purpose registers.
302 def loadxaddr(opcode
, ops
):
304 ops
= (ops
[0], rofreg(ops
[1]), rofset(ops
[1]))
305 shift
= memshifts
[opcode
[-1]]
306 if immval(ops
[2]) & ((1 << shift
) - 1) is not 0:
308 if rcregs3(ops
[1]) and immbits(ops
[2]) <= shift
+ 3:
310 if regno(ops
[1]) is 1 and opclass(ops
[0]) is not 'fr' \
311 and (immval(ops
[2]) - 256).bit_length() <= shift
+ 6:
313 # Otherwise require offset 0 for compression of non-indexed form.
316 if rcregs3(ops
[1]) and rcregs3(ops
[2]):
317 if regno(ops
[0]) in { regno(ops
[1]), regno(ops
[2]) }:
321 def rloadx(opcode
, ops
):
323 return loadxaddr(opcode
, ops
)
325 def frloadx(opcode
, ops
):
327 return loadxaddr(opcode
, ops
)
330 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit. for 10-bit,
331 # RB and RT must match. RA must not be zero, but in 16-bit mode we
332 # can swap RA and RB to make it fit.
333 def addop(opcode
, ops
):
334 if rcregs3(ops
[0]) and rcregs3(ops
[1]) and rcregs3(ops
[2]):
335 if regno(ops
[0]) in { regno(ops
[1]), regno(ops
[2]) }:
337 if regno(ops
[1]) is not 0 or regno(ops
[2]) is not 0:
341 # 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit. for 10-bit,
342 # RA and RT must match. ??? The spec says RB, but the actual opcode
343 # is subf., subtract from, and it subtracts RA from RB. 'neg.' would
344 # make no sense as described there if we didn't use RA.
345 def subfop(opcode
, ops
):
346 if rcregs3(ops
[0]) and rcregs3(ops
[1]) and rcregs3(ops
[2]):
347 if regno(ops
[0]) is regno(ops
[1]):
351 def negop(opcode
, ops
):
352 if rcregs3(ops
[0]) and rcregs3(ops
[1]):
356 # 3 bits for RA and 3 bits for RB. L (op1) must be 1 for 10-bit.
357 # op0 is a cr, must be zero for 10-bit.
358 def cmpop(opcode
, ops
):
359 if rcregs3(ops
[2]) and rcregs3(ops
[3]):
360 if regno(ops
[0]) is 0 and immval(ops
[1]) is 1:
# 3 bits for RA, 3 bits for RB, 3 bits for RS, 16-bit only.
366 def sldop(opcode
, ops
):
367 if rcregs3(ops
[0]) and rcregs3(ops
[1]) and rcregs3(ops
[2]):
370 # same as sld, except RS must be nonzero.
371 def srdop(opcode
, ops
):
372 if regno(ops
[1]) is not 0:
373 return sldop(opcode
, ops
)
375 # same as sld, except RS is given by RA, so they must be the same.
376 def sradop(opcode
, ops
):
377 if regno(ops
[0]) is regno(ops
[1]):
378 return sldop(opcode
, ops
)
381 # binary logical ops: and, nand, or, nor.
382 # 3 bits for RA (nonzero), 3 bits for RB, 3 bits for RT in 16-bit mode.
383 # RT is implicitly RB in 10-bit mode.
384 def binlog1016ops(opcode
, ops
):
385 if rcregs3(ops
[0]) and rcregs3(ops
[1]) and rcregs3(ops
[2]) \
386 and regno(ops
[1]) is not 0:
387 # mr RT, RB AKA or RT, RB, RB takes the 10-bit encoding
388 # of the 16-bit nor; we've already ruled out r0 as RB above.
389 if regno(ops
[0]) is regno(ops
[2]) and opcode
is not 'nor':
391 # or and and, with two identical inputs, stand for mr.
392 # nor and nand, likewise, stand for not, that has its
393 # own unary 10-bit encoding.
394 if regno(ops
[1]) is regno(ops
[2]):
398 # 3 bits for RB, 3 bits for RT in 16-bit mode.
399 # RT is implicitly RB in 10-bit mode.
400 def unlog1016ops(opcode
, ops
):
401 if rcregs3(ops
[0]) and rcregs3(ops
[1]):
402 if regno(ops
[0]) is regno(ops
[1]):
406 # 16-bit only logical ops; no 10-bit encoding available
407 # same constraints as the 1016 ones above.
408 def binlog16ops(opcode
, ops
):
409 if rcregs3(ops
[0]) and rcregs3(ops
[1]) and rcregs3(ops
[2]) \
410 and regno(ops
[1]) is not 0:
413 def unlog16ops(opcode
, ops
):
414 if rcregs3(ops
[0]) and rcregs3(ops
[1]):
418 # binary floating-point ops
419 # 3 bits for FRA (nonzero), 3 bits for FRB, 3 bits for FRT in 16-bit mode.
420 # FRT is implicitly FRB in 10-bit mode.
421 def binfp1016ops(opcode
, ops
):
422 if rcfregs3(ops
[0]) and rcfregs3(ops
[1]) and rcfregs3(ops
[2]) \
423 and regno(ops
[1]) is not 0:
424 if regno(ops
[0]) is regno(ops
[2]):
428 def unfp1016ops(opcode
, ops
):
429 if rcfregs3(ops
[0]) and rcfregs3(ops
[1]):
430 if regno(ops
[0]) is regno(ops
[1]):
434 def binfp16ops(opcode
, ops
):
435 if rcfregs3(ops
[0]) and rcfregs3(ops
[1]) and rcfregs3(ops
[2]) \
436 and regno(ops
[1]) is not 0:
439 def unfp16ops(opcode
, ops
):
440 if rcfregs3(ops
[0]) and rcfregs3(ops
[1]):
444 def cnvfp16ops(opcode
, ops
):
445 if rcfregs2(ops
[0]) and rcfregs2(ops
[1]):
449 # Move between CRs. 3 bits for destination, 3 bits for source in
450 # 16-bit mode. That covers all possibilities. For 10-bit mode, only
451 # 2 bits for destination.
452 def mcrfop(opcode
, ops
):
456 # Logical ops between two CRs into one. 2 bits for destination, that
457 # must coincide with one of the inputs, 3 bits for the other input.
459 def crops(opcode
, ops
):
460 if rccregs2(ops
[0]) and regno(ops
[0]) is regno(ops
[1]):
464 # 3 bits for general-purpose register; immediate identifies the
465 # special purpose register to move to: 8 for lr, 9 for ctr. 16-bit
466 # only. mtspr imm,rN moves from rN to the spr; mfspr rN,imm moves
468 def mtsprops(opcode
, ops
):
469 if immval(ops
[0]) in (8, 9) and rcregs3(ops
[1]):
472 def mfsprops(opcode
, ops
):
473 if immval(ops
[1]) in (8, 9) and rcregs3(ops
[0]):
477 # 3 bits for nonzero general-purpose register; the immediate is a
478 # per-CR mask (8-bits). mtcr rN is mtcrf 0xFF, rN. mfcr rN is a raw
479 # opcode, not an alias.
480 def mtcrfops(opcode
, ops
):
481 if immval(ops
[0]) is 255 and rcregs3(ops
[1]) and regno(ops
[1]) is not 0:
484 def mfcrops(opcode
, ops
):
485 if rcregs3(ops
[0]) and regno(ops
[0]) is not 0:
489 # 3 bits for destination and source register, must be the same. Full
490 # shift range fits. 16-imm format.
491 def shiftops(opcode
, ops
):
492 if rcregs3(ops
[0]) and regno(ops
[0]) is regno(ops
[1]):
496 # For 16-imm 'addis' and 'addi', we have 3 bits (nonzero) for the
497 # destination register, source register is implied 0, the immediate
498 # must either fit in signed 5-bit, left-shifted by 3, or in signed
499 # 7-bit without shift. ??? That seems backwards.
500 def addiops(opcode
, ops
):
501 if rcregs3(ops
[0]) and regno(ops
[0]) is not 0 \
502 and regno(ops
[1]) is 0 and imm8(ops
[2]) \
503 and immbits(ops
[2]) <= 8 \
504 and ((immval(ops
[2]) & 7) is 0 or immbits(ops
[2]) <= 7):
506 return maybenop(opcode
, ops
)
508 # cmpdi and cmpwi are aliases to uncompressed cmp CR#, L, RA, imm16,
509 # CR# being the target condition register, L being set for d rather
510 # than w. In 16-imm, CR# must be zero, RA must fit in 3 bits, and the
511 # immediate must be 6 bits signed.
512 def cmpiops(opcode
, ops
):
513 if regno(ops
[0]) is 0 and immval(ops
[1]) in (0,1) \
514 and rcregs3(ops
[2]) and immbits(ops
[3]) <= 6:
518 # 16-imm bc, with or without LK, uses 3 bits for BI (CR0 and CR1 only),
519 # and 1 bit for BO1 (to tell BO 12 from negated 4).
520 def bcops(opcode
, ops
):
521 if immval(ops
[0]) in (4,12) and regno(crbtreg(ops
[1])) <= 1 \
522 and immbits(ops
[2]) <= 8:
526 # 2 bits for BI and 3 bits for BO in 10-bit encoding; one extra bit
527 # for each in 16-bit.
528 def bclrops(opcode
, ops
):
529 if immval(ops
[0]) <= 15 and regno(crbtreg(ops
[1])) <= 1 \
530 and immbits(ops
[2]) is 0:
531 if immval(ops
[0]) <= 7 and regno(crbtreg(ops
[1])) is 0:
536 # Map opcodes that might be compressed to a function that returns the
537 # best potential encoding kind for the insn, per the numeric coding
541 # 'attn': binutils won't ever print this
542 'b': uncondbranch
, 'bl': uncondbranch
,
543 'bc': bcops
, 'bcl': bcops
,
544 'bclr': bclrops
, 'bclrl': bclrops
,
545 # Stores and loads, including 16-imm ones
546 'stdx': rstorex
, 'stwx': rstorex
,
547 'std': rstorex
, 'stw': rstorex
, # only offset zero
548 'stfdx': frstorex
, 'stfsx': frstorex
,
549 'stfd': frstorex
, 'stfs': frstorex
, # only offset zero
550 # Assuming lwz* rather than lwa*.
551 'ldx': rloadx
, 'lwzx': rloadx
,
552 'ld': rloadx
, 'lwz': rloadx
, # only offset zero
553 'lfdx': rloadx
, 'lfsx': rloadx
,
554 'lfd': rloadx
, 'lfs': rloadx
, # only offset zero
556 'subf.': subfop
, 'neg.': negop
,
557 # Assuming cmpl stands for cmpd, i.e., cmp with L=1.
558 # cmpw is cmp with L=0, 16-bit only.
560 'sld.': sldop
, 'srd.': srdop
, 'srad.': sradop
,
561 'and': binlog1016ops
, 'nand': binlog1016ops
,
562 'or': binlog1016ops
, 'nor': binlog1016ops
,
563 # assuming popcnt and cntlz mean the *d opcodes.
564 'popcntd': unlog1016ops
, 'cntlzd': unlog1016ops
, 'extsw': unlog1016ops
,
565 # not RT, RB is mapped to nand/nor RT, RB, RB.
566 'xor': binlog16ops
, 'eqv': binlog16ops
,
567 # 'setvl.': unlog16ops, # ??? What's 'setvl.'?
568 # assuming cnttz mean the *d opcode.
569 'cnttzd': unlog16ops
, 'extsb': unlog16ops
, 'extsh': unlog16ops
,
570 'fsub.': binfp1016ops
, 'fadd': binfp1016ops
, 'fmul': binfp1016ops
,
571 'fneg.': unfp1016ops
,
573 'fabs.': unfp16ops
, 'fmr.': unfp16ops
,
574 # ??? are these the intended fp2int and int2fp, for all
575 # combinations of signed/unsigned float/double?
576 'fcfid': cnvfp16ops
, 'fctidz': cnvfp16ops
,
577 'fcfidu': cnvfp16ops
, 'fctiduz': cnvfp16ops
,
578 'fcfids': cnvfp16ops
, 'fctiwz': cnvfp16ops
,
579 'fcfidus': cnvfp16ops
, 'fctiwuz': cnvfp16ops
,
580 # Condition register opcodes.
591 # 'cbank' is not a ppc opcode, not handled
592 'mtspr': mtsprops
, # raw opcode for 'mtlr', 'mtctr'
593 'mfspr': mfsprops
, # raw opcode for 'mflr', 'mfctr'
594 'mtcrf': mtcrfops
, # raw opcode for 'mtcr'
597 'sradi.': shiftops
, 'srawi.': shiftops
,
599 'cmpi': cmpiops
, # raw opcode for 'cmpwi', 'cmpdi'
600 # 'setvli', 'setmvli' are not ppc opcodes, not handled.
603 # We have 4 kinds of insns:
605 # 0: uncompressed; leave input insn unchanged
606 # 1: 16-bit compressed, only in compressed mode
607 # 2: 16-imm, i.e., compressed insn that can't switch-out of compressed mode
608 # 3: 10-bit compressed, may switch to compressed mode
610 # count[0:3] count the occurrences of the base kinds.
611 # count[4] counts extra 10-bit nop-switches to compressed mode,
612 # tentatively introduced before insns that can be 16-bit encoded.
613 # count[5] counts extra 10-bit nop-switches to compressed mode,
614 # tentatively introduced before insns that can be 16-imm encoded.
615 # count[6] counts extra 16-bit nop-switches back to uncompressed,
616 # introduced after a 16-imm insn.
617 # count[7] counts pairs of 10-bit nop-switches and 16-imm insns
618 # that turned out to be followed by 32-bit insns. We assume
# a compressor would backtrack the pair into a single 32-bit
620 # insn, so as to avoid a switch-back nop. The nop and 16-imm
621 # insns remain counted as such, so we count these occurrences
# Eight tallies, indexed 0 through 7, all starting at zero.
count = [0] * 8
# Default comments for the insn kinds above.
comments = [
    '',
    '\t; 16-bit',
    '\t; 16-imm',
    '\t; 10-bit',
]
627 # curi stands for the insn kind that we read and processed in the
628 # previous iteration of the loop, and previ is the one before it. the
629 # one we're processing in the current iteration will be stored in
630 # nexti until we make it curi at the very end of the loop.
633 for line
in sys
.stdin
:
637 match
= insn
.fullmatch(line
)
640 # Switch to uncompressed mode at function boundaries
645 opcode
= match
['opcode']
646 operands
= match
['operands']
648 if opcode
in copcond
:
649 nexti
= copcond
[opcode
](opcode
,
650 [mapop(op
) for op
in operands
.split(',')])
658 True # Uncompressed mode for good.
660 # If curi was not a single uncompressed mode insn,
661 # tentatively encode a 10-bit nop to enter compressed
662 # mode, and then 16-bit. It takes as much space as
663 # encoding as 32-bit, but offers more possibilities for
664 # subsequent compressed encodings. A compressor proper
665 # would have to go back and change the encoding
# afterwards, but we're just counting.
668 print('\t\th.nop\t\t; 10-bit (tentative)')
670 comment
= '16-bit (tentative)'
672 comment
= '16-bit auto-back'
674 # We can use compressed encoding for the 16-imm nexti
675 # after an uncompressed insn without penalty if it's the
676 # single-insn uncompressed mode slot. For other
677 # configurations, we can either remain in uncompressed
678 # mode, or switch to compressed mode with a 10-bit nop.
680 print('\t\th.nop\t\t; 10-bit (tentative)')
682 comment
= '16-imm (tentative), vs uncompressed'
684 comment
= '16-imm auto-back'
686 # If previ was 16-bit compressed, curi would be in the
687 # single-insn uncompressed slot, so nexti could be encoded
688 # as 16-bit, enabling another 1-insn uncompressed slot
689 # after nexti that a 10-bit insn wouldn't, so make it so.
692 comment
= '16-bit auto-back, vs 10-bit'
694 # After a 16-bit insn, anything goes. If it remains in 16-bit
695 # mode, we can have 1 or 2 as nexti; if it returns to 32-bit
696 # mode, we can have 0 or 3. Using 1 instead of 3 makes room
697 # for a subsequent single-insn compressed mode, so prefer
701 comment
= '16-bit, vs 10-bit'
703 # After a 16-imm insn, we can only switch back to uncompressed
704 # mode with a 16-bit nop.
707 print('\t\t\t\t; backtracking pair above to 32-bit')
711 print('\t\tc.nop\t\t; forced switch back to uncompressed mode')
718 # After a 10-bit insn, another insn that could be encoded as
719 # 10-bit might as well be encoded as 16-bit, to make room for
720 # a single-insn uncompressed insn afterwards.
723 comment
= '16-bit, vs 10-bit'
725 raise "unknown mode for previious insn"
730 comment
= comments
[nexti
]
732 comment
= '\t; ' + comment
734 print(line
+ comment
)
# Size accounting, in bytes.  Mode-switching nops (count[4..6]) and
# compressed insns (count[1..3]) take 2 bytes each; uncompressed insns
# (count[0]) take 4 bytes each.
transition_bytes = 2 * sum(count[4:7])
compressed_bytes = 2 * sum(count[1:4])
uncompressed_bytes = 4 * count[0]
total_bytes = transition_bytes + compressed_bytes + uncompressed_bytes
# Every compressed insn is counted at twice its compressed size here,
# and the nops are not counted at all.
original_bytes = uncompressed_bytes + 2 * compressed_bytes
# Print the summary: one line per counter, in index order, followed by
# the size estimates and the achieved compression ratio.
summary_labels = (
    '32-bit uncompressed instructions',
    '16-bit compressed instructions',
    '16-imm compressed-mode instructions',
    '10-bit compressed instructions',
    '10-bit mode-switching nops',
    '10-bit mode-switching nops for imm-16',
    '16-bit mode-switching nops after imm-16',
    '10-bit nop+16-imm pairs above, backtracked to 32-bit',
)
for label, tally in zip(summary_labels, count):
    print('%s: %i' % (label, tally))
print('Compressed size estimate: %i' % total_bytes)
print('Original size: %i' % original_bytes)
# original_bytes is zero when no instructions were counted (e.g. empty
# input); report that instead of failing with ZeroDivisionError.
if original_bytes:
    print('Compressed/original ratio: %f' % (total_bytes / original_bytes))
else:
    print('Compressed/original ratio: n/a (no instructions counted)')