From a09a9e4cd0f172b4516e1ec211f613f6fee73467 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Mon, 7 Dec 2020 04:43:12 -0300 Subject: [PATCH] comp16-v1-skel.py: fixed 16-imm logic, implemented rules for most opcodes --- lxo/ChangeLog | 19 ++ openpower/sv/comp16-v1-skel.py | 480 +++++++++++++++++++++++++++------ 2 files changed, 417 insertions(+), 82 deletions(-) diff --git a/lxo/ChangeLog b/lxo/ChangeLog index 2f252d4e8..3e3d2fced 100644 --- a/lxo/ChangeLog +++ b/lxo/ChangeLog @@ -1,3 +1,22 @@ +2020-12-06 + + * 532: Adjusted the logic in comp16-v1-skel.py for 16-bit 16-imm + rather than the 16+16 I'd invented. Implemented the most relevant + opcodes for 10-bit, and many of the 16-bit ones too. Not yet + implemented are conditional branches, Immediate, CR and System + opcodes. With all of nop, unconditional branch, ld/st, + arithmetic, logical and floating-point, we get less than 3% + compression in GCC, with not-entirely-unreasonable reg subsets. + It's not looking good. (8:27) + +2020-12-02 + + * Microwatts meeting. + * 238: Added some thoghts on bl and blr, and implications about + modes. Also detailed my worries about how to preserve dynamic + state, specifically switch-back-to-compressed-after-insn, across + interrupts. (1:44) + 2020-11-30 * 238: Settled the N-without-M issue, it was likely an error in diff --git a/openpower/sv/comp16-v1-skel.py b/openpower/sv/comp16-v1-skel.py index 4e0d64006..62627472c 100644 --- a/openpower/sv/comp16-v1-skel.py +++ b/openpower/sv/comp16-v1-skel.py @@ -4,6 +4,8 @@ # Estimate ppc code compression with Libre-SOC encoding attempt v1. +# Copyright 2020 Alexandre Oliva + # This script is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) @@ -46,12 +48,12 @@ # -- remain in 16-bit mode for the next insn -# -- take the 16 bits that would be the next compressed insn as an -# extension to the present 16-bit insn, and remain in 16-bit mode for -# the subsequent 16-bits +# - a 16-bit immediate insn in compressed mode, that must be followed +# by another compressed insn # At (visible) entry points, mode is forced to return to uncompressed -# mode. +# mode. Every branch target must be in uncompressed mode as well, but +# this script does not enforce that. # The entire code stream is printed, without any attempt to modify the # addresses that go along with or in them; we only insert markers for @@ -66,8 +68,23 @@ import re insn = re.compile('\s+(?P[0-9a-f]+):\s+(?P[^ ]+) *(?P.*)') -opkind = re.compile('(?P(?P[cf]?r)(?P[0-9]+))|(?P-?[0-9]+)|(?P[0-9a-f]+)(?: <.*>)?|(?P-?[0-9]+)\((?Pr[0-9]+)\)') +# reg is a regkind (r, cr, fr) followed by a regnum +xreg = '(?P(?P[cf]?r)(?P[0-9]+))' + +# immediate is a sequence of digits, possibly preceded by a negative sign +ximm = '(?P-?[0-9]+)' + +# branch is a branch target address; ignore an angle-bracketed label after it +xbrt = '(?P[0-9a-f]+)(?: <.*>)?' +# offset is like immediate, but followed by a parenthesized basereg +xoff = '(?P-?[0-9]+)\((?Pr[0-9]+)\)' + +# Combine the above into alternatives, to easily classify operands by +# pattern matching. +opkind = re.compile('|'.join([xreg, ximm, xbrt, xoff])) + +# Pre-parse and classify op into a mop, short for mapped op. def mapop(op): match = opkind.fullmatch(op) @@ -82,87 +99,384 @@ def mapop(op): - int (addr, 16)).bit_length (), op, addr) elif match['offset'] is not None: op = ('ofst', mapop(match['offset']), mapop(match['basereg']), op) + # FIXME: cr exprs not handled else: raise "unrecognized operand kind" return op +# Accessor to enable the mapop representation to change easily. def opclass(mop): return mop[0] + +# Some opclass predicates, for the same reason. +def regp(mop): + return opclass(mop) in { 'r', 'fr', 'cr' } \ + or (opclass(mop) is 'imm' and mop[1] is 0) +def immp(mop): + return opclass(mop) in { 'imm', 'pcoff' } +def rofp(mop): + return opclass(mop) is 'ofst' + +# Some more accessors. + +# Return the reg number if mop fits regp. def regno(mop): - if mop[0] in { 'r', 'fr', 'cr' }: + if regp(mop) \ + or (immp(mop) and mop[1] is 0): return mop[1] - else: - raise "operand is not a register" + raise "operand is not a register" +def immval(mop): + if immp(mop): + return int(mop[2]) + raise "operand is not an immediate" + +# Return the immediate length if mop fits immp. def immbits(mop): - if mop[0] is 'imm': + if immp(mop): return mop[1] - else: - raise "operand is not an immediate" + raise "operand is not an immediate" -# Following are predicates to be used in copcond, to tell the mode in -# which opcode with ops as operands is to be represented +# Return the register sub-mop if mop fits rofp. +def rofreg(mop): + if rofp(mop): + return mop[2] + raise "operand is not an offset" -# Any occurrence of the opcode can be compressed. -def anyops(opcode, ops): - return 1 +# Return the offset sub-opt if mop fits rofp. +def rofset(mop): + if rofp(mop): + return mop[1] + raise "operand is not an offset" -# Compress iff first and second operands are the same. -def same01(opcode, ops): - if ops[0] == ops[1]: - return 1 - else: - return 0 +# Following are predicates to be used in copcond, to tell the mode in +# which opcode with ops as operands is to be represented. -# Registers representable in a made-up 3-bit mapping. # TODO: use insn_histogram.py to show the best targets # (remember to exclude nop - ori r0,r0,0 as this skews numbers) -cregs2 = { 1, 2, 3, 4, 5, 6, 7, 31 } +# Registers representable in a made-up 3-bit mapping. +# It must contain 0 for proper working of at least storex. +cregs3 = { 0, 31, 1, 2, 3, 4, 5, 6, 7 } +# Ditto in a 2-bit mapping. It needs not contain 0, but it must be a +# subset of cregs3 for proper working of at least storex. +cregs2 = { 2, 3, 4, 5 } +# Use the same sets for FP for now. +cfregs3 = cregs3 +cfregs2 = cregs2 # Return true iff mop is a regular register present in cregs2 -def bin2regs3(mop): - return opclass(mop) is 'r' and regno(mop) in cregs2 +def rcregs2(mop): + return opclass(mop) in { 'r', 'imm' } and regno(mop) in cregs2 + +# Return true iff mop is a regular register present in cregs3 +def rcregs3(mop): + return opclass(mop) in { 'r', 'imm' } and regno(mop) in cregs3 + +# Return true iff mop is a floating-point register present in cfregs2 +def rcfregs2(mop): + return opclass(mop) is 'fr' and regno(mop) in cfregs2 + +# Return true iff mop is a floating-point register present in cfregs3 +def rcfregs3(mop): + return opclass(mop) is 'fr' and regno(mop) in cfregs3 + # Return true iff mop is an immediate of at most 8 bits. def imm8(mop): - return opclass(mop) is 'imm' and immbits(mop) <= 8 + return immp(mop) and immbits(mop) <= 8 + +# Return true iff mop is an immediate of at most 12 bits. +def imm12(mop): + return immp(mop) and immbits(mop) <= 12 # Compress binary opcodes iff the first two operands (output and first # input operand) are registers representable in 3 bits in compressed # mode, and the immediate operand can be represented in 8 bits. def bin2regs3imm8(opcode, ops): - if bin2regs3(ops[0]) and bin2regs3(ops[1]) and imm8(ops[2]): + if rcregs3(ops[0]) and rcregs3(ops[1]) and imm8(ops[2]): return 1 - else: - return 0 + return 0 + +# Recognize do-nothing insns, particularly ori r0,r0,0. +def maybenop(opcode, ops): + if opcode in ['ori', 'addi'] and regno(ops[0]) is regno(ops[1]) \ + and ops[0][0] is 'r' and regno(ops[0]) is 0 \ + and imm8(ops[2]) and immbits(ops[2]) is 0: + return 3 + return 0 + +# Recognize an unconditional branch, that can be represented with a +# 6-bit operand in 10-bit mode, an an additional 4 bits in 16-bit +# mode. In both cases, the offset is shifted left by 2 bits. +def uncondbranch(opcode, ops): + if imm8(ops[0]): + return 3 + if imm12(ops[0]): + return 1 + return 0 + +# 2 bits for RT and RA. RB is r0 in 10-bit, and 3 bits in 16-bit +# ??? there's a general assumption that, if an insn can be represented +# in 10-bits, then it can also be represented in 10 bits. +# This will not be the case if cregs3 can't represent register 0. +def storexaddr(opcode, ops): + # Canonicalize offset in ops[1] to reg, imm + if rofp(ops[1]): + ops = (ops[0], rofreg(ops[1]), rofset(ops[1])) + # Require offset 0 for compression of non-indexed form. + if not regp(ops[2]): + return 0 + # If any of the registers is zero, and the other fits in cregs2, + # it fits in 10-bit. + if (rcregs2(ops[1]) and regno(ops[2]) is 0) \ + or (regno(ops[1]) is 0 and rcregs2(ops[2])): + return 3 + # For 16-bit one must fit rcregs2 and the other rcregs3. + if (rcregs2(ops[1]) and rcregs3(ops[2])) \ + or (rcregs3(ops[1]) and rcregs2(ops[2])): + return 1 + return 0 +def rstorex(opcode, ops): + if rcregs2(ops[0]): + return storexaddr(opcode, ops) + return 0 +def frstorex(opcode, ops): + if rcfregs2(ops[0]): + return storexaddr(opcode, ops) + return 0 + +# 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit. for 10-bit, +# RB and RT must match. ??? It's not clear what that means WRT +# register mapping of different kinds of registers, e.g. when RT is a +# floating-point register.. +def loadxaddr(opcode, ops): + if rofp(ops[1]): + ops = (ops[0], rofreg(ops[1]), rofset(ops[1])) + # Require offset 0 for compression of non-indexed form. + if not regp(ops[2]): + return 0 + if rcregs3(ops[1]) and rcregs3(ops[2]): + if regno(ops[0]) in { regno(ops[1]), regno(ops[2]) }: + return 3 + return 1 + return 0 +def rloadx(opcode, ops): + if rcregs3(ops[0]): + return loadxaddr(opcode, ops) + return 0 +def frloadx(opcode, ops): + if rcfregs3(ops[0]): + return loadxaddr(opcode, ops) + return 0 + +# 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit. for 10-bit, +# RB and RT must match. RA must not be zero, but in 16-bit mode we +# can swap RA and RB to make it fit. +def addop(opcode, ops): + if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]): + if regno(ops[0]) in { regno(ops[1]), regno(ops[2]) }: + return 3 + if regno(ops[1]) is not 0 or regno(ops[2]) is not 0: + return 1 + return 0 + +# 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit. for 10-bit, +# RA and RT must match. ??? The spec says RB, but the actual opcode +# is subf., subtract from, and it subtracts RA from RB. 'neg.' would +# make no sense as described there if we didn't use RA. +def subfop(opcode, ops): + if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]): + if regno(ops[0]) is regno(ops[1]): + return 3 + return 1 + return 0 +def negop(opcode, ops): + if rcregs3(ops[0]) and rcregs3(ops[1]): + return 3 + return 0 + +# 3 bits for RA and 3 bits for RB. L (op1) must be 1 for 10-bit. +# op0 is a cr, must be zero for 10-bit. +def cmpop(opcode, ops): + if rcregs3(ops[2]) and rcregs3(ops[3]): + if regno(ops[0]) is 0 and immval(ops[1]) is 1: + return 3 + return 1 + return 0 + +# 3 bits for RS, 3 bits for RB, 3 bits for RS, 16-bit only. +def sldop(opcode, ops): + if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]): + return 1 + return 0 +# same as sld, except RS must be nonzero. +def srdop(opcode, ops): + if regno(ops[1]) is not 0: + return sldop(opcode, ops) + return 0 +# same as sld, except RS is given by RA, so they must be the same. +def sradop(opcode, ops): + if regno(ops[0]) is regno(ops[1]): + return sldop(opcode, ops) + return 0 + +# binary logical ops: and, nand, or, nor. +# 3 bits for RA (nonzero), 3 bits for RB, 3 bits for RT in 16-bit mode. +# RT is implicitly RB in 10-bit mode. +def binlog1016ops(opcode, ops): + if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \ + and regno(ops[1]) is not 0: + # mr RT, RB AKA or RT, RB, RB takes the 10-bit encoding + # of the 16-bit nor; we've already ruled out r0 as RB above. + if regno(ops[0]) is regno(ops[2]) and opcode is not 'nor': + return 3 + # or and and, with two identical inputs, stand for mr. + # nor and nand, likewise, stand for not, that has its + # own unary 10-bit encoding. + if regno(ops[1]) is regno(ops[2]): + return 3 + return 1 + return 0 +# 3 bits for RB, 3 bits for RT in 16-bit mode. +# RT is implicitly RB in 10-bit mode. +def unlog1016ops(opcode, ops): + if rcregs3(ops[0]) and rcregs3(ops[1]): + if regno(ops[0]) is regno(ops[1]): + return 3 + return 1 + return 0 +# 16-bit only logical ops; no 10-bit encoding available +# same constraints as the 1016 ones above. +def binlog16ops(opcode, ops): + if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \ + and regno(ops[1]) is not 0: + return 1 + return 0 +def unlog16ops(opcode, ops): + if rcregs3(ops[0]) and rcregs3(ops[1]): + return 1 + return 0 + +# binary floating-point ops +# 3 bits for FRA (nonzero), 3 bits for FRB, 3 bits for FRT in 16-bit mode. +# FRT is implicitly FRB in 10-bit mode. +def binfp1016ops(opcode, ops): + if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \ + and regno(ops[1]) is not 0: + if regno(ops[0]) is regno(ops[2]): + return 3 + return 1 + return 0 +def unfp1016ops(opcode, ops): + if rcfregs3(ops[0]) and rcfregs3(ops[1]): + if regno(ops[0]) is regno(ops[1]): + return 3 + return 1 + return 0 +def binfp16ops(opcode, ops): + if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \ + and regno(ops[1]) is not 0: + return 1 + return 0 +def unfp16ops(opcode, ops): + if rcfregs3(ops[0]) and rcfregs3(ops[1]): + return 1 + return 0 + +def cnvfp16ops(opcode, ops): + if rcfregs2(ops[0]) and rcfregs2(ops[1]): + return 1 + return 0 # Map opcodes that might be compressed to a function that returns the # best potential encoding kind for the insn, per the numeric coding # below. copcond = { - + 'ori': maybenop, 'addi': maybenop, + # 'attn': binutils won't ever print this + 'b': uncondbranch, 'bl': uncondbranch, + # 'bc', 'bcl': only in 16-imm mode, not implemented yet + # 'bclr', 'bclrl': available in 10- or 16-bit, not implemented yet + # 16-imm opcodes not implemented yet. + 'stdx': rstorex, 'stwx': rstorex, + 'std': rstorex, 'stw': rstorex, # only offset zero + 'stfdx': frstorex, 'stfsx': frstorex, + 'stfd': frstorex, 'stfs': frstorex, # only offset zero + # Assuming lwz* rather than lwa*. + 'ldx': rloadx, 'lwzx': rloadx, + 'ld': rloadx, 'lwz': rloadx, # only offset zero + 'lfdx': rloadx, 'lfsx': rloadx, + 'lfd': rloadx, 'lfs': rloadx, # only offset zero + 'add': addop, + 'subf.': subfop, 'neg.': negop, + # Assuming cmpl stands for cmpd, i.e., cmp with L=1. + # cmpw is cmp with L=0, 16-bit only. + 'cmp': cmpop, + 'sld.': sldop, 'srd.': srdop, 'srad.': sradop, + 'and': binlog1016ops, 'nand': binlog1016ops, + 'or': binlog1016ops, 'nor': binlog1016ops, + # assuming popcnt and cntlz mean the *d opcodes. + 'popcntd': unlog1016ops, 'cntlzd': unlog1016ops, 'extsw': unlog1016ops, + # not RT, RB is mapped to nand/nor RT, RB, RB. + 'xor': binlog16ops, 'eqv': binlog16ops, + # 'setvl.': unlog16ops, # ??? What's 'setvl.'? + # assuming cnttz mean the *d opcode. + 'cnttzd': unlog16ops, 'extsb': unlog16ops, 'extsh': unlog16ops, + 'fsub.': binfp1016ops, 'fadd': binfp1016ops, 'fmul': binfp1016ops, + 'fneg.': unfp1016ops, + 'fdiv': binfp16ops, + 'fabs.': unfp16ops, 'fmr.': unfp16ops, + # ??? are these the intended fp2int and int2fp, for all + # combinations of signed/unsigned float/double? + 'fcfid': cnvfp16ops, 'fctidz': cnvfp16ops, + 'fcfidu': cnvfp16ops, 'fctiduz': cnvfp16ops, + 'fcfids': cnvfp16ops, 'fctiwz': cnvfp16ops, + 'fcfidus': cnvfp16ops, 'fctiwuz': cnvfp16ops, + # Not implemented yet: + # 10- and 16-bit: + # 'mcrf': + # 'cbank': + # 16-bit only: + # 'crnor': + # 'crandc': + # 'crxor': + # 'crnand': + # 'crand': + # 'creqv': + # 'crorc': + # 'cror': + # 'mtlr': + # 'mtctr': + # 'mflr': + # 'mfctr': + # 'mtcr': + # 'mfcr': } # We have 4 kinds of insns: # 0: uncompressed; leave input insn unchanged # 1: 16-bit compressed, only in compressed mode -# 2: 16-bit extended by another 16-bit, only in compressed mode +# 2: 16-imm, i.e., compressed insn that can't switch-out of compressed mode # 3: 10-bit compressed, may switch to compressed mode # count[0:3] count the occurrences of the base kinds. # count[4] counts extra 10-bit nop-switches to compressed mode, -# tentatively introduced before insns that can be 16-bit encoded. -count = [0,0,0,0,0] -# Default comments for the insn kinds above. 2 is always tentative. -comments = ['', '\t; 16-bit', '\t; tentative 16+16-bit', '\t; 10-bit'] - -# cur stands for the insn kind that we read and processed in the -# previous iteration of the loop, and prev is the one before it. the +# tentatively introduced before insns that can be 16-bit encoded. +# count[5] counts extra 10-bit nop-switches to compressed mode, +# tentatively introduced before insns that can be 16-imm encoded. +# count[6] counts extra 16-bit nop-switches back to uncompressed, +# introduced after a 16-imm insn. +count = [0,0,0,0,0,0,0] +# Default comments for the insn kinds above. +comments = ['', '\t; 16-bit', '\t; 16-imm', '\t; 10-bit'] + +# curi stands for the insn kind that we read and processed in the +# previous iteration of the loop, and previ is the one before it. the # one we're processing in the current iteration will be stored in -# nexti until we make it cur at the very end of the loop. -prev = cur = 0 +# nexti until we make it curi at the very end of the loop. +previ = curi = 0 for line in sys.stdin: if line[-1] is '\n': @@ -172,7 +486,7 @@ for line in sys.stdin: if match is None: print(line) # Switch to uncompressed mode at function boundaries - prev = prev2 = 0 + previ = curi = 0 continue addr = match['addr'] @@ -187,41 +501,44 @@ for line in sys.stdin: comment = None - if cur is 0: + if curi is 0: if nexti is 0: True # Uncompressed mode for good. elif nexti is 1: - # If cur was not a single uncompressed mode insn, + # If curi was not a single uncompressed mode insn, # tentatively encode a 10-bit nop to enter compressed # mode, and then 16-bit. It takes as much space as # encoding as 32-bit, but offers more possibilities for # subsequent compressed encodings. A compressor proper # would have to go back and change the encoding # afterwards, but wé re just counting. - if prev is not 1: - print('\t\th.nop\t\t; tentatively switch to compressed mode') + if previ is not 1: + print('\t\th.nop\t\t; 10-bit (tentative)') count[4] += 1 - comment = 'tentatively compressed to 16-bit' + comment = '16-bit (tentative)' + else: + comment = '16-bit auto-back' elif nexti is 2: - # We can use compressed encoding for nexti after an - # uncompressed insn only if it's the single-insn - # uncompressed mode slot. For anything else, we're better - # off using uncompressed encoding for nexti, since it makes - # no sense to spend a 10-bit nop to switch to compressed - # mode for a 16+16-bit insn. If subsequent insns would - # benefit from compressed encoding, we can switch then. - if prev is not 1: - nexti = 0 - comment = 'not worth a nop for 16+16-bit' + # We can use compressed encoding for the 16-imm nexti + # after an uncompressed insn without penalty if it's the + # single-insn uncompressed mode slot. For other + # configurations, we can either remain in uncompressed + # mode, or switch to compressed mode with a 10-bit nop. + if previ is not 1: + print('\t\th.nop\t\t; 10-bit (tentative)') + count[5] += 1 + comment = '16-imm (tentative), vs uncompressed' + else: + comment = '16-imm auto-back' elif nexti is 3: - # If prev was 16-bit compressed, cur would be in the + # If previ was 16-bit compressed, curi would be in the # single-insn uncompressed slot, so nexti could be encoded # as 16-bit, enabling another 1-insn uncompressed slot # after nexti that a 10-bit insn wouldn't, so make it so. - if prev is 1: + if previ is 1: nexti = 1 - comment = '16-bit, could be 10-bit' - elif cur is 1: + comment = '16-bit auto-back, vs 10-bit' + elif curi is 1: # After a 16-bit insn, anything goes. If it remains in 16-bit # mode, we can have 1 or 2 as nexti; if it returns to 32-bit # mode, we can have 0 or 3. Using 1 instead of 3 makes room @@ -229,29 +546,26 @@ for line in sys.stdin: # that. if nexti is 3: nexti = 1 - comment = '16-bit, could be 10-bit' - elif cur is 2: - # After a 16+16-bit insn, we can't switch directly to 32-bit - # mode. However, cur could have been encoded as 32-bit, since - # any 16+16-bit insn can. Indeed, we may have an arbitrary - # long sequence of 16+16-bit insns before nexti, and if nexti - # can only be encoded in 32-bit mode, we can "resolve" all - # previous adjacent 16+16-bit insns to the corresponding - # 32-bit insns in the encoding, and "adjust" the 16-bit or - # 10-bit insn that enabled the potential 16+16-bit encoding to - # switch to 32-bit mode then instead. + comment = '16-bit, vs 10-bit' + elif curi is 2: + # After a 16-imm insn, we can only switch back to uncompressed + # mode with a 16-bit nop. if nexti is 0: - prev = cur = 0 - comment = '32-bit, like tentative 16+16-bit insns above' - elif cur is 3: + print('\t\tc.nop\t\t; forced switch back to uncompressed mode') + count[6] += 1 + previ = curi + curi = 1 + elif nexti is 3: + nexti = 1 + elif curi is 3: # After a 10-bit insn, another insn that could be encoded as # 10-bit might as well be encoded as 16-bit, to make room for # a single-insn uncompressed insn afterwards. if nexti is 3: nexti = 1 - comment = '16-bit, could be 10-bit' + comment = '16-bit, vs 10-bit' else: - raise "unknown mode for previous insn" + raise "unknown mode for previious insn" count[nexti] += 1 @@ -262,10 +576,10 @@ for line in sys.stdin: print(line + comment) - prev = cur - cur = nexti + previ = curi + curi = nexti -transition_bytes = 2 * count[4] +transition_bytes = 2 * (count[4] + count[5] + count[6]) compressed_bytes = 2 * (count[1] + count[3]) uncompressed_bytes = 4 * (count[0] + count[2]) total_bytes = transition_bytes + compressed_bytes + uncompressed_bytes @@ -275,9 +589,11 @@ print() print('Summary') print('32-bit uncompressed instructions: %i' % count[0]) print('16-bit compressed instructions: %i' % count[1]) -print('16+16-bit (tentative) compressed-mode instructions: %i' % count[2]) +print('16-imm compressed-mode instructions: %i' % count[2]) print('10-bit compressed instructions: %i' % count[3]) -print('10-bit (tentative) mode-switching nops: %i' % count[4]) +print('10-bit mode-switching nops: %i' % count[4]) +print('10-bit mode-switching nops for imm-16: %i' % count[5]) +print('16-bit mode-switching nops after imm-16: %i' % count[6]) print('Compressed size estimate: %i' % total_bytes) print('Original size: %i' % original_bytes) print('Compressed/original ratio: %f' % (total_bytes / original_bytes)) -- 2.30.2