sync_up: Add link from discussion page
[libreriscv.git] / lxo / 532 / comp16-v1alt-skel.py
1 #! /bin/env python3
2 # see https://bugs.libre-soc.org/show_bug.cgi?id=532
3
4 # Estimate ppc code compression with Libre-SOC encoding attempt v1alt.
5
6
7 # Copyright 2020 Alexandre Oliva
8
9 # This script is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 3, or (at your option)
12 # any later version.
13
14 # This script is distributed in the hope that it will be useful, but
15 # WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # General Public License for more details.
18
19 # You should have received a copy of the GNU General Public License
20 # along with this script; see the file COPYING3. If not see
21 # <http://www.gnu.org/licenses/>.
22
23 # Skeleton originally by Alexandre Oliva <oliva@gnu.org>.
24
25
26 # Feed this script the output of objdump -M raw --no-show-raw-insn ppc-prog
27
28 # It will look for insns that can be represented in compressed mode,
29 # according to the encoding rules in the copcond dictionary below.
30
31 # Nothing is assumed as to the actual bit-encoding of the insns, this
32 # is just to experiment with insn selection and get a quick feedback
33 # loop for the encoding options in compressed mode.
34
35 # This script is intended to compare the compression ratio between v1,
36 # and an alternate mode-switching strategy that does away with 10-bit
37 # insns to enter compressed mode, and instead uses a major 6-bit
38 # opcode of a 32-bit insn to signal the insn encodes 10 mode bits, and
39 # a compressed insn in the remaining 16 bits. These 10 bits each
40 # correspond to an upcoming insn, telling whether or not it's
41 # compressed, so that any compressible insns among the 10 subsequent
42 # insns can be encoded as such without any further overhead.
43
44 # This would enable us to use the mode-switching bits in 16-bit insns
45 # for other purposes, but this script does not attempt to do so, so as
46 # to make for a simpler, more direct comparison.
47
48
49 # At (visible) entry points, mode is forced to return to uncompressed
50 # mode. Every branch target must be in uncompressed mode as well, but
51 # this script does not enforce that. In this model, the mode bits are
52 # cleared when branches are taken: they are static, but they shall not
53 # carry over across branch targets. Mode-switching insns can only
54 # appear in uncompressed mode, and they reset the mode bits for
55 # upcoming insns, rather than appending.
56
57 # The entire code stream is printed, without any attempt to modify the
58 # addresses that go along with or in them; we only insert markers for
59 # the transition points, and for the compressed instructions.
60
61 # The really useful information is printed at the end: a summary of
62 # transition and compressed-insn counts, and the achieved compression
63 # rate.
64
65 import sys
66 import re
67
# Number of mode bits carried by one mode-switching insn; each bit
# corresponds to one upcoming insn.
modebits = 10

# One disassembly line: address, opcode and the operand list.
# All patterns below are raw strings, so that regex escapes such as
# \s, \(, \* and \+ are not parsed as (invalid) string escapes.
insn = re.compile(
    r'\s+(?P<addr>[0-9a-f]+):\s+(?P<opcode>[^ ]+) *(?P<operands>.*)')

# reg is a regkind (r, cr, fr) followed by a regnum
xreg = r'(?P<reg>(?P<regkind>[cf]?r)(?P<regnum>[0-9]+))'

# immediate is a sequence of digits, possibly preceded by a negative sign
ximm = r'(?P<immediate>-?[0-9]+)'

# branch is a branch target address; ignore an angle-bracketed label after it
xbrt = r'(?P<branch>[0-9a-f]+)(?: <.*>)?'

# offset is like immediate, but followed by a parenthesized basereg
xoff = r'(?P<offset>-?[0-9]+)\((?P<basereg>r[0-9]+)\)'

# creg is the cr, cond names one of its bits
crbit = r'(?:4\*(?P<creg>cr[0-7])\+)?(?P<cond>gt|lt|eq|so)'

# Combine the above into alternatives, to easily classify operands by
# pattern matching.  Alternatives are tried in this order.
opkind = re.compile('|'.join([xreg, ximm, xbrt, xoff, crbit]))
90
# Pre-parse and classify op into a mop, short for mapped op.
#
# The result is a tuple whose first element names the operand class:
#   ('other', op)                       text that matches no known kind
#   (regkind, regnum, op)               regkind is 'r', 'cr' or 'fr'
#   ('imm', bits, op)                   bits is int(op).bit_length()
#   ('pcoff', bits, op, addr)           bits is the bit length of the
#                                       distance from addr to the target
#   ('ofst', offset-mop, basereg-mop, op)
#   ('crbit', cr-mop, ('cond', name), op)
#
# Branch distances are computed against the global addr, which the main
# loop sets to the address of the insn being processed.
def mapop(op):
    match = opkind.fullmatch(op)

    if match is None:
        op = ('other', op)
    elif match['reg'] is not None:
        op = (match['regkind'], int(match['regnum']), op)
    elif match['immediate'] is not None:
        op = ('imm', int(op).bit_length(), op)
    elif match['branch'] is not None:
        distance = int(match['branch'], 16) - int(addr, 16)
        op = ('pcoff', distance.bit_length(), op, addr)
    elif match['offset'] is not None:
        op = ('ofst', mapop(match['offset']), mapop(match['basereg']), op)
    elif match['cond'] is not None:
        # A bare condition name refers to a bit of cr0.
        if match['creg'] is None:
            creg = 'cr0'
        else:
            creg = match['creg']
        op = ('crbit', mapop(creg), ('cond', match['cond']), op)
    else:
        # Strings cannot be raised in Python 3; raise a real exception
        # that carries the intended message.
        raise TypeError("unrecognized operand kind")

    return op
116
# Single access point for the class tag of a mop, so that the mapop
# representation can change without touching its users.
def opclass(mop):
    kind = mop[0]
    return kind
120
121 # Some opclass predicates, for the same reason.
# True iff mop can stand for a register: a register of any kind, or an
# immediate whose bit length is 0 (i.e. the value 0).
def regp(mop):
    # Compare by value; identity tests against literals only work by
    # accident of string interning and small-int caching.
    return opclass(mop) in {'r', 'fr', 'cr'} \
        or (opclass(mop) == 'imm' and mop[1] == 0)
# True iff mop is of an immediate-like class: 'imm' or 'pcoff'.
def immp(mop):
    immediate_kinds = {'imm', 'pcoff'}
    return opclass(mop) in immediate_kinds
# True iff mop is a register+offset operand.
def rofp(mop):
    # Compare by value, not identity.
    return opclass(mop) == 'ofst'
# True iff mop names a condition register bit.
def crbt(mop):
    # Compare by value, not identity.
    return opclass(mop) == 'crbit'
131
132 # Some more accessors.
133
# Return the reg number if mop fits regp.  An immediate-like mop whose
# bit length is 0 is accepted as well, and yields 0.
# Raises TypeError if mop is not a register.
def regno(mop):
    if regp(mop) \
       or (immp(mop) and mop[1] == 0):
        return mop[1]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not a register")
140
# Return the integer value of mop if mop fits immp, parsed from the
# operand text kept in the mop.
# Raises TypeError if mop is not an immediate.
def immval(mop):
    if immp(mop):
        return int(mop[2])
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not an immediate")
145
# Return the immediate length if mop fits immp.
# Raises TypeError if mop is not an immediate.
def immbits(mop):
    if immp(mop):
        return mop[1]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not an immediate")
151
# Return the register sub-mop if mop fits rofp.
# Raises TypeError if mop is not a register+offset operand.
def rofreg(mop):
    if rofp(mop):
        return mop[2]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not an offset")
157
# Return the offset sub-mop if mop fits rofp.
# Raises TypeError if mop is not a register+offset operand.
def rofset(mop):
    if rofp(mop):
        return mop[1]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not an offset")
163
# Return the register sub-mop if mop fits crbit.
# Raises TypeError if mop is not a condition register bit.
def crbtreg(mop):
    if crbt(mop):
        return mop[1]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not a condition register bit")
169
# Return the cond bit name sub-mop, ('cond', name), if mop fits crbit.
# Raises TypeError if mop is not a condition register bit.
def crbtcnd(mop):
    if crbt(mop):
        return mop[2]
    # Strings cannot be raised in Python 3; raise a real exception.
    raise TypeError("operand is not a condition register bit")
175
176 # Following are predicates to be used in copcond, to tell the mode in
177 # which opcode with ops as operands is to be represented.
178
179 # TODO: use insn_histogram.py to show the best targets
180 # (remember to exclude nop - ori r0,r0,0 as this skews numbers)
# Registers representable in a made-up 3-bit mapping.
# It must contain 0 for proper working of at least storex.
# Alternative mapping, kept for experimentation:
#cregs3 = { 0, 31, 1, 2, 3, 4, 5, 6, 7 }
cregs3 = set((0, 1, 2, 3, 4, 9, 10, 30, 31))
# Ditto in a 2-bit mapping.  It needs not contain 0, but it must be a
# subset of cregs3 for proper working of at least storex.
cregs2 = set((1, 2, 3, 9))
# Use the same sets (the very same objects) for FP for now.
cfregs3 = cregs3
cfregs2 = cregs2
# Condition registers representable in 2 bits.
ccregs2 = set(range(4))
192
# True iff mop is of class 'r' or 'imm' and its register number is in
# cregs2.  The register number is only looked up for those classes.
def rcregs2(mop):
    if opclass(mop) not in {'r', 'imm'}:
        return False
    return regno(mop) in cregs2
196
# True iff mop is of class 'r' or 'imm' and its register number is in
# cregs3.  The register number is only looked up for those classes.
def rcregs3(mop):
    if opclass(mop) not in {'r', 'imm'}:
        return False
    return regno(mop) in cregs3
200
# Return true iff mop is a floating-point register present in cfregs2
def rcfregs2(mop):
    # Compare by value: the regkind string comes out of a regex match,
    # so it is never the same object as the 'fr' literal.
    return opclass(mop) == 'fr' and regno(mop) in cfregs2
204
# Return true iff mop is a floating-point register present in cfregs3
def rcfregs3(mop):
    # Compare by value: the regkind string comes out of a regex match,
    # so it is never the same object as the 'fr' literal.
    return opclass(mop) == 'fr' and regno(mop) in cfregs3
208
# Return true iff mop is a condition register present in ccregs2
def rccregs2(mop):
    # Compare by value: the regkind string comes out of a regex match,
    # so it is never the same object as the 'cr' literal.
    return opclass(mop) == 'cr' and regno(mop) in ccregs2
212
# True iff mop is an immediate whose length is at most 8 bits.
def imm8(mop):
    if not immp(mop):
        return False
    return immbits(mop) <= 8
216
# True iff mop is an immediate whose length is at most 12 bits.
def imm12(mop):
    if not immp(mop):
        return False
    return immbits(mop) <= 12
220
# Binary opcodes with an immediate: compressible (kind 1) iff the
# output and the first input operand are registers representable in
# 3 bits in compressed mode, and the immediate operand can be
# represented in 8 bits.  Otherwise kind 0.
def bin2regs3imm8(opcode, ops):
    # Operands are examined left to right, stopping at the first misfit.
    fits = rcregs3(ops[0]) and rcregs3(ops[1]) and imm8(ops[2])
    return 1 if fits else 0
228
# Recognize do-nothing insns, particularly ori r0,r0,0.
# Returns 3 for 'ori' or 'addi' whose two register operands are both r0
# and whose immediate is 0; returns 0 otherwise.
def maybenop(opcode, ops):
    # Values are compared with ==; identity tests against literals are
    # not guaranteed to hold for equal values.
    if opcode in ['ori', 'addi'] and regno(ops[0]) == regno(ops[1]) \
       and opclass(ops[0]) == 'r' and regno(ops[0]) == 0 \
       and imm8(ops[2]) and immbits(ops[2]) == 0:
        return 3
    return 0
236
# Recognize an unconditional branch, that can be represented with a
# 6-bit operand in 10-bit mode, and an additional 4 bits in 16-bit
# mode.  In both cases, the offset is shifted left by 2 bits.
# Returns 3 if the target fits imm8, 1 if it fits imm12, 0 otherwise.
def uncondbranch(opcode, ops):
    target = ops[0]
    if imm8(target):
        kind = 3
    elif imm12(target):
        kind = 1
    else:
        kind = 0
    return kind
246
# 2 bits for RT and RA.  RB is r0 in 10-bit, and 3 bits in 16-bit ???
# there's a general assumption that, if an insn can be represented in
# 10-bits, then it can also be represented in 16 bits.  This will not
# be the case if cregs3 can't represent register 0.  For
# register+offset addresses, we support 16-imm stdi, fstdi, with 3-bit
# immediates left-shifted by 3; stwi, fstsi, with 2-bit immediates
# left-shifted by 2; stdspi for 6-bit immediate left-shifted by 3
# biased by -256, and stwspi for 6-bit immediate left-shifted by 2
# also biased by -256.  fstdi and fstsi store in memory a
# floating-point register, the others do a general-purpose register.
#
# Classify the address operands of a store; ops[0], the stored
# register, is checked by the callers.  Returns the insn kind:
# 0 uncompressed, 1 16-bit, 2 16-imm, 3 10-bit.
def storexaddr(opcode, ops):
    # Canonicalize offset in ops[1] to reg, imm
    if rofp(ops[1]):
        ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
        # The last letter of the opcode selects the offset alignment.
        shift = memshifts[opcode[-1]]
        # Misaligned offsets cannot be encoded.  Values are compared
        # with == / != rather than by identity.
        if (immval(ops[2]) & ((1 << shift) - 1)) != 0:
            return 0
        if rcregs3(ops[1]) and immbits(ops[2]) <= shift + 3:
            return 2
        # r1-relative form with a biased, wider offset; not for FP.
        if regno(ops[1]) == 1 and opclass(ops[0]) != 'fr' \
           and (immval(ops[2]) - 256).bit_length() <= shift + 6:
            return 2
    # Require offset 0 for compression of non-indexed form.
    if not regp(ops[2]):
        return 0
    # If any of the registers is zero, and the other fits in cregs2,
    # it fits in 10-bit.
    if (rcregs2(ops[1]) and regno(ops[2]) == 0) \
       or (regno(ops[1]) == 0 and rcregs2(ops[2])):
        return 3
    # For 16-bit one must fit rcregs2 and the other rcregs3.
    if (rcregs2(ops[1]) and rcregs3(ops[2])) \
       or (rcregs3(ops[1]) and rcregs2(ops[2])):
        return 1
    return 0
# Store of a general-purpose register: the stored register must fit
# cregs2, then the address operands decide the encoding kind.
def rstorex(opcode, ops):
    return storexaddr(opcode, ops) if rcregs2(ops[0]) else 0
# Store of a floating-point register: the stored register must fit
# cfregs2, then the address operands decide the encoding kind.
def frstorex(opcode, ops):
    return storexaddr(opcode, ops) if rcfregs2(ops[0]) else 0
290
# Left-shift applied to compressed load/store offsets, keyed by the
# last letter of the opcode.
memshifts = dict(d=3, w=2, z=2, s=2)
292
# 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
# RB and RT must match.  ??? It's not clear what that means WRT
# register mapping of different kinds of registers, e.g. when RT is a
# floating-point register..
# For register+offset addresses, we support 16-imm ldi, fldi, with
# 3-bit immediates left-shifted by 3; lwi, flsi, with 2-bit immediates
# left-shifted by 2; ldspi for 6-bit immediate left-shifted by 3
# biased by -256, and lwspi for 6-bit immediate left-shifted by 2 also
# biased by -256.  fldi and flsi load to floating-point registers, the
# others load to general-purpose registers.
#
# Classify the address operands of a load; ops[0], the loaded
# register, is checked by the callers.  Returns the insn kind:
# 0 uncompressed, 1 16-bit, 2 16-imm, 3 10-bit.
def loadxaddr(opcode, ops):
    # Canonicalize offset in ops[1] to reg, imm
    if rofp(ops[1]):
        ops = (ops[0], rofreg(ops[1]), rofset(ops[1]))
        # The last letter of the opcode selects the offset alignment.
        shift = memshifts[opcode[-1]]
        # Misaligned offsets cannot be encoded.  Values are compared
        # with == / != rather than by identity.
        if (immval(ops[2]) & ((1 << shift) - 1)) != 0:
            return 0
        if rcregs3(ops[1]) and immbits(ops[2]) <= shift + 3:
            return 2
        # r1-relative form with a biased, wider offset; not for FP.
        if regno(ops[1]) == 1 and opclass(ops[0]) != 'fr' \
           and (immval(ops[2]) - 256).bit_length() <= shift + 6:
            return 2
    # Otherwise require offset 0 for compression of non-indexed form.
    if not regp(ops[2]):
        return 0
    if rcregs3(ops[1]) and rcregs3(ops[2]):
        # The 10-bit form needs the target to match an address register.
        if regno(ops[0]) in {regno(ops[1]), regno(ops[2])}:
            return 3
        return 1
    return 0
# Load into a general-purpose register: the target register must fit
# cregs3, then the address operands decide the encoding kind.
def rloadx(opcode, ops):
    return loadxaddr(opcode, ops) if rcregs3(ops[0]) else 0
# Load into a floating-point register: the target register must fit
# cfregs3, then the address operands decide the encoding kind.
def frloadx(opcode, ops):
    return loadxaddr(opcode, ops) if rcfregs3(ops[0]) else 0
330
# 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
# RB and RT must match.  RA must not be zero, but in 16-bit mode we
# can swap RA and RB to make it fit.
# Returns 3 (10-bit), 1 (16-bit) or 0 (uncompressed).
def addop(opcode, ops):
    if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
        if regno(ops[0]) in {regno(ops[1]), regno(ops[2])}:
            return 3
        # At least one input must be nonzero.  Register numbers are
        # compared by value, not identity.
        if regno(ops[1]) != 0 or regno(ops[2]) != 0:
            return 1
    return 0
341
# 3 bits for RA, 3 bits for RB, 3 bits for RT for 16-bit.  for 10-bit,
# RA and RT must match.  ??? The spec says RB, but the actual opcode
# is subf., subtract from, and it subtracts RA from RB.  'neg.' would
# make no sense as described there if we didn't use RA.
# Returns 3 (10-bit), 1 (16-bit) or 0 (uncompressed).
def subfop(opcode, ops):
    if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]):
        # Register numbers are compared by value, not identity.
        if regno(ops[0]) == regno(ops[1]):
            return 3
        return 1
    return 0
# neg.: 10-bit (kind 3) iff both registers fit the 3-bit mapping,
# uncompressed (kind 0) otherwise.
def negop(opcode, ops):
    both_fit = rcregs3(ops[0]) and rcregs3(ops[1])
    return 3 if both_fit else 0
356
# 3 bits for RA and 3 bits for RB.  L (op1) must be 1 for 10-bit.
# op0 is a cr, must be zero for 10-bit.
# Returns 3 (10-bit), 1 (16-bit) or 0 (uncompressed).
def cmpop(opcode, ops):
    if rcregs3(ops[2]) and rcregs3(ops[3]):
        # Numbers are compared by value, not identity.
        if regno(ops[0]) == 0 and immval(ops[1]) == 1:
            return 3
        return 1
    return 0
365
# sld.: 3 bits for each of the three register operands, 16-bit only.
# Returns 1 iff all three fit the 3-bit mapping, 0 otherwise.
def sldop(opcode, ops):
    all_fit = rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2])
    return 1 if all_fit else 0
# same as sld, except RS must be nonzero.
def srdop(opcode, ops):
    # Register numbers are compared by value, not identity.
    if regno(ops[1]) != 0:
        return sldop(opcode, ops)
    return 0
# same as sld, except RS is given by RA, so they must be the same.
def sradop(opcode, ops):
    # Register numbers are compared by value, not identity.
    if regno(ops[0]) == regno(ops[1]):
        return sldop(opcode, ops)
    return 0
381
# binary logical ops: and, nand, or, nor.
# 3 bits for RA (nonzero), 3 bits for RB, 3 bits for RT in 16-bit mode.
# RT is implicitly RB in 10-bit mode.
# Returns 3 (10-bit), 1 (16-bit) or 0 (uncompressed).
def binlog1016ops(opcode, ops):
    if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
       and regno(ops[1]) != 0:
        # mr RT, RB AKA or RT, RB, RB takes the 10-bit encoding
        # of the 16-bit nor; we've already ruled out r0 as RB above.
        # The opcode must be compared by value: it is cut out of the
        # input line, so an identity test against 'nor' never matches.
        if regno(ops[0]) == regno(ops[2]) and opcode != 'nor':
            return 3
        # or and and, with two identical inputs, stand for mr.
        # nor and nand, likewise, stand for not, that has its
        # own unary 10-bit encoding.
        if regno(ops[1]) == regno(ops[2]):
            return 3
        return 1
    return 0
# 3 bits for RB, 3 bits for RT in 16-bit mode.
# RT is implicitly RB in 10-bit mode.
# Returns 3 (10-bit), 1 (16-bit) or 0 (uncompressed).
def unlog1016ops(opcode, ops):
    if rcregs3(ops[0]) and rcregs3(ops[1]):
        # Register numbers are compared by value, not identity.
        if regno(ops[0]) == regno(ops[1]):
            return 3
        return 1
    return 0
# 16-bit only logical ops; no 10-bit encoding available
# same constraints as the 1016 ones above.
# Returns 1 (16-bit) or 0 (uncompressed).
def binlog16ops(opcode, ops):
    # Register numbers are compared by value, not identity.
    if rcregs3(ops[0]) and rcregs3(ops[1]) and rcregs3(ops[2]) \
       and regno(ops[1]) != 0:
        return 1
    return 0
# Unary logical ops with a 16-bit encoding only: kind 1 iff both
# registers fit the 3-bit mapping, kind 0 otherwise.
def unlog16ops(opcode, ops):
    both_fit = rcregs3(ops[0]) and rcregs3(ops[1])
    return 1 if both_fit else 0
418
# binary floating-point ops
# 3 bits for FRA (nonzero), 3 bits for FRB, 3 bits for FRT in 16-bit mode.
# FRT is implicitly FRB in 10-bit mode.
# Returns 3 (10-bit), 1 (16-bit) or 0 (uncompressed).
def binfp1016ops(opcode, ops):
    # Register numbers are compared by value, not identity.
    if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
       and regno(ops[1]) != 0:
        if regno(ops[0]) == regno(ops[2]):
            return 3
        return 1
    return 0
# Unary floating-point ops: both registers must fit the 3-bit FP
# mapping; the 10-bit form additionally needs them to be the same.
# Returns 3 (10-bit), 1 (16-bit) or 0 (uncompressed).
def unfp1016ops(opcode, ops):
    if rcfregs3(ops[0]) and rcfregs3(ops[1]):
        # Register numbers are compared by value, not identity.
        if regno(ops[0]) == regno(ops[1]):
            return 3
        return 1
    return 0
# Binary floating-point ops with a 16-bit encoding only: all three
# registers must fit the 3-bit FP mapping, and the second operand must
# be nonzero.  Returns 1 (16-bit) or 0 (uncompressed).
def binfp16ops(opcode, ops):
    # Register numbers are compared by value, not identity.
    if rcfregs3(ops[0]) and rcfregs3(ops[1]) and rcfregs3(ops[2]) \
       and regno(ops[1]) != 0:
        return 1
    return 0
# Unary floating-point ops with a 16-bit encoding only: kind 1 iff
# both registers fit the 3-bit FP mapping, kind 0 otherwise.
def unfp16ops(opcode, ops):
    both_fit = rcfregs3(ops[0]) and rcfregs3(ops[1])
    return 1 if both_fit else 0
444
# Floating-point conversions, 16-bit only: kind 1 iff both registers
# fit the 2-bit FP mapping, kind 0 otherwise.
def cnvfp16ops(opcode, ops):
    both_fit = rcfregs2(ops[0]) and rcfregs2(ops[1])
    return 1 if both_fit else 0
449
# Move between CRs.  3 bits for destination, 3 bits for source in
# 16-bit mode.  That covers all possibilities.  For 10-bit mode, only
# 2 bits for destination.
# Returns 3 if the destination fits ccregs2, 1 otherwise.
def mcrfop(opcode, ops):
    return 3 if rccregs2(ops[0]) else 1
# Logical ops between two CRs into one.  2 bits for destination, that
# must coincide with one of the inputs, 3 bits for the other input.
# 16-bit only.
# NOTE(review): only the first input is compared with the destination.
# Returns 1 (16-bit) or 0 (uncompressed).
def crops(opcode, ops):
    # Register numbers are compared by value, not identity.
    if rccregs2(ops[0]) and regno(ops[0]) == regno(ops[1]):
        return 1
    return 0
464
# 3 bits for general-purpose register; immediate identifies the
# special purpose register to move to: 8 for lr, 9 for ctr.  16-bit
# only.  mtspr imm,rN moves from rN to the spr; mfspr rN,imm moves
# from spr to rN.
# Returns 1 (16-bit) or 0 (uncompressed).
def mtsprops(opcode, ops):
    compressible = immval(ops[0]) in (8, 9) and rcregs3(ops[1])
    return 1 if compressible else 0
# mfspr rN,imm: kind 1 iff the spr number is 8 or 9 and rN fits the
# 3-bit mapping, kind 0 otherwise.
def mfsprops(opcode, ops):
    compressible = immval(ops[1]) in (8, 9) and rcregs3(ops[0])
    return 1 if compressible else 0
477
# 3 bits for nonzero general-purpose register; the immediate is a
# per-CR mask (8-bits).  mtcr rN is mtcrf 0xFF, rN.  mfcr rN is a raw
# opcode, not an alias.
# Returns 1 (16-bit) or 0 (uncompressed).
def mtcrfops(opcode, ops):
    # Numbers are compared by value, not identity.
    if immval(ops[0]) == 255 and rcregs3(ops[1]) and regno(ops[1]) != 0:
        return 1
    return 0
# mfcr rN: kind 1 iff rN is a nonzero register that fits the 3-bit
# mapping, kind 0 otherwise.
def mfcrops(opcode, ops):
    # Register numbers are compared by value, not identity.
    if rcregs3(ops[0]) and regno(ops[0]) != 0:
        return 1
    return 0
489
# 3 bits for destination and source register, must be the same.  Full
# shift range fits.  16-imm format.
# Returns 2 (16-imm) or 0 (uncompressed).
def shiftops(opcode, ops):
    # Register numbers are compared by value, not identity.
    if rcregs3(ops[0]) and regno(ops[0]) == regno(ops[1]):
        return 2
    return 0
496
# For 16-imm 'addis' and 'addi', we have 3 bits (nonzero) for the
# destination register, source register is implied 0, the immediate
# must either fit in signed 5-bit, left-shifted by 3, or in signed
# 7-bit without shift.  ??? That seems backwards.
# Returns 2 (16-imm) when those constraints hold; otherwise whatever
# maybenop makes of the insn.
def addiops(opcode, ops):
    # imm8 already bounds the immediate to 8 bits, so no separate
    # check is needed.  Numbers are compared by value, not identity.
    if rcregs3(ops[0]) and regno(ops[0]) != 0 \
       and regno(ops[1]) == 0 and imm8(ops[2]) \
       and ((immval(ops[2]) & 7) == 0 or immbits(ops[2]) <= 7):
        return 2
    return maybenop(opcode, ops)
508
# cmpdi and cmpwi are aliases to uncompressed cmp CR#, L, RA, imm16,
# CR# being the target condition register, L being set for d rather
# than w.  In 16-imm, CR# must be zero, RA must fit in 3 bits, and the
# immediate must be 6 bits signed.
# Returns 2 (16-imm) or 0 (uncompressed).
def cmpiops(opcode, ops):
    # Register numbers are compared by value, not identity.
    if regno(ops[0]) == 0 and immval(ops[1]) in (0, 1) \
       and rcregs3(ops[2]) and immbits(ops[3]) <= 6:
        return 2
    return 0
518
# 16-imm bc, with or without LK, uses 3 bits for BI (CR0 and CR1 only),
# and 1 bit for BO1 (to tell BO 12 from negated 4).
# Returns 2 (16-imm) iff BO is 4 or 12, the CR is at most cr1 and the
# target length is at most 8 bits; 0 otherwise.
def bcops(opcode, ops):
    compressible = immval(ops[0]) in (4,12) \
        and regno(crbtreg(ops[1])) <= 1 \
        and immbits(ops[2]) <= 8
    return 2 if compressible else 0
526
# 2 bits for BI and 3 bits for BO in 10-bit encoding; one extra bit
# for each in 16-bit.
# Returns 3 (10-bit), 1 (16-bit) or 0 (uncompressed).
def bclrops(opcode, ops):
    # Numbers are compared by value, not identity.
    if immval(ops[0]) <= 15 and regno(crbtreg(ops[1])) <= 1 \
       and immbits(ops[2]) == 0:
        if immval(ops[0]) <= 7 and regno(crbtreg(ops[1])) == 0:
            return 3
        return 1
    return 0
536
# Map opcodes that might be compressed to a function that returns the
# best potential encoding kind for the insn, per the numeric coding
# below.  Each function is called as f(opcode, ops), ops being the
# list of mapped operands.
copcond = {
    'ori': maybenop,
    # 'attn': binutils won't ever print this
    'b': uncondbranch, 'bl': uncondbranch,
    'bc': bcops, 'bcl': bcops,
    'bclr': bclrops, 'bclrl': bclrops,
    # Stores and loads, including 16-imm ones
    'stdx': rstorex, 'stwx': rstorex,
    'std': rstorex, 'stw': rstorex, # only offset zero
    'stfdx': frstorex, 'stfsx': frstorex,
    'stfd': frstorex, 'stfs': frstorex, # only offset zero
    # Assuming lwz* rather than lwa*.
    'ldx': rloadx, 'lwzx': rloadx,
    'ld': rloadx, 'lwz': rloadx, # only offset zero
    # Floating-point loads target fr registers, so they must go
    # through frloadx; rloadx rejects any non-integer target.
    'lfdx': frloadx, 'lfsx': frloadx,
    'lfd': frloadx, 'lfs': frloadx, # only offset zero
    'add': addop,
    'subf.': subfop, 'neg.': negop,
    # Assuming cmpl stands for cmpd, i.e., cmp with L=1.
    # cmpw is cmp with L=0, 16-bit only.
    'cmp': cmpop,
    'sld.': sldop, 'srd.': srdop, 'srad.': sradop,
    'and': binlog1016ops, 'nand': binlog1016ops,
    'or': binlog1016ops, 'nor': binlog1016ops,
    # assuming popcnt and cntlz mean the *d opcodes.
    'popcntd': unlog1016ops, 'cntlzd': unlog1016ops, 'extsw': unlog1016ops,
    # not RT, RB is mapped to nand/nor RT, RB, RB.
    'xor': binlog16ops, 'eqv': binlog16ops,
    # 'setvl.': unlog16ops, # ??? What's 'setvl.'?
    # assuming cnttz mean the *d opcode.
    'cnttzd': unlog16ops, 'extsb': unlog16ops, 'extsh': unlog16ops,
    'fsub.': binfp1016ops, 'fadd': binfp1016ops, 'fmul': binfp1016ops,
    'fneg.': unfp1016ops,
    'fdiv': binfp16ops,
    'fabs.': unfp16ops, 'fmr.': unfp16ops,
    # ??? are these the intended fp2int and int2fp, for all
    # combinations of signed/unsigned float/double?
    'fcfid': cnvfp16ops, 'fctidz': cnvfp16ops,
    'fcfidu': cnvfp16ops, 'fctiduz': cnvfp16ops,
    'fcfids': cnvfp16ops, 'fctiwz': cnvfp16ops,
    'fcfidus': cnvfp16ops, 'fctiwuz': cnvfp16ops,
    # Condition register opcodes.
    'mcrf': mcrfop,
    'crnor': crops,
    'crandc': crops,
    'crxor': crops,
    'crnand': crops,
    'crand': crops,
    'creqv': crops,
    'crorc': crops,
    'cror': crops,
    # System opcodes.
    # 'cbank' is not a ppc opcode, not handled
    'mtspr': mtsprops, # raw opcode for 'mtlr', 'mtctr'
    'mfspr': mfsprops, # raw opcode for 'mflr', 'mfctr'
    'mtcrf': mtcrfops, # raw opcode for 'mtcr'
    'mfcr': mfcrops,
    # 16-imm opcodes.
    'sradi.': shiftops, 'srawi.': shiftops,
    'addi': addiops,
    'cmpi': cmpiops, # raw opcode for 'cmpwi', 'cmpdi'
    # 'setvli', 'setmvli' are not ppc opcodes, not handled.
}
603
604 # v1 has 4 kinds of insns:
605
606 # 0: uncompressed; leave input insn unchanged
607 # 1: 16-bit compressed, only in compressed mode
608 # 2: 16-imm, i.e., compressed insn that can't switch-out of compressed mode
609 # 3: 10-bit compressed, may switch to compressed mode
610
611 # In v1alt, we map 1, 2 and 3 to compressed (count[1]). If we have a
612 # compressing insn, and we've run out of bits from the latest
613 # mode-switch insn, we output another (count[2]).
614
# Insn counters: count[0] uncompressed insns, count[1] compressed
# insns, count[2] mode-switching prefixes.
count = [0, 0, 0]
# Default comments for the insn kinds above.
comments = ['', '\t; 16-bit', '\t; 6+10-bit mode']

# This counts the remaining bits to use from the latest mode-switching
# insn.
remobits = 0

for line in sys.stdin:
    # Strip the trailing newline, if any (compared by value).
    if line.endswith('\n'):
        line = line[:-1]

    match = insn.fullmatch(line)
    if match is None:
        print(line)
        # Switch to uncompressed mode at function boundaries
        remobits = 0
        continue

    # addr must remain a module-level name: mapop reads it to compute
    # branch distances.
    addr = match['addr']
    opcode = match['opcode']
    operands = match['operands']

    if opcode in copcond:
        nexti = copcond[opcode](opcode,
                                [mapop(op) for op in operands.split(',')])
    else:
        nexti = 0

    # Hook for a per-insn comment overriding the default one.
    comment = None

    if nexti != 0:
        # Every compressible kind counts as plain compressed in v1alt.
        nexti = 1
        if remobits == 0:
            # Out of mode bits: emit a new mode-switching prefix, that
            # covers this insn and the next modebits ones.
            remobits = modebits + 1
            print('\t\th.nop\t\t; 16-bit mode-switching prefix')
            count[2] += 1

    count[nexti] += 1

    if comment is None:
        comment = comments[nexti]
    else:
        comment = '\t; ' + comment

    print(line + comment)

    # Each insn, compressed or not, uses up one mode bit.
    if remobits > 0:
        remobits -= 1

transition_bytes = 2 * count[2]
compressed_bytes = 2 * count[1]
uncompressed_bytes = 4 * count[0]
total_bytes = transition_bytes + compressed_bytes + uncompressed_bytes
# Every compressed insn took 4 bytes, twice its compressed size, in
# the original program.
original_bytes = 2 * compressed_bytes + uncompressed_bytes

print()
print('Summary')
print('32-bit uncompressed instructions: %i' % count[0])
print('16-bit compressed instructions: %i' % count[1])
print('16-bit mode-switching nops: %i' % count[2])
print('Compressed size estimate: %i' % total_bytes)
print('Original size: %i' % original_bytes)
if original_bytes:
    print('Compressed/original ratio: %f' % (total_bytes / original_bytes))
else:
    # No instruction was recognized in the input; avoid dividing by zero.
    print('Compressed/original ratio: n/a (no instructions found)')