power_insn: major refactoring and cleanup
[openpower-isa.git] / src / openpower / sv / trans / svp64.py
1 # SPDX-License-Identifier: LGPLv3+
2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
3 # Funded by NLnet http://nlnet.nl
4
5 """SVP64 OpenPOWER v3.0B assembly translator
6
7 This class takes raw svp64 assembly mnemonics (aliases excluded) and creates
8 an EXT001-encoded "svp64 prefix" (as a .long) followed by a v3.0B opcode.
9
10 It is very simple and straightforward, the only weirdness being the
11 extraction of the register information and conversion to v3.0B numbering.
12
13 Encoding format of svp64: https://libre-soc.org/openpower/sv/svp64/
14 Encoding format of arithmetic: https://libre-soc.org/openpower/sv/normal/
15 Encoding format of LDST: https://libre-soc.org/openpower/sv/ldst/
16 **TODO format of branches: https://libre-soc.org/openpower/sv/branches/**
17 **TODO format of CRs: https://libre-soc.org/openpower/sv/cr_ops/**
18 Bugtracker: https://bugs.libre-soc.org/show_bug.cgi?id=578
19 """
20
21 import functools
22 import os
23 import sys
24 from collections import OrderedDict
25 import inspect
26
27 from openpower.decoder.pseudo.pagereader import ISA
28 from openpower.decoder.power_svp64 import SVP64RM, get_regtype, decode_extra
29 from openpower.decoder.selectable_int import SelectableInt
30 from openpower.consts import SVP64MODE
31 from openpower.decoder.power_insn import SVP64Instruction
32 from openpower.decoder.power_insn import Database
33 from openpower.decoder.power_insn import Style
34 from openpower.decoder.power_insn import WordInstruction
35 from openpower.decoder.power_enums import find_wiki_dir
36
37 # for debug logging
38 from openpower.util import log
39
40
def get_extra_gpr(etype, regmode, field):
    """Split an SVP64 GPR/FPR number into (sv_extra, v3.0B field).

    etype is accepted for signature parity with the CR variant but is
    not consulted here.  regmode selects the layout: 'scalar' uses
    SS FFFFF (extra bits on top), anything else uses FFFFF SS (extra
    bits at the bottom).  Returns the tuple (sv_extra, field).
    """
    if regmode != 'scalar':
        # vector layout: 5-bits 2-bits FFFFF SS
        return field & 0b11, field >> 2
    # scalar layout: 2-bits 5-bits SS FFFFF
    return field >> 5, field & 0b11111
52
53
def get_extra_cr_3bit(etype, regmode, field):
    """Split an SVP64 CR Field number into (sv_extra, 3-bit v3.0B field).

    etype is unused here.  For regmode 'scalar' the layout is SS FFF;
    otherwise it is FFF SSSS, of which the caller later drops two zero
    bits.  Returns the tuple (sv_extra, field).
    """
    if regmode != 'scalar':
        # vector layout: 3-bits 4-bits FFF SSSS
        return field & 0b1111, field >> 4
    # scalar layout: 2-bits 3-bits SS FFF
    return field >> 3, field & 0b111
65
66
def decode_subvl(encoding):
    """Map the text after "vec" ('2', '3' or '4') to its 2-bit SUBVL code.

    Any other text trips the assertion.
    """
    subvl_codes = dict((('2', 0b01), ('3', 0b10), ('4', 0b11)))
    recognised = encoding in subvl_codes
    assert recognised, "encoding %s for SUBVL not recognised" % encoding
    return subvl_codes[encoding]
73
74
def decode_elwidth(encoding):
    """Map an element-width string ('8', '16' or '32') to its 2-bit code.

    Any other text trips the assertion.
    """
    width_codes = dict((('8', 0b11), ('16', 0b10), ('32', 0b01)))
    recognised = encoding in width_codes
    assert recognised, "encoding %s for elwidth not recognised" % encoding
    return width_codes[encoding]
81
82
def decode_predicate(encoding):
    """Decode a predicate-mask name into a (kind, 3-bit value) tuple.

    kind is 0 for the integer-register predicates and 1 for the CR-bit
    predicates.  Unknown names trip the assertion.
    """
    # integer predicates, numbered consecutively from 0b001
    int_names = ('1<<r3', 'r3', '~r3', 'r10', '~r10', 'r30', '~r30')
    # CR predicates, numbered from 0b000; names sharing a tuple are
    # aliases for the same value
    cr_names = (
        ('lt',), ('nl', 'ge'), ('gt',), ('ng', 'le'),
        ('eq',), ('ne',), ('so', 'un'), ('ns', 'nu'),
    )
    table = {}
    for value, name in enumerate(int_names, start=0b001):
        table[name] = (0, value)
    for value, aliases in enumerate(cr_names):
        for name in aliases:
            table[name] = (1, value)
    assert encoding in table, \
        "encoding %s for predicate not recognised" % encoding
    return table[encoding]
106
107
def decode_bo(encoding):
    """Decode a CR-condition name into the 3-bit "BO"-style Mode value.

    The name is looked up in the same (CRbit, inv) numbering used by the
    CR half of decode_predicate(), then re-packed through a 3-bit
    SelectableInt with the inversion flag written at index 0 and the CR
    bit number at indices 1:3, because the spec orders the field as
    (inv, CRbit).  Unknown names trip the assertion.
    """
    # TODO: double-check that these are the same as Branch BO
    # names sharing a tuple are aliases for the same value
    aliases = (
        ('lt',), ('nl', 'ge'), ('gt',), ('ng', 'le'),
        ('eq',), ('ne',), ('so', 'un'), ('ns', 'nu'),
    )
    bo_codes = {name: code
                for code, names in enumerate(aliases)
                for name in names}
    assert encoding in bo_codes, \
        "encoding %s for BO Mode not recognised" % encoding
    code = bo_codes[encoding]
    inv, crbit = code & 1, code >> 1
    # barse-ackwards MSB0/LSB0: swap the two sub-fields around
    swapped = SelectableInt(0, 3)
    swapped[0] = inv
    swapped[1:3] = crbit
    return int(swapped)
130
131
def decode_ffirst(encoding):
    """Partially decode a fail-first (or predicate-result) qualifier.

    'RC1' and '~RC1' are handed back unchanged as strings for the caller
    to interpret; every other value is decoded by decode_bo().
    """
    is_rc1_form = encoding in ('RC1', '~RC1')
    return encoding if is_rc1_form else decode_bo(encoding)
137
138
def decode_reg(field, macros=None):
    """Decode a register operand into (register number, regmode).

    Recognised spellings are "*N" and "*%N" (vector, with N first
    resolved through macros for as long as it names one), and the old
    convention "N", "N.s" (scalar) and "N.v" (vector).  regmode is the
    string 'scalar' or 'vector'.
    """
    macros = {} if macros is None else macros

    # new convention: "*0" and "*%0".  note: *NOT* to add "*%rNNN" etc.
    # https://bugs.libre-soc.org/show_bug.cgi?id=884#c0
    if field.startswith("*"):
        name = field[2:] if field.startswith("*%") else field[1:]
        while name in macros:
            name = macros[name]
        return int(name), "vector"  # actual register number

    # old convention (to be retired): "5.v" or "3.s" or "9"
    parts = field.split(".")
    suffix_modes = {'s': 'scalar', 'v': 'vector'}
    regmode = 'scalar'  # default, also kept for unrecognised suffixes
    if len(parts) == 2:
        regmode = suffix_modes.get(parts[1], regmode)
    return int(parts[0]), regmode
164
165
def decode_imm(field):
    """Split an "imm(reg)" operand into its immediate and register parts.

    When field contains "(" and ends with ")", the trailing ")" is
    dropped and the result of splitting on "(" is returned (a list).
    Otherwise the tuple (None, field) is returned.
    """
    looks_like_ldst = "(" in field and field.endswith(')')
    if not looks_like_ldst:
        return None, field
    without_paren = field[:-1]
    return without_paren.split("(")
172
173
def crf_extra(etype, rname, extra_idx, regmode, field, extras):
    """Encode a CR Field number as (EXTRA2/3 bits, 3-bit v3.0 field).

    The number is first split by get_extra_cr_3bit() according to
    regmode, then range-checked and shrunk to fit the EXTRA width
    selected by etype ('EXTRA2'; anything else is treated as EXTRA3).
    rname, extra_idx and extras are only used to build the assertion
    message when the register does not fit.

    Usable for BF/BFA directly, and for BA/BB/BT if the caller strips
    the bottom 2 bits first and re-instates them afterwards.
    see https://libre-soc.org/openpower/sv/svp64/appendix/#cr_extra
    for specification
    """
    sv_extra, field = get_extra_cr_3bit(etype, regmode, field)
    narrow = etype == 'EXTRA2'

    if regmode == 'scalar':
        if narrow:
            # EXTRA2 scalar: range is CR0-CR15 in increments of 1
            assert (sv_extra >> 1) == 0, \
                "scalar CR %s cannot fit into EXTRA2 %s" % \
                (rname, str(extras[extra_idx]))
            return sv_extra & 0b01, field
        # EXTRA3 scalar: range is CR0-CR31 in increments of 1
        assert (sv_extra >> 2) == 0, \
            "scalar CR %s cannot fit into EXTRA3 %s" % \
            (rname, str(extras[extra_idx]))
        return sv_extra & 0b11, field

    # vector from here on
    if narrow:
        # EXTRA2 vector: range is CR0-CR127 in increments of 16
        assert sv_extra & 0b111 == 0, \
            "vector CR %s cannot fit into EXTRA2 %s" % \
            (rname, str(extras[extra_idx]))
        # mark as vector by setting the top bit of the 2-bit field
        return 0b10 | (sv_extra >> 3), field
    # EXTRA3 vector: range is CR0-CR127 in increments of 8
    assert sv_extra & 0b11 == 0, \
        "vector CR %s cannot fit into EXTRA3 %s" % \
        (rname, str(extras[extra_idx]))
    # mark as vector by setting the top bit of the 3-bit field
    return 0b100 | (sv_extra >> 2), field
218
219
def to_number(field):
    """Convert an assembler numeric literal to an int.

    Accepts "0x..." hexadecimal, "0b..." binary, and anything int()
    parses as plain decimal text.

    Raises ValueError when the text is not a well-formed literal of the
    selected base.
    """
    # SECURITY: this previously passed the text to eval(), which would
    # execute any Python expression that merely started with "0x" or
    # "0b" in the assembly source.  int() with an explicit base parses
    # the same literals (prefix included) without evaluating anything.
    if field.startswith("0x"):
        return int(field, 16)
    if field.startswith("0b"):
        return int(field, 2)
    return int(field)
226
227
# module-level instruction database, constructed when this module is
# imported from the directory returned by find_wiki_dir(); SVP64Asm
# indexes it by mnemonic when the INSNDB environment variable is set
DB = Database(find_wiki_dir())
229
230
231 # decodes svp64 assembly listings and creates EXT001 svp64 prefixes
232 class SVP64Asm:
233 def __init__(self, lst, bigendian=False, macros=None):
234 if macros is None:
235 macros = {}
236 self.macros = macros
237 self.lst = lst
238 self.trans = self.translate(lst)
239 self.isa = ISA() # reads the v3.0B pseudo-code markdown files
240 self.svp64 = SVP64RM() # reads the svp64 Remap entries for registers
241 assert bigendian == False, "error, bigendian not supported yet"
242
243 def __iter__(self):
244 yield from self.trans
245
246 def translate_one(self, insn, macros=None):
247 if macros is None:
248 macros = {}
249 macros.update(self.macros)
250 isa = self.isa
251 svp64 = self.svp64
252 insn_no_comments = insn.partition('#')[0].strip()
253 if not insn_no_comments:
254 return
255
256 # find first space, to get opcode
257 ls = insn_no_comments.split()
258 opcode = ls[0]
259 # now find opcode fields
260 fields = ''.join(ls[1:]).split(',')
261 mfields = list(filter(bool, map(str.strip, fields)))
262 log("opcode, fields", ls, opcode, mfields)
263 fields = []
264 # macro substitution
265 for field in mfields:
266 fields.append(macro_subst(macros, field))
267 log("opcode, fields substed", ls, opcode, fields)
268
269 # identify if it is a word instruction
270 record = None
271 if os.environ.get("INSNDB"):
272 record = DB[opcode]
273 if record is not None:
274 insn = WordInstruction.assemble(db=DB,
275 entry=opcode, arguments=fields)
276 yield " ".join((
277 f".long 0x{int(insn):08X}",
278 "#",
279 opcode,
280 ",".join(fields),
281 ))
282 return
283
284 # identify if is a svp64 mnemonic
285 if not opcode.startswith('sv.'):
286 yield insn # unaltered
287 return
288 opcode = opcode[3:] # strip leading "sv"
289
290 # start working on decoding the svp64 op: sv.basev30Bop/vec2/mode
291 opmodes = opcode.split("/") # split at "/"
292 v30b_op_orig = opmodes.pop(0) # first is the v3.0B
293 # check instruction ends with dot
294 rc_mode = v30b_op_orig.endswith('.')
295 if rc_mode:
296 v30b_op = v30b_op_orig[:-1]
297 else:
298 v30b_op = v30b_op_orig
299
300 record = None
301 if os.environ.get("INSNDB"):
302 record = DB[v30b_op]
303 if record is not None:
304 insn = SVP64Instruction.assemble(db=DB,
305 entry=v30b_op_orig,
306 arguments=fields,
307 specifiers=opmodes)
308 prefix = int(insn.prefix)
309 suffix = int(insn.suffix)
310 yield f".long 0x{prefix:08X}"
311 yield from insn.suffix.disassemble(db=DB, style=Style.SHORT)
312 return
313
314 # look up the 32-bit op (original, with "." if it has it)
315 if v30b_op_orig in isa.instr:
316 isa_instr = isa.instr[v30b_op_orig]
317 else:
318 raise Exception("opcode %s of '%s' not supported" %
319 (v30b_op_orig, insn))
320
321 # look up the svp64 op, first the original (with "." if it has it)
322 if v30b_op_orig in svp64.instrs:
323 rm = svp64.instrs[v30b_op_orig] # one row of the svp64 RM CSV
324 # then without the "." (if there was one)
325 elif v30b_op in svp64.instrs:
326 rm = svp64.instrs[v30b_op] # one row of the svp64 RM CSV
327 else:
328 raise Exception(f"opcode {v30b_op_orig!r} of "
329 f"{insn!r} not an svp64 instruction")
330
331 # get regs info e.g. "RT,RA,RB"
332 v30b_regs = isa_instr.regs[0]
333 log("v3.0B op", v30b_op, "Rc=1" if rc_mode else '')
334 log("v3.0B regs", opcode, v30b_regs)
335 log("RM", rm)
336
337 # right. the first thing to do is identify the ordering of
338 # the registers, by name. the EXTRA2/3 ordering is in
339 # rm['0']..rm['3'] but those fields contain the names RA, BB
340 # etc. we have to read the pseudocode to understand which
341 # reg is which in our instruction. sigh.
342
343 # first turn the svp64 rm into a "by name" dict, recording
344 # which position in the RM EXTRA it goes into
345 # also: record if the src or dest was a CR, for sanity-checking
346 # (elwidth overrides on CRs are banned)
347 decode = decode_extra(rm)
348 dest_reg_cr, src_reg_cr, svp64_src, svp64_dest = decode
349
350 log("EXTRA field index, src", svp64_src)
351 log("EXTRA field index, dest", svp64_dest)
352
353 # okaaay now we identify the field value (opcode N,N,N) with
354 # the pseudo-code info (opcode RT, RA, RB)
355 assert len(fields) == len(v30b_regs), \
356 "length of fields %s must match insn `%s` fields %s" % \
357 (str(v30b_regs), insn, str(fields))
358 opregfields = zip(fields, v30b_regs) # err that was easy
359
360 # now for each of those find its place in the EXTRA encoding
361 # note there is the possibility (for LD/ST-with-update) of
362 # RA occurring **TWICE**. to avoid it getting added to the
363 # v3.0B suffix twice, we spot it as a duplicate, here
364 extras = OrderedDict()
365 for idx, (field, regname) in enumerate(opregfields):
366 imm, regname = decode_imm(regname)
367 rtype = get_regtype(regname)
368 log(" idx find", rtype, idx, field, regname, imm)
369 if rtype is None:
370 # probably an immediate field, append it straight
371 extras[('imm', idx, False)] = (idx, field, None, None, None)
372 continue
373 extra = svp64_src.get(regname, None)
374 if extra is not None:
375 extra = ('s', extra, False) # not a duplicate
376 extras[extra] = (idx, field, regname, rtype, imm)
377 log(" idx src", idx, extra, extras[extra])
378 dextra = svp64_dest.get(regname, None)
379 log("regname in", regname, dextra)
380 if dextra is not None:
381 is_a_duplicate = extra is not None # duplicate spotted
382 dextra = ('d', dextra, is_a_duplicate)
383 extras[dextra] = (idx, field, regname, rtype, imm)
384 log(" idx dst", idx, extra, extras[dextra])
385
386 # great! got the extra fields in their associated positions:
387 # also we know the register type. now to create the EXTRA encodings
388 etype = rm['Etype'] # Extra type: EXTRA3/EXTRA2
389 ptype = rm['Ptype'] # Predication type: Twin / Single
390 extra_bits = 0
391 v30b_newfields = []
392 for extra_idx, (idx, field, rname, rtype, iname) in extras.items():
393 # is it a field we don't alter/examine? if so just put it
394 # into newfields
395 if rtype is None:
396 v30b_newfields.append(field)
397 continue
398
399 # identify if this is a ld/st immediate(reg) thing
400 ldst_imm = "(" in field and field[-1] == ')'
401 if ldst_imm:
402 immed, field = field[:-1].split("(")
403
404 field, regmode = decode_reg(field, macros=macros)
405 log(" ", extra_idx, rname, rtype,
406 regmode, iname, field, end=" ")
407
408 # see Mode field https://libre-soc.org/openpower/sv/svp64/
409 # XXX TODO: the following is a bit of a laborious repeated
410 # mess, which could (and should) easily be parameterised.
411 # XXX also TODO: the LD/ST modes which are different
412 # https://libre-soc.org/openpower/sv/ldst/
413
414 # rright. SVP64 register numbering is from 0 to 127
415 # for GPRs, FPRs *and* CR Fields, where for v3.0 the GPRs and RPFs
416 # are 0-31 and CR Fields are only 0-7. the SVP64 RM "Extra"
417 # area is used to extend the numbering from the 32-bit
418 # instruction, and also to record whether the register
419 # is scalar or vector. on a per-operand basis. this
420 # results in a slightly finnicky encoding: here we go...
421
422 # encode SV-GPR and SV-FPR field into extra, v3.0field
423 if rtype in ['GPR', 'FPR']:
424 sv_extra, field = get_extra_gpr(etype, regmode, field)
425 # now sanity-check. EXTRA3 is ok, EXTRA2 has limits
426 # (and shrink to a single bit if ok)
427 if etype == 'EXTRA2':
428 if regmode == 'scalar':
429 # range is r0-r63 in increments of 1
430 assert (sv_extra >> 1) == 0, \
431 "scalar GPR %s cannot fit into EXTRA2 %s" % \
432 (rname, str(extras[extra_idx]))
433 # all good: encode as scalar
434 sv_extra = sv_extra & 0b01
435 else:
436 # range is r0-r127 in increments of 2 (r0 r2 ... r126)
437 assert sv_extra & 0b01 == 0, \
438 "%s: vector field %s cannot fit " \
439 "into EXTRA2 %s" % \
440 (insn, rname, str(extras[extra_idx]))
441 # all good: encode as vector (bit 2 set)
442 sv_extra = 0b10 | (sv_extra >> 1)
443 elif regmode == 'vector':
444 # EXTRA3 vector bit needs marking
445 sv_extra |= 0b100
446
447 # encode SV-CR 3-bit field into extra, v3.0field.
448 # 3-bit is for things like BF and BFA
449 elif rtype == 'CR_3bit':
450 sv_extra, field = crf_extra(etype, rname, extra_idx,
451 regmode, field, extras)
452
453 # encode SV-CR 5-bit field into extra, v3.0field
454 # 5-bit is for things like BA BB BC BT etc.
455 # *sigh* this is the same as 3-bit except the 2 LSBs of the
456 # 5-bit field are passed through unaltered.
457 elif rtype == 'CR_5bit':
458 cr_subfield = field & 0b11 # record bottom 2 bits for later
459 field = field >> 2 # strip bottom 2 bits
460 # use the exact same 3-bit function for the top 3 bits
461 sv_extra, field = crf_extra(etype, rname, extra_idx,
462 regmode, field, extras)
463 # reconstruct the actual 5-bit CR field (preserving the
464 # bottom 2 bits, unaltered)
465 field = (field << 2) | cr_subfield
466
467 else:
468 raise Exception("no type match: %s" % rtype)
469
470 # capture the extra field info
471 log("=>", "%5s" % bin(sv_extra), field)
472 extras[extra_idx] = sv_extra
473
474 # append altered field value to v3.0b, differs for LDST
475 # note that duplicates are skipped e.g. EXTRA2 contains
476 # *BOTH* s:RA *AND* d:RA which happens on LD/ST-with-update
477 srcdest, idx, duplicate = extra_idx
478 if duplicate: # skip adding to v3.0b fields, already added
479 continue
480 if ldst_imm:
481 v30b_newfields.append(("%s(%s)" % (immed, str(field))))
482 else:
483 v30b_newfields.append(str(field))
484
485 log("new v3.0B fields", v30b_op, v30b_newfields)
486 log("extras", extras)
487
488 # rright. now we have all the info. start creating SVP64 instruction.
489 svp64_insn = SVP64Instruction.pair(prefix=0, suffix=0)
490 svp64_prefix = svp64_insn.prefix
491 svp64_rm = svp64_insn.prefix.rm
492
493 # begin with EXTRA fields
494 for idx, sv_extra in extras.items():
495 log(idx)
496 if idx is None:
497 continue
498 if idx[0] == 'imm':
499 continue
500 srcdest, idx, duplicate = idx
501 if etype == 'EXTRA2':
502 svp64_rm.extra2[idx] = sv_extra
503 else:
504 svp64_rm.extra3[idx] = sv_extra
505
506 # identify if the op is a LD/ST.
507 # see https://libre-soc.org/openpower/sv/ldst/
508 is_ldst = rm['mode'] in ['LDST_IDX', 'LDST_IMM']
509 is_ldst_idx = rm['mode'] == 'LDST_IDX'
510 is_ldst_imm = rm['mode'] == 'LDST_IMM'
511 is_ld = v30b_op.startswith("l") and is_ldst
512 is_st = v30b_op.startswith("s") and is_ldst
513
514 # branch-conditional or CR detection
515 is_bc = rm['mode'] == 'BRANCH'
516 is_cr = rm['mode'] == 'CROP'
517
518 # parts of svp64_rm
519 mmode = 0 # bit 0
520 pmask = 0 # bits 1-3
521 destwid = 0 # bits 4-5
522 srcwid = 0 # bits 6-7
523 subvl = 0 # bits 8-9
524 smask = 0 # bits 16-18 but only for twin-predication
525 mode = 0 # bits 19-23
526
527 mask_m_specified = False
528 has_pmask = False
529 has_smask = False
530
531 saturation = None
532 src_zero = 0
533 dst_zero = 0
534 sv_mode = None
535
536 mapreduce = False
537 reverse_gear = False
538 mapreduce_crm = False
539
540 predresult = False
541 failfirst = False
542 ldst_elstride = 0
543 ldst_postinc = 0
544 sea = False
545
546 vli = False
547 sea = False
548
549 # ok let's start identifying opcode augmentation fields
550 for encmode in opmodes:
551 # predicate mask (src and dest)
552 if encmode.startswith("m="):
553 pme = encmode
554 pmmode, pmask = decode_predicate(encmode[2:])
555 smmode, smask = pmmode, pmask
556 mmode = pmmode
557 mask_m_specified = True
558 # predicate mask (dest)
559 elif encmode.startswith("dm="):
560 pme = encmode
561 pmmode, pmask = decode_predicate(encmode[3:])
562 mmode = pmmode
563 has_pmask = True
564 # predicate mask (src, twin-pred)
565 elif encmode.startswith("sm="):
566 sme = encmode
567 smmode, smask = decode_predicate(encmode[3:])
568 mmode = smmode
569 has_smask = True
570 # vec2/3/4
571 elif encmode.startswith("vec"):
572 subvl = decode_subvl(encmode[3:])
573 # elwidth (both src and dest, like mask)
574 elif encmode.startswith("w="):
575 destwid = decode_elwidth(encmode[2:])
576 srcwid = decode_elwidth(encmode[2:])
577 # just dest width
578 elif encmode.startswith("dw="):
579 destwid = decode_elwidth(encmode[3:])
580 # just src width
581 elif encmode.startswith("sw="):
582 srcwid = decode_elwidth(encmode[3:])
583 # post-increment
584 elif encmode == 'pi':
585 ldst_postinc = 1
586 # in indexed mode, set sv_mode=0b00
587 assert is_ldst_imm is True
588 sv_mode = 0b00
589 # element-strided LD/ST
590 elif encmode == 'els':
591 ldst_elstride = 1
592 # in indexed mode, set sv_mode=0b01
593 if is_ldst_idx:
594 sv_mode = 0b01
595 # saturation
596 elif encmode == 'sats':
597 assert sv_mode is None
598 saturation = 1
599 sv_mode = 0b10
600 elif encmode == 'satu':
601 assert sv_mode is None
602 sv_mode = 0b10
603 saturation = 0
604 # predicate zeroing
605 elif encmode == 'zz': # TODO, a lot more checking on legality
606 dst_zero = 1 # NOT on cr_ops, that's RM[6]
607 src_zero = 1
608 elif encmode == 'sz':
609 src_zero = 1
610 elif encmode == 'dz':
611 dst_zero = 1
612 # failfirst
613 elif encmode.startswith("ff="):
614 assert sv_mode is None
615 if is_cr: # sigh, CROPs is different
616 sv_mode = 0b10
617 else:
618 sv_mode = 0b01
619 failfirst = decode_ffirst(encmode[3:])
620 assert sea is False, "cannot use failfirst with signed-address"
621 # predicate-result, interestingly same as fail-first
622 elif encmode.startswith("pr="):
623 assert sv_mode is None
624 sv_mode = 0b11
625 predresult = decode_ffirst(encmode[3:])
626 # map-reduce mode, reverse-gear
627 elif encmode == 'mrr':
628 assert sv_mode is None
629 sv_mode = 0b00
630 mapreduce = True
631 reverse_gear = True
632 # map-reduce mode
633 elif encmode == 'mr':
634 assert sv_mode is None
635 sv_mode = 0b00
636 mapreduce = True
637 elif encmode == 'crm': # CR on map-reduce
638 assert sv_mode is None
639 sv_mode = 0b00
640 mapreduce_crm = True
641 elif encmode == 'vli':
642 assert failfirst is not False, "VLi only allowed in failfirst"
643 vli = True
644 elif encmode == 'sea':
645 assert is_ldst_idx
646 sea = True
647 assert failfirst is False, "cannot use ffirst+signed-address"
648 elif is_bc:
649 if encmode == 'all':
650 svp64_rm.branch.ALL = 1
651 elif encmode == 'snz':
652 svp64_rm.branch.sz = 1
653 svp64_rm.branch.SNZ = 1
654 elif encmode == 'sl':
655 svp64_rm.branch.SL = 1
656 elif encmode == 'slu':
657 svp64_rm.branch.SLu = 1
658 elif encmode == 'lru':
659 svp64_rm.branch.LRu = 1
660 elif encmode == 'vs':
661 svp64_rm.branch.VLS = 1
662 elif encmode == 'vsi':
663 svp64_rm.branch.VLS = 1
664 svp64_rm.branch.vls.VLi = 1
665 elif encmode == 'vsb':
666 svp64_rm.branch.VLS = 1
667 svp64_rm.branch.vls.VSb = 1
668 elif encmode == 'vsbi':
669 svp64_rm.branch.VLS = 1
670 svp64_rm.branch.vls.VSb = 1
671 svp64_rm.branch.vls.VLi = 1
672 elif encmode == 'ctr':
673 svp64_rm.branch.CTR = 1
674 elif encmode == 'cti':
675 svp64_rm.branch.CTR = 1
676 svp64_rm.branch.ctr.CTi = 1
677 else:
678 raise AssertionError("unknown encmode %s" % encmode)
679 else:
680 raise AssertionError("unknown encmode %s" % encmode)
681
682 # post-inc only available on ld-with-update
683 if ldst_postinc:
684 assert "u" in opcode, "/pi only available on ld/st-update"
685
686 # sanity check if dz/zz used in branch-mode
687 if is_bc and dst_zero:
688 raise AssertionError("dz/zz not supported in branch, use 'sz'")
689
690 # check sea *after* all qualifiers are evaluated
691 if sea:
692 assert sv_mode in (None, 0b00, 0b01)
693
694 if ptype == '2P':
695 # since m=xx takes precedence (overrides) sm=xx and dm=xx,
696 # treat them as mutually exclusive
697 if mask_m_specified:
698 assert not has_smask,\
699 "cannot have both source-mask and predicate mask"
700 assert not has_pmask,\
701 "cannot have both dest-mask and predicate mask"
702 # since the default is INT predication (ALWAYS), if you
703 # specify one CR mask, you must specify both, to avoid
704 # mixing INT and CR reg types
705 if has_pmask and pmmode == 1:
706 assert has_smask, \
707 "need explicit source-mask in CR twin predication"
708 if has_smask and smmode == 1:
709 assert has_pmask, \
710 "need explicit dest-mask in CR twin predication"
711 # sanity-check that 2Pred mask is same mode
712 if has_pmask and has_smask:
713 assert smmode == pmmode, \
714 "predicate masks %s and %s must be same reg type" % \
715 (pme, sme)
716
717 # sanity-check that twin-predication mask only specified in 2P mode
718 if ptype == '1P':
719 assert not has_smask, \
720 "source-mask can only be specified on Twin-predicate ops"
721 assert not has_pmask, \
722 "dest-mask can only be specified on Twin-predicate ops"
723
724 # construct the mode field, doing sanity-checking along the way
725 if src_zero:
726 assert has_smask or mask_m_specified, \
727 "src zeroing requires a source predicate"
728 if dst_zero:
729 assert has_pmask or mask_m_specified, \
730 "dest zeroing requires a dest predicate"
731
732 # okaaay, so there are 4 different modes, here, which will be
733 # partly-merged-in: is_ldst is merged in with "normal", but
734 # is_bc is so different it's done separately. likewise is_cr
735 # (when it is done). here are the maps:
736
737 # for "normal" arithmetic: https://libre-soc.org/openpower/sv/normal/
738 """
739 | 0-1 | 2 | 3 4 | description |
740 | --- | --- |---------|-------------------------- |
741 | 00 | 0 | dz sz | simple mode |
742 | 00 | 1 | 0 RG | scalar reduce mode (mapreduce) |
743 | 01 | inv | CR-bit | Rc=1: ffirst CR sel |
744 | 01 | inv | VLi RC1 | Rc=0: ffirst z/nonz |
745 | 10 | N | dz sz | sat mode: N=0/1 u/s |
746 | 11 | inv | CR-bit | Rc=1: pred-result CR sel |
747 | 11 | inv | zz RC1 | Rc=0: pred-result z/nonz |
748 """
749
750 # https://libre-soc.org/openpower/sv/ldst/
751 # for LD/ST-immediate:
752 """
753 | 0-1 | 2 | 3 4 | description |
754 | --- | --- |---------|--------------------------- |
755 | 00 | 0 | zz els | normal mode |
756 | 00 | 1 | pi lf | post-inc, LD-fault-first |
757 | 01 | inv | CR-bit | Rc=1: ffirst CR sel |
758 | 01 | inv | els RC1 | Rc=0: ffirst z/nonz |
759 | 10 | N | zz els | sat mode: N=0/1 u/s |
760 | 11 | inv | CR-bit | Rc=1: pred-result CR sel |
761 | 11 | inv | els RC1 | Rc=0: pred-result z/nonz |
762 """
763
764 # for LD/ST-indexed (RA+RB):
765 """
766 | 0-1 | 2 | 3 4 | description |
767 | --- | --- |---------|----------------------------- |
768 | 00 | SEA | dz sz | normal mode |
769 | 01 | SEA | dz sz | strided (scalar only source) |
770 | 10 | N | dz sz | sat mode: N=0/1 u/s |
771 | 11 | inv | CR-bit | Rc=1: pred-result CR sel |
772 | 11 | inv | dz RC1 | Rc=0: pred-result z/nonz |
773 """
774
775 # and leaving out branches and cr_ops for now because they're
776 # under development
777 """ TODO branches and cr_ops
778 """
779
780 if is_bc:
781 sv_mode = int(svp64_rm.mode[0, 1])
782 if src_zero:
783 svp64_rm.branch.sz = 1
784
785 else:
786 ######################################
787 # "element-strided" mode, ldst_idx
788 if sv_mode == 0b01 and is_ldst_idx:
789 mode |= src_zero << SVP64MODE.SZ # predicate zeroing
790 mode |= dst_zero << SVP64MODE.DZ # predicate zeroing
791 mode |= sea << SVP64MODE.SEA # el-strided
792
793 ######################################
794 # "normal" mode
795 elif sv_mode is None:
796 mode |= src_zero << SVP64MODE.SZ # predicate zeroing
797 mode |= dst_zero << SVP64MODE.DZ # predicate zeroing
798 if is_ldst:
799 # TODO: for now, LD/ST-indexed is ignored.
800 mode |= ldst_elstride << SVP64MODE.ELS_NORMAL # el-strided
801 else:
802 # TODO, reduce and subvector mode
803 # 00 1 dz CRM reduce mode (mapreduce), SUBVL=1
804 # 00 1 SVM CRM subvector reduce mode, SUBVL>1
805 pass
806 sv_mode = 0b00
807
808 ######################################
809 # ldst-immediate "post" (and "load-fault-first" modes)
810 elif sv_mode == 0b00 and ldst_postinc == 1: # (or ldst_ld_ffirst)
811 mode |= (0b1 << SVP64MODE.LDI_POST) # sets bit 2
812 mode |= (ldst_postinc << SVP64MODE.LDI_PI) # sets post-inc
813
814 ######################################
815 # "mapreduce" modes
816 elif sv_mode == 0b00:
817 mode |= (0b1 << SVP64MODE.REDUCE) # sets mapreduce
818 assert dst_zero == 0, "dest-zero not allowed in mapreduce mode"
819 if reverse_gear:
820 mode |= (0b1 << SVP64MODE.RG) # sets Reverse-gear mode
821 if mapreduce_crm:
822 mode |= (0b1 << SVP64MODE.CRM) # sets CRM mode
823 assert rc_mode, "CRM only allowed when Rc=1"
824 # bit of weird encoding to jam zero-pred or SVM mode in.
825 # SVM mode can be enabled only when SUBVL=2/3/4 (vec2/3/4)
826 if subvl == 0:
827 mode |= dst_zero << SVP64MODE.DZ # predicate zeroing
828
829 ######################################
830 # "failfirst" modes
831 elif failfirst is not False and not is_cr: # sv_mode == 0b01:
832 assert src_zero == 0, "dest-zero not allowed in failfirst mode"
833 if failfirst == 'RC1':
834 mode |= (0b1 << SVP64MODE.RC1) # sets RC1 mode
835 mode |= (dst_zero << SVP64MODE.DZ) # predicate dst-zeroing
836 assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
837 elif failfirst == '~RC1':
838 mode |= (0b1 << SVP64MODE.RC1) # sets RC1 mode
839 mode |= (dst_zero << SVP64MODE.DZ) # predicate dst-zeroing
840 mode |= (0b1 << SVP64MODE.INV) # ... with inversion
841 assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
842 else:
843 assert dst_zero == 0, "dst-zero not allowed in ffirst BO"
844 assert rc_mode, "ffirst BO only possible when Rc=1"
845 mode |= (failfirst << SVP64MODE.BO_LSB) # set BO
846
847 # (crops is really different)
848 elif failfirst is not False and is_cr:
849 if failfirst in ['RC1', '~RC1']:
850 mode |= (src_zero << SVP64MODE.SZ) # predicate src-zeroing
851 mode |= (dst_zero << SVP64MODE.DZ) # predicate dst-zeroing
852 if failfirst == '~RC1':
853 mode |= (0b1 << SVP64MODE.INV) # ... with inversion
854 else:
855 assert dst_zero == src_zero, "dz must equal sz in ffirst BO"
856 mode |= (failfirst << SVP64MODE.BO_LSB) # set BO
857 svp64_rm.cr_op.zz = dst_zero
858 if vli:
859 sv_mode |= 1 # set VLI in LSB of 2-bit mode
860 #svp64_rm.cr_op.vli = 1
861
862 ######################################
863 # "saturation" modes
864 elif sv_mode == 0b10:
865 mode |= src_zero << SVP64MODE.SZ # predicate zeroing
866 mode |= dst_zero << SVP64MODE.DZ # predicate zeroing
867 mode |= (saturation << SVP64MODE.N) # signed/us saturation
868
869 ######################################
870 # "predicate-result" modes. err... code-duplication from ffirst
871 elif sv_mode == 0b11:
872 assert src_zero == 0, "dest-zero not allowed in predresult mode"
873 if predresult == 'RC1':
874 mode |= (0b1 << SVP64MODE.RC1) # sets RC1 mode
875 mode |= (dst_zero << SVP64MODE.DZ) # predicate dst-zeroing
876 assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
877 elif predresult == '~RC1':
878 mode |= (0b1 << SVP64MODE.RC1) # sets RC1 mode
879 mode |= (dst_zero << SVP64MODE.DZ) # predicate dst-zeroing
880 mode |= (0b1 << SVP64MODE.INV) # ... with inversion
881 assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
882 else:
883 assert dst_zero == 0, "dst-zero not allowed in pr-mode BO"
884 assert rc_mode, "pr-mode BO only possible when Rc=1"
885 mode |= (predresult << SVP64MODE.BO_LSB) # set BO
886
887 # whewww.... modes all done :)
888 # now put into svp64_rm, but respect MSB0 order
889 if sv_mode & 1:
890 mode |= (0b1 << SVP64MODE.MOD2_LSB)
891 if sv_mode & 2:
892 mode |= (0b1 << SVP64MODE.MOD2_MSB)
893
894 if sea:
895 mode |= (0b1 << SVP64MODE.SEA)
896
897 # this is a mess. really look forward to replacing it with Insn DB
898 if not is_bc:
899 svp64_rm.mode = mode # mode: bits 19-23
900 if vli and not is_cr:
901 svp64_rm.normal.ffrc0.VLi = 1
902
903 # put in predicate masks into svp64_rm
904 if ptype == '2P':
905 svp64_rm.smask = smask # source pred: bits 16-18
906
907 # put in elwidths unless cr
908 if not is_cr:
909 svp64_rm.ewsrc = srcwid # srcwid: bits 6-7
910 svp64_rm.elwidth = destwid # destwid: bits 4-5
911
912 svp64_rm.mmode = mmode # mask mode: bit 0
913 svp64_rm.mask = pmask # 1-pred: bits 1-3
914 svp64_rm.subvl = subvl # and subvl: bits 8-9
915
916 # nice debug printout. (and now for something completely different)
917 # https://youtu.be/u0WOIwlXE9g?t=146
918 svp64_rm_value = int(svp64_rm)
919 log("svp64_rm", hex(svp64_rm_value), bin(svp64_rm_value))
920 log(" mmode 0 :", bin(mmode))
921 log(" pmask 1-3 :", bin(pmask))
922 log(" dstwid 4-5 :", bin(destwid))
923 log(" srcwid 6-7 :", bin(srcwid))
924 log(" subvl 8-9 :", bin(subvl))
925 log(" mode 19-23:", bin(svp64_rm.mode))
926 offs = 2 if etype == 'EXTRA2' else 3 # 2 or 3 bits
927 for idx, sv_extra in extras.items():
928 if idx is None:
929 continue
930 if idx[0] == 'imm':
931 continue
932 srcdest, idx, duplicate = idx
933 start = (10+idx*offs)
934 end = start + offs-1
935 log(" extra%d %2d-%2d:" % (idx, start, end),
936 bin(sv_extra))
937 if ptype == '2P':
938 log(" smask 16-17:", bin(smask))
939 log()
940
941 # update prefix PO and ID (aka PID)
942 svp64_prefix.PO = 0x1
943 svp64_prefix.id = 0b11
944
945 # fiinally yield the svp64 prefix and the thingy. v3.0b opcode
946 rc = '.' if rc_mode else ''
947 yield ".long 0x%08x" % int(svp64_prefix)
948 log(v30b_op, v30b_newfields)
949
950 v30b_op_rc = v30b_op
951 if not v30b_op.endswith('.'):
952 v30b_op_rc += rc
953
954 record = None
955 if os.environ.get("INSNDB"):
956 record = DB[opcode]
957 if record is not None:
958 insn = WordInstruction.assemble(db=DB,
959 entry=opcode, arguments=fields)
960 yield " ".join((
961 f".long 0x{int(insn):08X}",
962 "#",
963 opcode,
964 ",".join(fields),
965 ))
966 else:
967 if not v30b_op.endswith('.'):
968 v30b_op += rc
969 yield "%s %s" % (v30b_op, ", ".join(v30b_newfields))
970 for (name, span) in svp64_insn.traverse("SVP64"):
971 value = svp64_insn.storage[span]
972 log(name, f"{value.value:0{value.bits}b}", span)
973 log("new v3.0B fields", v30b_op, v30b_newfields)
974
def translate(self, lst):
    """Translate a sequence of instructions, yielding the output lines.

    Every entry of ``lst`` is handed, in order, to ``self.translate_one``
    and whatever that generator produces is passed straight through to
    the caller.
    """
    for asm_line in lst:
        yield from self.translate_one(asm_line)
978
979
def macro_subst(macros, txt):
    """Expand macro names appearing in the operand text ``txt``.

    ``macros`` maps a macro name to its replacement text.  For each macro
    the following spellings are tried, in this order, and the first one
    that applies is substituted:

    * ``name``    - ``txt`` is exactly the macro name
    * ``name.s``  - ``txt`` is exactly the name with a ``.s`` suffix
    * ``name.v``  - ``txt`` is exactly the name with a ``.v`` suffix
    * ``*name``   - occurs anywhere inside ``txt``
    * ``(name)``  - occurs anywhere inside ``txt``

    Passes over ``macros`` are repeated until a complete pass performs no
    substitution, so a replacement may itself mention another macro.
    NOTE: a replacement that still matches its own macro keeps the loop
    running forever; nothing here guards against that.

    Returns the fully substituted text.
    """
    # decorated spellings of a macro name: (format, substring match?)
    decorated = (
        ('%s.s', False),
        ('%s.v', False),
        ('*%s', True),
        ('(%s)', True),
    )

    log("subst", txt, macros)
    rescan = True
    while rescan:
        rescan = False
        for macro, value in macros.items():
            if macro == txt:
                # bare macro name: swap in the value as-is
                needle, substitute = macro, value
            else:
                for fmt, partial in decorated:
                    needle = fmt % macro
                    if (needle in txt) if partial else (needle == txt):
                        substitute = fmt % value
                        break
                else:
                    # no spelling of this macro applies to txt
                    continue
            expanded = txt.replace(needle, substitute)
            log("macro", txt, "replaced", expanded, needle, value)
            txt = expanded
            rescan = True
    log(" processed", txt)
    return txt
1022
1023
def get_ws(line):
    """Split ``line`` into its leading whitespace and the remainder.

    Returns a ``(ws, rest)`` tuple: ``ws`` is the run of whitespace
    characters at the start of ``line`` (possibly empty) and ``rest`` is
    everything after it.
    """
    if not line:
        # nothing to scan: hand the (empty) input straight back
        return '', line
    pos = 0
    while pos < len(line) and line[pos].isspace():
        pos += 1
    return line[:pos], line[pos:]
1033
1034
def asm_process():
    """Command-line filter: translate an assembly listing line by line.

    With no arguments the listing is read from stdin and written to
    stdout.  With two arguments they name the input and the output;
    ``-`` (as printed in the usage text) or the historical ``--`` selects
    stdin / stdout.  Any other argument count prints the usage text to
    stderr and exits with status 1.

    The input is read completely before the output is opened, so the same
    path may be given for both (in-place overwrite).

    Blank and comment-only lines are copied through unchanged.  Every
    other line has its ``#`` comment stripped and is passed to
    ``SVP64Asm.translate_one`` together with the ``.set`` macros seen so
    far.  The pieces it yields are joined with ``"; "`` and written after
    the line's original leading whitespace, followed by the original
    instruction text as a trailing comment.
    """
    args = sys.argv[1:]
    if len(args) not in (0, 2):
        print("pysvp64asm [infile | -] [outfile | -]", file=sys.stderr)
        # a usage error must not look like success to calling scripts;
        # sys.exit also works where the site-provided exit() is absent
        sys.exit(1)

    # the usage text advertises "-", but only "--" used to be recognised:
    # accept both spellings so documented and historical usage work
    stdio_names = ('-', '--')
    (in_name, out_name) = args if args else ('-', '-')

    # read the whole lot in advance in case of in-place overwrite
    if in_name in stdio_names:
        lines = sys.stdin.readlines()
    else:
        with open(in_name, "r") as infile:
            lines = infile.readlines()

    use_stdout = out_name in stdio_names
    outfile = sys.stdout if use_stdout else open(out_name, "w")
    try:
        # read the line, look for custom insn, process it
        macros = {}  # macros which start ".set"
        isa = SVP64Asm([])
        for line in lines:
            # "op" is the line with any trailing comment removed, so it
            # can never itself begin with "#"
            op = line.split("#")[0].strip()
            # identify macros; the ".set" line is still translated below
            if op.startswith(".set"):
                (macro, value) = map(str.strip, op[4:].split(","))
                macros[macro] = value

            if not op:
                # blank or comment-only line: copy through untouched
                outfile.write(line)
                continue
            (ws, _) = get_ws(line)  # keep the original indentation
            translated = '; '.join(isa.translate_one(op, macros))
            outfile.write("%s%s # %s\n" % (ws, translated, op))
    finally:
        # never close the interpreter's own stdout, only files we opened
        if not use_stdout:
            outfile.close()
1077
1078
if __name__ == '__main__':
    # Developer scratchpad of hard-coded test instructions.
    # NOTE: each plain "lst = [...]" below REPLACES the list built so far
    # ("lst += [...]" extends it), so only the final assignment is actually
    # assembled.  The earlier lists are kept as a record of cases that were
    # tried by hand.
    lst = ['slw 3, 1, 4',
           'extsw 5, 3',
           'sv.extsw 5, 3',
           'sv.cmpi 5, 1, 3, 2',
           'sv.setb 5, 31',
           'sv.isel 64.v, 3, 2, 65.v',
           'sv.setb/dm=r3/sm=1<<r3 5, 31',
           'sv.setb/m=r3 5, 31',
           'sv.setb/vec2 5, 31',
           'sv.setb/sw=8/ew=16 5, 31',
           'sv.extsw./ff=eq 5, 31',
           'sv.extsw./satu/sz/dz/sm=r3/dm=r3 5, 31',
           'sv.extsw./pr=eq 5.v, 31',
           'sv.add. 5.v, 2.v, 1.v',
           'sv.add./m=r3 5.v, 2.v, 1.v',
           ]
    lst += [
        'sv.stw 5.v, 4(1.v)',
        'sv.ld 5.v, 4(1.v)',
        'setvl. 2, 3, 4, 0, 1, 1',
        'sv.setvl. 2, 3, 4, 0, 1, 1',
    ]
    # (overwrites everything above)
    lst = [
        "sv.stfsu 0.v, 16(4.v)",
    ]
    lst = [
        "sv.stfsu/els 0.v, 16(4)",
    ]
    lst = [
        'sv.add./mr 5.v, 2.v, 1.v',
    ]
    # macros is NOT overwritten later: it is what gets passed to SVP64Asm
    # at the bottom of this block
    macros = {'win2': '50', 'win': '60'}
    lst = [
        'sv.addi win2.v, win.v, -1',
        'sv.add./mrr 5.v, 2.v, 1.v',
        #'sv.lhzsh 5.v, 11(9.v), 15',
        #'sv.lwzsh 5.v, 11(9.v), 15',
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',
    ]
    lst = [
        #'sv.fmadds 0.v, 8.v, 16.v, 4.v',
        #'sv.ffadds 0.v, 8.v, 4.v',
        'svremap 11, 0, 1, 2, 3, 2, 1',
        'svshape 8, 1, 1, 1, 0',
        'svshape 8, 1, 1, 1, 1',
    ]
    lst = [
        #'sv.lfssh 4.v, 11(8.v), 15',
        #'sv.lwzsh 4.v, 11(8.v), 15',
        #'sv.svstep. 2.v, 4, 0',
        #'sv.fcfids. 48.v, 64.v',
        'sv.fcoss. 80.v, 0.v',
        'sv.fcoss. 20.v, 0.v',
    ]
    lst = [
        'sv.bc/all 3,12,192',
        'sv.bclr/vsbi 3,81.v,192',
        'sv.ld 5.v, 4(1.v)',
        'sv.svstep. 2.v, 4, 0',
    ]
    lst = [
        'maxs 3,12,5',
        'maxs. 3,12,5',
        'avgadd 3,12,5',
        'absdu 3,12,5',
        'absds 3,12,5',
        'absdacu 3,12,5',
        'absdacs 3,12,5',
        'cprop 3,12,5',
        'svindex 0,0,1,0,0,0,0',
    ]
    lst = [
        'sv.svstep./m=r3 2.v, 4, 0',
        'ternlogi 0,0,0,0x5',
        'fmvis 5,65535',
        'fmvis 5,1',
        'fmvis 5,2',
        'fmvis 5,4',
        'fmvis 5,8',
        'fmvis 5,16',
        'fmvis 5,32',
        'fmvis 5,64',
        'fmvis 5,32768',
    ]
    lst = [
        'sv.andi. *80, *80, 1',
        'sv.ffmadds. 6.v, 2.v, 4.v, 6.v',  # incorrectly inserted 32-bit op
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',  # correctly converted to .long
        'svshape2 8, 1, 31, 7, 1, 1',
        'sv.ld 5.v, 4(1.v)',
        'sv.stw 5.v, 4(1.v)',
        'sv.bc/all 3,12,192',
        'pcdec. 0,0,0,0',
    ]
    # the list that is actually assembled when this file is run directly
    lst = [
        #"sv.cmp/ff=gt *0,*1,*2,0",
        "dsld 5,4,5,3",

    ]
    isa = SVP64Asm(lst, macros=macros)
    # iterating the SVP64Asm object gives the translated lines as strings
    log("list:\n", "\n\t".join(list(isa)))
    # running svp64.py is designed to test hard-coded lists
    # (above) - which strictly speaking should all be unit tests.
    # if you need to actually do assembler translation at the
    # commandline use "pysvp64asm" - see setup.py
    # XXX NO. asm_process()