1e38dc14a76ed4c77b2afa858548f52fdcfb7998
[openpower-isa.git] / src / openpower / sv / trans / svp64.py
1 # SPDX-License-Identifier: LGPLv3+
2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
3 # Funded by NLnet http://nlnet.nl
4
5 """SVP64 OpenPOWER v3.0B assembly translator
6
7 This class takes raw svp64 assembly mnemonics (aliases excluded) and creates
8 an EXT001-encoded "svp64 prefix" (as a .long) followed by a v3.0B opcode.
9
10 It is very simple and straightforward, the only weirdness being the
11 extraction of the register information and conversion to v3.0B numbering.
12
13 Encoding format of svp64: https://libre-soc.org/openpower/sv/svp64/
14 Encoding format of arithmetic: https://libre-soc.org/openpower/sv/normal/
15 Encoding format of LDST: https://libre-soc.org/openpower/sv/ldst/
16 **TODO format of branches: https://libre-soc.org/openpower/sv/branches/**
17 **TODO format of CRs: https://libre-soc.org/openpower/sv/cr_ops/**
18 Bugtracker: https://bugs.libre-soc.org/show_bug.cgi?id=578
19 """
20
21 import functools
22 import os
23 import sys
24 from collections import OrderedDict
25 import inspect
26
27 from openpower.decoder.pseudo.pagereader import ISA
28 from openpower.decoder.power_svp64 import SVP64RM, get_regtype, decode_extra
29 from openpower.decoder.selectable_int import SelectableInt
30 from openpower.consts import SVP64MODE
31 from openpower.decoder.power_insn import SVP64Instruction
32 from openpower.decoder.power_insn import Database
33 from openpower.decoder.power_insn import WordInstruction
34 from openpower.decoder.power_enums import find_wiki_dir
35
36 # for debug logging
37 from openpower.util import log
38
39
40 # decode GPR into sv extra
def get_extra_gpr(etype, regmode, field):
    """Split an SVP64 GPR/FPR number into ``(sv_extra, v3.0B field)``.

    For ``regmode == 'scalar'`` the number is cut as ``SS FFFFF``: the low
    5 bits are the field and everything above them is the extra.  For any
    other regmode it is cut as ``FFFFF SS``: the low 2 bits are the extra
    and everything above them is the field.

    ``etype`` is accepted for signature symmetry but is not consulted here.
    """
    if regmode != 'scalar':
        # vector layout: extra bits sit underneath the field
        return field & 0b11, field >> 2
    # scalar layout: extra bits sit on top of the 5-bit field
    return field >> 5, field & 0b11111
51
52
53 # decode 3-bit CR into sv extra
def get_extra_cr_3bit(etype, regmode, field):
    """Split an SVP64 CR Field number into ``(sv_extra, 3-bit field)``.

    For ``regmode == 'scalar'`` the number is cut as ``SS FFF``.  For any
    other regmode it is cut as ``FFF SSSS``; the caller is expected to
    trim the surplus low zero bits of the extra afterwards.

    ``etype`` is accepted for signature symmetry but is not consulted here.
    """
    if regmode != 'scalar':
        # vector layout: 4 extra bits underneath the field
        return field & 0b1111, field >> 4
    # scalar layout: extra bits on top of the 3-bit field
    return field >> 3, field & 0b111
64
65
66 # decodes SUBVL
def decode_subvl(encoding):
    """Return the 2-bit SUBVL value for a ``vecN`` qualifier suffix.

    ``encoding`` is the text after ``vec`` ('2', '3' or '4').

    Raises AssertionError for anything else.  The error is raised
    explicitly (rather than via ``assert``) so that the check and its
    message survive ``python -O``.
    """
    pmap = {'2': 0b01, '3': 0b10, '4': 0b11}
    if encoding not in pmap:
        raise AssertionError(
            "encoding %s for SUBVL not recognised" % encoding)
    return pmap[encoding]
72
73
74 # decodes elwidth
def decode_elwidth(encoding):
    """Return the 2-bit element-width value for a width qualifier.

    ``encoding`` is the bit-width as text: '8', '16' or '32'.

    Raises AssertionError for anything else.  The error is raised
    explicitly (rather than via ``assert``) so that the check and its
    message survive ``python -O``.
    """
    pmap = {'8': 0b11, '16': 0b10, '32': 0b01}
    if encoding not in pmap:
        raise AssertionError(
            "encoding %s for elwidth not recognised" % encoding)
    return pmap[encoding]
80
81
82 # decodes predicate register encoding
def decode_predicate(encoding):
    """Decode a predicate-mask name into ``(mask_mode, mask_bits)``.

    ``mask_mode`` is 0 for the integer-register predicates ('1<<r3', 'r3',
    '~r3', 'r10', ...) and 1 for the CR-bit predicates ('lt', 'ge', 'eq',
    ...).  ``mask_bits`` is the 3-bit selector within that mode.  Several
    CR names are aliases for the same value (e.g. 'nl'/'ge').

    Raises AssertionError for an unknown name.  The error is raised
    explicitly (rather than via ``assert``) so that the check and its
    message survive ``python -O``.
    """
    pmap = {  # integer
        '1<<r3': (0, 0b001),
        'r3': (0, 0b010),
        '~r3': (0, 0b011),
        'r10': (0, 0b100),
        '~r10': (0, 0b101),
        'r30': (0, 0b110),
        '~r30': (0, 0b111),
        # CR
        'lt': (1, 0b000),
        'nl': (1, 0b001), 'ge': (1, 0b001),  # same value
        'gt': (1, 0b010),
        'ng': (1, 0b011), 'le': (1, 0b011),  # same value
        'eq': (1, 0b100),
        'ne': (1, 0b101),
        'so': (1, 0b110), 'un': (1, 0b110),  # same value
        'ns': (1, 0b111), 'nu': (1, 0b111),  # same value
    }
    if encoding not in pmap:
        raise AssertionError(
            "encoding %s for predicate not recognised" % encoding)
    return pmap[encoding]
105
106
107 # decodes "Mode" in similar way to BO field (supposed to, anyway)
def decode_bo(encoding):
    """Decode a CR condition name into the 3-bit "BO"-style Mode value.

    The name is first mapped to the same 3-bit numbering that
    decode_predicate() uses for CR predicates, where the LSB is the
    inversion flag and the upper two bits select the CR bit.  The result
    is then re-packed through a 3-bit SelectableInt: the inversion flag is
    written to index 0 and the CR-bit selector to indices 1:3, i.e. the
    (inv, CRbit) order the spec uses for this field.

    Raises AssertionError for an unknown name.  The error is raised
    explicitly (rather than via ``assert``) so that the check and its
    message survive ``python -O``.
    """
    pmap = {  # TODO: double-check that these are the same as Branch BO
        'lt': 0b000,
        'nl': 0b001, 'ge': 0b001,  # same value
        'gt': 0b010,
        'ng': 0b011, 'le': 0b011,  # same value
        'eq': 0b100,
        'ne': 0b101,
        'so': 0b110, 'un': 0b110,  # same value
        'ns': 0b111, 'nu': 0b111,  # same value
    }
    if encoding not in pmap:
        raise AssertionError(
            "encoding %s for BO Mode not recognised" % encoding)
    # barse-ackwards MSB0/LSB0.  sigh.  this would be nice to be the
    # same as the decode_predicate() CRfield table above, but (inv,CRbit)
    # is how it is in the spec [decode_predicate is (CRbit,inv)]
    mapped = pmap[encoding]
    si = SelectableInt(0, 3)
    si[0] = mapped & 1  # inv
    si[1:3] = mapped >> 1  # CR
    return int(si)
129
130
131 # partial-decode fail-first mode
def decode_ffirst(encoding):
    """Partially decode a fail-first (or predicate-result) qualifier.

    The two RC1 spellings are handed back untouched as strings, leaving
    the caller to deal with them; every other name is treated as a CR
    condition and converted by decode_bo().
    """
    rc1_forms = ('RC1', '~RC1')
    if encoding not in rc1_forms:
        return decode_bo(encoding)
    return encoding
136
137
def decode_reg(field, macros=None):
    """Decode a register operand into ``(register_number, regmode)``.

    Accepted spellings:

    * ``*N`` or ``*%N`` -- vector register N.  After the marker is
      stripped, the remainder is looked up repeatedly in ``macros`` until
      it is no longer a macro name, then converted with int().
    * ``N.v`` / ``N.s`` / ``N`` -- old convention (to be retired): vector,
      scalar, and scalar respectively.

    note: *NOT* to add "*%rNNN" etc.
    https://bugs.libre-soc.org/show_bug.cgi?id=884#c0
    """
    lookup = {} if macros is None else macros

    if field.startswith("*"):
        # new convention: "*" (optionally "*%") marks a vector register
        name = field[2:] if field.startswith("*%") else field[1:]
        while name in lookup:
            name = lookup[name]
        return int(name), "vector"  # actual register number

    # old convention: only an explicit ".v" suffix selects vector mode
    parts = field.split(".")
    is_vector = len(parts) == 2 and parts[1] == 'v'
    number = int(parts[0])  # actual register number
    return number, ('vector' if is_vector else 'scalar')
163
164
def decode_imm(field):
    """Split an ``imm(reg)`` operand into its immediate and register parts.

    When ``field`` contains "(" and ends with ")", the trailing ")" is
    dropped and the rest is split on "(", returning the resulting list
    (``[imm, reg]`` for the usual single-parenthesis form).  Otherwise the
    tuple ``(None, field)`` is returned.
    """
    looks_like_ldst = "(" in field and field.endswith(')')
    if not looks_like_ldst:
        return None, field
    return field[:-1].split("(")
171
172
def crf_extra(etype, rname, extra_idx, regmode, field, extras):
    """takes a CR Field number (CR0-CR127), splits into EXTRA2/3 and v3.0
    the scalar/vector mode (crNN.v or crNN.s) changes both the format
    of the EXTRA2/3 encoding as well as what range of registers is possible.
    this function can be used for both BF/BFA and BA/BB/BT by first removing
    the bottom 2 bits of BA/BB/BT then re-instating them after encoding.
    see https://libre-soc.org/openpower/sv/svp64/appendix/#cr_extra
    for specification

    returns (sv_extra, field); raises AssertionError when the requested
    CR Field cannot be expressed in the given EXTRA type.  rname,
    extra_idx and extras are only used to build that error message.
    """
    sv_extra, field = get_extra_cr_3bit(etype, regmode, field)
    is_extra2 = etype == 'EXTRA2'
    is_scalar = regmode == 'scalar'

    # sanity-check each of the four combinations, then shrink the extra
    if is_extra2 and is_scalar:
        # EXTRA2 scalar: range is CR0-CR15 in increments of 1
        assert (sv_extra >> 1) == 0, \
            "scalar CR %s cannot fit into EXTRA2 %s" % \
            (rname, str(extras[extra_idx]))
        sv_extra &= 0b01
    elif is_extra2:
        # EXTRA2 vector: range is CR0-CR127 in increments of 16
        assert sv_extra & 0b111 == 0, \
            "vector CR %s cannot fit into EXTRA2 %s" % \
            (rname, str(extras[extra_idx]))
        # mark as vector with the top bit of the 2-bit extra
        sv_extra = 0b10 | (sv_extra >> 3)
    elif is_scalar:
        # EXTRA3 scalar: range is CR0-CR31 in increments of 1
        assert (sv_extra >> 2) == 0, \
            "scalar CR %s cannot fit into EXTRA3 %s" % \
            (rname, str(extras[extra_idx]))
        sv_extra &= 0b11
    else:
        # EXTRA3 vector: range is CR0-CR127 in increments of 8
        assert sv_extra & 0b11 == 0, \
            "vector CR %s cannot fit into EXTRA3 %s" % \
            (rname, str(extras[extra_idx]))
        # mark as vector with the top bit of the 3-bit extra
        sv_extra = 0b100 | (sv_extra >> 2)
    return sv_extra, field
217
218
def to_number(field):
    """Convert an assembler numeric literal to an int.

    Literals starting with "0x" are parsed as hexadecimal, those starting
    with "0b" as binary, and everything else as decimal.

    The prefixed forms are parsed with int(field, base) rather than
    eval(): eval() would execute any expression that merely starts with
    "0x"/"0b", which is unsafe on assembler text and hides malformed
    literals.  Malformed input raises ValueError.
    """
    if field.startswith("0x"):
        return int(field, 16)
    if field.startswith("0b"):
        return int(field, 2)
    return int(field)
225
226
# module-level instruction database, built once when this module is
# imported from whatever find_wiki_dir() returns.  SVP64Asm.translate_one
# looks opcodes up in it (DB[opcode]) only when the INSNDB environment
# variable is set.
DB = Database(find_wiki_dir())
228
229
230 # decodes svp64 assembly listings and creates EXT001 svp64 prefixes
231 class SVP64Asm:
232 def __init__(self, lst, bigendian=False, macros=None):
233 if macros is None:
234 macros = {}
235 self.macros = macros
236 self.lst = lst
237 self.trans = self.translate(lst)
238 self.isa = ISA() # reads the v3.0B pseudo-code markdown files
239 self.svp64 = SVP64RM() # reads the svp64 Remap entries for registers
240 assert bigendian == False, "error, bigendian not supported yet"
241
242 def __iter__(self):
243 yield from self.trans
244
245 def translate_one(self, insn, macros=None):
246 if macros is None:
247 macros = {}
248 macros.update(self.macros)
249 isa = self.isa
250 svp64 = self.svp64
251 insn_no_comments = insn.partition('#')[0]
252 # find first space, to get opcode
253 ls = insn_no_comments.split()
254 opcode = ls[0]
255 # now find opcode fields
256 fields = ''.join(ls[1:]).split(',')
257 mfields = list(filter(bool, map(str.strip, fields)))
258 log("opcode, fields", ls, opcode, mfields)
259 fields = []
260 # macro substitution
261 for field in mfields:
262 fields.append(macro_subst(macros, field))
263 log("opcode, fields substed", ls, opcode, fields)
264
265 # identify if it is a word instruction
266 record = None
267 if os.environ.get("INSNDB"):
268 record = DB[opcode]
269 if record is not None:
270 insn = WordInstruction.assemble(db=DB,
271 opcode=opcode, arguments=fields)
272 yield " ".join((
273 f".long 0x{int(insn):08X}",
274 "#",
275 opcode,
276 ",".join(fields),
277 ))
278 return
279
280 # identify if is a svp64 mnemonic
281 if not opcode.startswith('sv.'):
282 yield insn # unaltered
283 return
284 opcode = opcode[3:] # strip leading "sv"
285
286 # start working on decoding the svp64 op: sv.basev30Bop/vec2/mode
287 opmodes = opcode.split("/") # split at "/"
288 v30b_op_orig = opmodes.pop(0) # first is the v3.0B
289 # check instruction ends with dot
290 rc_mode = v30b_op_orig.endswith('.')
291 if rc_mode:
292 v30b_op = v30b_op_orig[:-1]
293 else:
294 v30b_op = v30b_op_orig
295
296 record = None
297 if os.environ.get("INSNDB"):
298 record = DB[v30b_op]
299 if record is not None:
300 insn = SVP64Instruction.assemble(db=DB,
301 opcode=v30b_op_orig,
302 arguments=fields,
303 specifiers=opmodes)
304 prefix = int(insn.prefix)
305 suffix = int(insn.suffix)
306 yield " ".join((
307 f".long 0x{prefix:08X};",
308 f".long 0x{suffix:08X};",
309 "#",
310 opcode,
311 ",".join(fields),
312 ))
313 return
314
315 # look up the 32-bit op (original, with "." if it has it)
316 if v30b_op_orig in isa.instr:
317 isa_instr = isa.instr[v30b_op_orig]
318 else:
319 raise Exception("opcode %s of '%s' not supported" %
320 (v30b_op_orig, insn))
321
322 # look up the svp64 op, first the original (with "." if it has it)
323 if v30b_op_orig in svp64.instrs:
324 rm = svp64.instrs[v30b_op_orig] # one row of the svp64 RM CSV
325 # then without the "." (if there was one)
326 elif v30b_op in svp64.instrs:
327 rm = svp64.instrs[v30b_op] # one row of the svp64 RM CSV
328 else:
329 raise Exception(f"opcode {v30b_op_orig!r} of "
330 f"{insn!r} not an svp64 instruction")
331
332 # get regs info e.g. "RT,RA,RB"
333 v30b_regs = isa_instr.regs[0]
334 log("v3.0B op", v30b_op, "Rc=1" if rc_mode else '')
335 log("v3.0B regs", opcode, v30b_regs)
336 log("RM", rm)
337
338 # right. the first thing to do is identify the ordering of
339 # the registers, by name. the EXTRA2/3 ordering is in
340 # rm['0']..rm['3'] but those fields contain the names RA, BB
341 # etc. we have to read the pseudocode to understand which
342 # reg is which in our instruction. sigh.
343
344 # first turn the svp64 rm into a "by name" dict, recording
345 # which position in the RM EXTRA it goes into
346 # also: record if the src or dest was a CR, for sanity-checking
347 # (elwidth overrides on CRs are banned)
348 decode = decode_extra(rm)
349 dest_reg_cr, src_reg_cr, svp64_src, svp64_dest = decode
350
351 log("EXTRA field index, src", svp64_src)
352 log("EXTRA field index, dest", svp64_dest)
353
354 # okaaay now we identify the field value (opcode N,N,N) with
355 # the pseudo-code info (opcode RT, RA, RB)
356 assert len(fields) == len(v30b_regs), \
357 "length of fields %s must match insn `%s` fields %s" % \
358 (str(v30b_regs), insn, str(fields))
359 opregfields = zip(fields, v30b_regs) # err that was easy
360
361 # now for each of those find its place in the EXTRA encoding
362 # note there is the possibility (for LD/ST-with-update) of
363 # RA occurring **TWICE**. to avoid it getting added to the
364 # v3.0B suffix twice, we spot it as a duplicate, here
365 extras = OrderedDict()
366 for idx, (field, regname) in enumerate(opregfields):
367 imm, regname = decode_imm(regname)
368 rtype = get_regtype(regname)
369 log(" idx find", rtype, idx, field, regname, imm)
370 if rtype is None:
371 # probably an immediate field, append it straight
372 extras[('imm', idx, False)] = (idx, field, None, None, None)
373 continue
374 extra = svp64_src.get(regname, None)
375 if extra is not None:
376 extra = ('s', extra, False) # not a duplicate
377 extras[extra] = (idx, field, regname, rtype, imm)
378 log(" idx src", idx, extra, extras[extra])
379 dextra = svp64_dest.get(regname, None)
380 log("regname in", regname, dextra)
381 if dextra is not None:
382 is_a_duplicate = extra is not None # duplicate spotted
383 dextra = ('d', dextra, is_a_duplicate)
384 extras[dextra] = (idx, field, regname, rtype, imm)
385 log(" idx dst", idx, extra, extras[dextra])
386
387 # great! got the extra fields in their associated positions:
388 # also we know the register type. now to create the EXTRA encodings
389 etype = rm['Etype'] # Extra type: EXTRA3/EXTRA2
390 ptype = rm['Ptype'] # Predication type: Twin / Single
391 extra_bits = 0
392 v30b_newfields = []
393 for extra_idx, (idx, field, rname, rtype, iname) in extras.items():
394 # is it a field we don't alter/examine? if so just put it
395 # into newfields
396 if rtype is None:
397 v30b_newfields.append(field)
398 continue
399
400 # identify if this is a ld/st immediate(reg) thing
401 ldst_imm = "(" in field and field[-1] == ')'
402 if ldst_imm:
403 immed, field = field[:-1].split("(")
404
405 field, regmode = decode_reg(field, macros=macros)
406 log(" ", extra_idx, rname, rtype,
407 regmode, iname, field, end=" ")
408
409 # see Mode field https://libre-soc.org/openpower/sv/svp64/
410 # XXX TODO: the following is a bit of a laborious repeated
411 # mess, which could (and should) easily be parameterised.
412 # XXX also TODO: the LD/ST modes which are different
413 # https://libre-soc.org/openpower/sv/ldst/
414
415 # rright. SVP64 register numbering is from 0 to 127
416 # for GPRs, FPRs *and* CR Fields, where for v3.0 the GPRs and RPFs
417 # are 0-31 and CR Fields are only 0-7. the SVP64 RM "Extra"
418 # area is used to extend the numbering from the 32-bit
419 # instruction, and also to record whether the register
420 # is scalar or vector. on a per-operand basis. this
421 # results in a slightly finnicky encoding: here we go...
422
423 # encode SV-GPR and SV-FPR field into extra, v3.0field
424 if rtype in ['GPR', 'FPR']:
425 sv_extra, field = get_extra_gpr(etype, regmode, field)
426 # now sanity-check. EXTRA3 is ok, EXTRA2 has limits
427 # (and shrink to a single bit if ok)
428 if etype == 'EXTRA2':
429 if regmode == 'scalar':
430 # range is r0-r63 in increments of 1
431 assert (sv_extra >> 1) == 0, \
432 "scalar GPR %s cannot fit into EXTRA2 %s" % \
433 (rname, str(extras[extra_idx]))
434 # all good: encode as scalar
435 sv_extra = sv_extra & 0b01
436 else:
437 # range is r0-r127 in increments of 2 (r0 r2 ... r126)
438 assert sv_extra & 0b01 == 0, \
439 "%s: vector field %s cannot fit " \
440 "into EXTRA2 %s" % \
441 (insn, rname, str(extras[extra_idx]))
442 # all good: encode as vector (bit 2 set)
443 sv_extra = 0b10 | (sv_extra >> 1)
444 elif regmode == 'vector':
445 # EXTRA3 vector bit needs marking
446 sv_extra |= 0b100
447
448 # encode SV-CR 3-bit field into extra, v3.0field.
449 # 3-bit is for things like BF and BFA
450 elif rtype == 'CR_3bit':
451 sv_extra, field = crf_extra(etype, rname, extra_idx,
452 regmode, field, extras)
453
454 # encode SV-CR 5-bit field into extra, v3.0field
455 # 5-bit is for things like BA BB BC BT etc.
456 # *sigh* this is the same as 3-bit except the 2 LSBs of the
457 # 5-bit field are passed through unaltered.
458 elif rtype == 'CR_5bit':
459 cr_subfield = field & 0b11 # record bottom 2 bits for later
460 field = field >> 2 # strip bottom 2 bits
461 # use the exact same 3-bit function for the top 3 bits
462 sv_extra, field = crf_extra(etype, rname, extra_idx,
463 regmode, field, extras)
464 # reconstruct the actual 5-bit CR field (preserving the
465 # bottom 2 bits, unaltered)
466 field = (field << 2) | cr_subfield
467
468 else:
469 raise Exception("no type match: %s" % rtype)
470
471 # capture the extra field info
472 log("=>", "%5s" % bin(sv_extra), field)
473 extras[extra_idx] = sv_extra
474
475 # append altered field value to v3.0b, differs for LDST
476 # note that duplicates are skipped e.g. EXTRA2 contains
477 # *BOTH* s:RA *AND* d:RA which happens on LD/ST-with-update
478 srcdest, idx, duplicate = extra_idx
479 if duplicate: # skip adding to v3.0b fields, already added
480 continue
481 if ldst_imm:
482 v30b_newfields.append(("%s(%s)" % (immed, str(field))))
483 else:
484 v30b_newfields.append(str(field))
485
486 log("new v3.0B fields", v30b_op, v30b_newfields)
487 log("extras", extras)
488
489 # rright. now we have all the info. start creating SVP64 instruction.
490 svp64_insn = SVP64Instruction.pair(prefix=0, suffix=0)
491 svp64_prefix = svp64_insn.prefix
492 svp64_rm = svp64_insn.prefix.rm
493
494 # begin with EXTRA fields
495 for idx, sv_extra in extras.items():
496 log(idx)
497 if idx is None:
498 continue
499 if idx[0] == 'imm':
500 continue
501 srcdest, idx, duplicate = idx
502 if etype == 'EXTRA2':
503 svp64_rm.extra2[idx] = sv_extra
504 else:
505 svp64_rm.extra3[idx] = sv_extra
506
507 # identify if the op is a LD/ST.
508 # see https://libre-soc.org/openpower/sv/ldst/
509 is_ldst = rm['mode'] in ['LDST_IDX', 'LDST_IMM']
510 is_ldst_idx = rm['mode'] == 'LDST_IDX'
511 is_ldst_imm = rm['mode'] == 'LDST_IMM'
512 is_ld = v30b_op.startswith("l") and is_ldst
513 is_st = v30b_op.startswith("s") and is_ldst
514
515 # branch-conditional or CR detection
516 is_bc = rm['mode'] == 'BRANCH'
517 is_cr = rm['mode'] == 'CROP'
518
519 # parts of svp64_rm
520 mmode = 0 # bit 0
521 pmask = 0 # bits 1-3
522 destwid = 0 # bits 4-5
523 srcwid = 0 # bits 6-7
524 subvl = 0 # bits 8-9
525 smask = 0 # bits 16-18 but only for twin-predication
526 mode = 0 # bits 19-23
527
528 mask_m_specified = False
529 has_pmask = False
530 has_smask = False
531
532 saturation = None
533 src_zero = 0
534 dst_zero = 0
535 sv_mode = None
536
537 mapreduce = False
538 reverse_gear = False
539 mapreduce_crm = False
540
541 predresult = False
542 failfirst = False
543 ldst_elstride = 0
544 ldst_postinc = 0
545 sea = False
546
547 vli = False
548 sea = False
549
550 # ok let's start identifying opcode augmentation fields
551 for encmode in opmodes:
552 # predicate mask (src and dest)
553 if encmode.startswith("m="):
554 pme = encmode
555 pmmode, pmask = decode_predicate(encmode[2:])
556 smmode, smask = pmmode, pmask
557 mmode = pmmode
558 mask_m_specified = True
559 # predicate mask (dest)
560 elif encmode.startswith("dm="):
561 pme = encmode
562 pmmode, pmask = decode_predicate(encmode[3:])
563 mmode = pmmode
564 has_pmask = True
565 # predicate mask (src, twin-pred)
566 elif encmode.startswith("sm="):
567 sme = encmode
568 smmode, smask = decode_predicate(encmode[3:])
569 mmode = smmode
570 has_smask = True
571 # vec2/3/4
572 elif encmode.startswith("vec"):
573 subvl = decode_subvl(encmode[3:])
574 # elwidth (both src and dest, like mask)
575 elif encmode.startswith("w="):
576 destwid = decode_elwidth(encmode[2:])
577 srcwid = decode_elwidth(encmode[2:])
578 # just dest width
579 elif encmode.startswith("dw="):
580 destwid = decode_elwidth(encmode[3:])
581 # just src width
582 elif encmode.startswith("sw="):
583 srcwid = decode_elwidth(encmode[3:])
584 # post-increment
585 elif encmode == 'pi':
586 ldst_postinc = 1
587 # in indexed mode, set sv_mode=0b00
588 assert is_ldst_imm is True
589 sv_mode = 0b00
590 # element-strided LD/ST
591 elif encmode == 'els':
592 ldst_elstride = 1
593 # in indexed mode, set sv_mode=0b01
594 if is_ldst_idx:
595 sv_mode = 0b01
596 # saturation
597 elif encmode == 'sats':
598 assert sv_mode is None
599 saturation = 1
600 sv_mode = 0b10
601 elif encmode == 'satu':
602 assert sv_mode is None
603 sv_mode = 0b10
604 saturation = 0
605 # predicate zeroing
606 elif encmode == 'zz': # TODO, a lot more checking on legality
607 dst_zero = 1 # NOT on cr_ops, that's RM[6]
608 src_zero = 1
609 elif encmode == 'sz':
610 src_zero = 1
611 elif encmode == 'dz':
612 dst_zero = 1
613 # failfirst
614 elif encmode.startswith("ff="):
615 assert sv_mode is None
616 if is_cr: # sigh, CROPs is different
617 sv_mode = 0b10
618 else:
619 sv_mode = 0b01
620 failfirst = decode_ffirst(encmode[3:])
621 assert sea is False, "cannot use failfirst with signed-address"
622 # predicate-result, interestingly same as fail-first
623 elif encmode.startswith("pr="):
624 assert sv_mode is None
625 sv_mode = 0b11
626 predresult = decode_ffirst(encmode[3:])
627 # map-reduce mode, reverse-gear
628 elif encmode == 'mrr':
629 assert sv_mode is None
630 sv_mode = 0b00
631 mapreduce = True
632 reverse_gear = True
633 # map-reduce mode
634 elif encmode == 'mr':
635 assert sv_mode is None
636 sv_mode = 0b00
637 mapreduce = True
638 elif encmode == 'crm': # CR on map-reduce
639 assert sv_mode is None
640 sv_mode = 0b00
641 mapreduce_crm = True
642 elif encmode == 'vli':
643 assert failfirst is not False, "VLi only allowed in failfirst"
644 vli = True
645 elif encmode == 'sea':
646 assert is_ldst_idx
647 sea = True
648 assert failfirst is False, "cannot use ffirst+signed-address"
649 elif is_bc:
650 if encmode == 'all':
651 svp64_rm.branch.ALL = 1
652 elif encmode == 'snz':
653 svp64_rm.branch.sz = 1
654 svp64_rm.branch.SNZ = 1
655 elif encmode == 'sl':
656 svp64_rm.branch.SL = 1
657 elif encmode == 'slu':
658 svp64_rm.branch.SLu = 1
659 elif encmode == 'lru':
660 svp64_rm.branch.LRu = 1
661 elif encmode == 'vs':
662 svp64_rm.branch.VLS = 1
663 elif encmode == 'vsi':
664 svp64_rm.branch.VLS = 1
665 svp64_rm.branch.vls.VLi = 1
666 elif encmode == 'vsb':
667 svp64_rm.branch.VLS = 1
668 svp64_rm.branch.vls.VSb = 1
669 elif encmode == 'vsbi':
670 svp64_rm.branch.VLS = 1
671 svp64_rm.branch.vls.VSb = 1
672 svp64_rm.branch.vls.VLi = 1
673 elif encmode == 'ctr':
674 svp64_rm.branch.CTR = 1
675 elif encmode == 'cti':
676 svp64_rm.branch.CTR = 1
677 svp64_rm.branch.ctr.CTi = 1
678 else:
679 raise AssertionError("unknown encmode %s" % encmode)
680 else:
681 raise AssertionError("unknown encmode %s" % encmode)
682
683 # post-inc only available on ld-with-update
684 if ldst_postinc:
685 assert "u" in opcode, "/pi only available on ld/st-update"
686
687 # sanity check if dz/zz used in branch-mode
688 if is_bc and dst_zero:
689 raise AssertionError("dz/zz not supported in branch, use 'sz'")
690
691 # check sea *after* all qualifiers are evaluated
692 if sea:
693 assert sv_mode in (None, 0b00, 0b01)
694
695 if ptype == '2P':
696 # since m=xx takes precedence (overrides) sm=xx and dm=xx,
697 # treat them as mutually exclusive
698 if mask_m_specified:
699 assert not has_smask,\
700 "cannot have both source-mask and predicate mask"
701 assert not has_pmask,\
702 "cannot have both dest-mask and predicate mask"
703 # since the default is INT predication (ALWAYS), if you
704 # specify one CR mask, you must specify both, to avoid
705 # mixing INT and CR reg types
706 if has_pmask and pmmode == 1:
707 assert has_smask, \
708 "need explicit source-mask in CR twin predication"
709 if has_smask and smmode == 1:
710 assert has_pmask, \
711 "need explicit dest-mask in CR twin predication"
712 # sanity-check that 2Pred mask is same mode
713 if has_pmask and has_smask:
714 assert smmode == pmmode, \
715 "predicate masks %s and %s must be same reg type" % \
716 (pme, sme)
717
718 # sanity-check that twin-predication mask only specified in 2P mode
719 if ptype == '1P':
720 assert not has_smask, \
721 "source-mask can only be specified on Twin-predicate ops"
722 assert not has_pmask, \
723 "dest-mask can only be specified on Twin-predicate ops"
724
725 # construct the mode field, doing sanity-checking along the way
726 if src_zero:
727 assert has_smask or mask_m_specified, \
728 "src zeroing requires a source predicate"
729 if dst_zero:
730 assert has_pmask or mask_m_specified, \
731 "dest zeroing requires a dest predicate"
732
733 # okaaay, so there are 4 different modes, here, which will be
734 # partly-merged-in: is_ldst is merged in with "normal", but
735 # is_bc is so different it's done separately. likewise is_cr
736 # (when it is done). here are the maps:
737
738 # for "normal" arithmetic: https://libre-soc.org/openpower/sv/normal/
739 """
740 | 0-1 | 2 | 3 4 | description |
741 | --- | --- |---------|-------------------------- |
742 | 00 | 0 | dz sz | simple mode |
743 | 00 | 1 | 0 RG | scalar reduce mode (mapreduce) |
744 | 01 | inv | CR-bit | Rc=1: ffirst CR sel |
745 | 01 | inv | VLi RC1 | Rc=0: ffirst z/nonz |
746 | 10 | N | dz sz | sat mode: N=0/1 u/s |
747 | 11 | inv | CR-bit | Rc=1: pred-result CR sel |
748 | 11 | inv | zz RC1 | Rc=0: pred-result z/nonz |
749 """
750
751 # https://libre-soc.org/openpower/sv/ldst/
752 # for LD/ST-immediate:
753 """
754 | 0-1 | 2 | 3 4 | description |
755 | --- | --- |---------|--------------------------- |
756 | 00 | 0 | zz els | normal mode |
757 | 00 | 1 | pi lf | post-inc, LD-fault-first |
758 | 01 | inv | CR-bit | Rc=1: ffirst CR sel |
759 | 01 | inv | els RC1 | Rc=0: ffirst z/nonz |
760 | 10 | N | zz els | sat mode: N=0/1 u/s |
761 | 11 | inv | CR-bit | Rc=1: pred-result CR sel |
762 | 11 | inv | els RC1 | Rc=0: pred-result z/nonz |
763 """
764
765 # for LD/ST-indexed (RA+RB):
766 """
767 | 0-1 | 2 | 3 4 | description |
768 | --- | --- |---------|----------------------------- |
769 | 00 | SEA | dz sz | normal mode |
770 | 01 | SEA | dz sz | strided (scalar only source) |
771 | 10 | N | dz sz | sat mode: N=0/1 u/s |
772 | 11 | inv | CR-bit | Rc=1: pred-result CR sel |
773 | 11 | inv | dz RC1 | Rc=0: pred-result z/nonz |
774 """
775
776 # and leaving out branches and cr_ops for now because they're
777 # under development
778 """ TODO branches and cr_ops
779 """
780
781 if is_bc:
782 sv_mode = int(svp64_rm.mode[0, 1])
783 if src_zero:
784 svp64_rm.branch.sz = 1
785
786 else:
787 ######################################
788 # "element-strided" mode, ldst_idx
789 if sv_mode == 0b01 and is_ldst_idx:
790 mode |= src_zero << SVP64MODE.SZ # predicate zeroing
791 mode |= dst_zero << SVP64MODE.DZ # predicate zeroing
792 mode |= sea << SVP64MODE.SEA # el-strided
793
794 ######################################
795 # "normal" mode
796 elif sv_mode is None:
797 mode |= src_zero << SVP64MODE.SZ # predicate zeroing
798 mode |= dst_zero << SVP64MODE.DZ # predicate zeroing
799 if is_ldst:
800 # TODO: for now, LD/ST-indexed is ignored.
801 mode |= ldst_elstride << SVP64MODE.ELS_NORMAL # el-strided
802 else:
803 # TODO, reduce and subvector mode
804 # 00 1 dz CRM reduce mode (mapreduce), SUBVL=1
805 # 00 1 SVM CRM subvector reduce mode, SUBVL>1
806 pass
807 sv_mode = 0b00
808
809 ######################################
810 # ldst-immediate "post" (and "load-fault-first" modes)
811 elif sv_mode == 0b00 and ldst_postinc == 1: # (or ldst_ld_ffirst)
812 mode |= (0b1 << SVP64MODE.LDI_POST) # sets bit 2
813 mode |= (ldst_postinc << SVP64MODE.LDI_PI) # sets post-inc
814
815 ######################################
816 # "mapreduce" modes
817 elif sv_mode == 0b00:
818 mode |= (0b1 << SVP64MODE.REDUCE) # sets mapreduce
819 assert dst_zero == 0, "dest-zero not allowed in mapreduce mode"
820 if reverse_gear:
821 mode |= (0b1 << SVP64MODE.RG) # sets Reverse-gear mode
822 if mapreduce_crm:
823 mode |= (0b1 << SVP64MODE.CRM) # sets CRM mode
824 assert rc_mode, "CRM only allowed when Rc=1"
825 # bit of weird encoding to jam zero-pred or SVM mode in.
826 # SVM mode can be enabled only when SUBVL=2/3/4 (vec2/3/4)
827 if subvl == 0:
828 mode |= dst_zero << SVP64MODE.DZ # predicate zeroing
829
830 ######################################
831 # "failfirst" modes
832 elif failfirst is not False and not is_cr: # sv_mode == 0b01:
833 assert src_zero == 0, "dest-zero not allowed in failfirst mode"
834 if failfirst == 'RC1':
835 mode |= (0b1 << SVP64MODE.RC1) # sets RC1 mode
836 mode |= (dst_zero << SVP64MODE.DZ) # predicate dst-zeroing
837 assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
838 elif failfirst == '~RC1':
839 mode |= (0b1 << SVP64MODE.RC1) # sets RC1 mode
840 mode |= (dst_zero << SVP64MODE.DZ) # predicate dst-zeroing
841 mode |= (0b1 << SVP64MODE.INV) # ... with inversion
842 assert rc_mode == False, "ffirst RC1 only ok when Rc=0"
843 else:
844 assert dst_zero == 0, "dst-zero not allowed in ffirst BO"
845 assert rc_mode, "ffirst BO only possible when Rc=1"
846 mode |= (failfirst << SVP64MODE.BO_LSB) # set BO
847
848 # (crops is really different)
849 elif failfirst is not False and is_cr:
850 if failfirst in ['RC1', '~RC1']:
851 mode |= (src_zero << SVP64MODE.SZ) # predicate src-zeroing
852 mode |= (dst_zero << SVP64MODE.DZ) # predicate dst-zeroing
853 if failfirst == '~RC1':
854 mode |= (0b1 << SVP64MODE.INV) # ... with inversion
855 else:
856 assert dst_zero == src_zero, "dz must equal sz in ffirst BO"
857 mode |= (failfirst << SVP64MODE.BO_LSB) # set BO
858 svp64_rm.cr_op.zz = dst_zero
859 if vli:
860 sv_mode |= 1 # set VLI in LSB of 2-bit mode
861 #svp64_rm.cr_op.vli = 1
862
863 ######################################
864 # "saturation" modes
865 elif sv_mode == 0b10:
866 mode |= src_zero << SVP64MODE.SZ # predicate zeroing
867 mode |= dst_zero << SVP64MODE.DZ # predicate zeroing
868 mode |= (saturation << SVP64MODE.N) # signed/us saturation
869
870 ######################################
871 # "predicate-result" modes. err... code-duplication from ffirst
872 elif sv_mode == 0b11:
873 assert src_zero == 0, "dest-zero not allowed in predresult mode"
874 if predresult == 'RC1':
875 mode |= (0b1 << SVP64MODE.RC1) # sets RC1 mode
876 mode |= (dst_zero << SVP64MODE.DZ) # predicate dst-zeroing
877 assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
878 elif predresult == '~RC1':
879 mode |= (0b1 << SVP64MODE.RC1) # sets RC1 mode
880 mode |= (dst_zero << SVP64MODE.DZ) # predicate dst-zeroing
881 mode |= (0b1 << SVP64MODE.INV) # ... with inversion
882 assert rc_mode == False, "pr-mode RC1 only ok when Rc=0"
883 else:
884 assert dst_zero == 0, "dst-zero not allowed in pr-mode BO"
885 assert rc_mode, "pr-mode BO only possible when Rc=1"
886 mode |= (predresult << SVP64MODE.BO_LSB) # set BO
887
888 # whewww.... modes all done :)
889 # now put into svp64_rm, but respect MSB0 order
890 if sv_mode & 1:
891 mode |= (0b1 << SVP64MODE.MOD2_LSB)
892 if sv_mode & 2:
893 mode |= (0b1 << SVP64MODE.MOD2_MSB)
894
895 if sea:
896 mode |= (0b1 << SVP64MODE.SEA)
897
898 # this is a mess. really look forward to replacing it with Insn DB
899 if not is_bc:
900 svp64_rm.mode = mode # mode: bits 19-23
901 if vli and not is_cr:
902 svp64_rm.normal.ffrc0.VLi = 1
903
904 # put in predicate masks into svp64_rm
905 if ptype == '2P':
906 svp64_rm.smask = smask # source pred: bits 16-18
907
908 # put in elwidths unless cr
909 if not is_cr:
910 svp64_rm.ewsrc = srcwid # srcwid: bits 6-7
911 svp64_rm.elwidth = destwid # destwid: bits 4-5
912
913 svp64_rm.mmode = mmode # mask mode: bit 0
914 svp64_rm.mask = pmask # 1-pred: bits 1-3
915 svp64_rm.subvl = subvl # and subvl: bits 8-9
916
917 # nice debug printout. (and now for something completely different)
918 # https://youtu.be/u0WOIwlXE9g?t=146
919 svp64_rm_value = int(svp64_rm)
920 log("svp64_rm", hex(svp64_rm_value), bin(svp64_rm_value))
921 log(" mmode 0 :", bin(mmode))
922 log(" pmask 1-3 :", bin(pmask))
923 log(" dstwid 4-5 :", bin(destwid))
924 log(" srcwid 6-7 :", bin(srcwid))
925 log(" subvl 8-9 :", bin(subvl))
926 log(" mode 19-23:", bin(svp64_rm.mode))
927 offs = 2 if etype == 'EXTRA2' else 3 # 2 or 3 bits
928 for idx, sv_extra in extras.items():
929 if idx is None:
930 continue
931 if idx[0] == 'imm':
932 continue
933 srcdest, idx, duplicate = idx
934 start = (10+idx*offs)
935 end = start + offs-1
936 log(" extra%d %2d-%2d:" % (idx, start, end),
937 bin(sv_extra))
938 if ptype == '2P':
939 log(" smask 16-17:", bin(smask))
940 log()
941
942 # update prefix PO and ID (aka PID)
943 svp64_prefix.PO = 0x1
944 svp64_prefix.id = 0b11
945
        # finally yield the svp64 prefix, followed by the v3.0B opcode
947 rc = '.' if rc_mode else ''
948 yield ".long 0x%08x" % int(svp64_prefix)
949 log(v30b_op, v30b_newfields)
950
951 v30b_op_rc = v30b_op
952 if not v30b_op.endswith('.'):
953 v30b_op_rc += rc
954
955 record = None
956 if os.environ.get("INSNDB"):
957 record = DB[opcode]
958 if record is not None:
959 insn = WordInstruction.assemble(db=DB,
960 opcode=opcode, arguments=fields)
961 yield " ".join((
962 f".long 0x{int(insn):08X}",
963 "#",
964 opcode,
965 ",".join(fields),
966 ))
967 else:
968 if not v30b_op.endswith('.'):
969 v30b_op += rc
970 yield "%s %s" % (v30b_op, ", ".join(v30b_newfields))
971 for (name, span) in svp64_insn.traverse("SVP64"):
972 value = svp64_insn.storage[span]
973 log(name, f"{value.value:0{value.bits}b}", span)
974 log("new v3.0B fields", v30b_op, v30b_newfields)
975
def translate(self, lst):
    """Lazily translate every entry of *lst*, in order.

    Each entry is passed to ``translate_one`` and everything that
    generator produces is re-yielded before moving on to the next entry.
    """
    for asm_line in lst:
        yield from self.translate_one(asm_line)
979
980
def macro_subst(macros, txt):
    """Expand macros inside one piece of operand text.

    For every ``name -> value`` pair in *macros* the following forms are
    recognised:

    * ``name``, ``name.s`` and ``name.v`` -- only when they make up the
      whole of *txt*; the suffix is carried over onto the value
    * ``*name`` anywhere in *txt*, provided the name ends there (so macro
      ``win`` is not expanded inside ``*win2``)
    * ``(name)`` anywhere in *txt*

    At most one form is applied per macro per pass, and passes repeat until
    one of them leaves the text unchanged, so a macro may expand to text
    that refers to another macro.

    macros: mapping of macro name to replacement text
    txt:    the text to expand
    returns the expanded text

    NOTE: a substitution that does not alter the text no longer requests
    another pass, so a macro defined as itself terminates.  Macros that
    refer to each other in a cycle are not detected and still loop.
    """
    # function-scope import: "re" is not among the imports visible at the
    # top of the file
    import re

    log("subst", txt, macros)
    again = True
    while again:
        again = False
        for macro, value in macros.items():
            toreplace = replaced = None

            # forms that have to match the text in its entirety
            for suffix in ('', '.s', '.v'):
                candidate = '%s%s' % (macro, suffix)
                if candidate == txt:
                    toreplace = candidate
                    replaced = '%s%s' % (value, suffix)
                    break

            if replaced is None:
                # "*name": the lookahead rejects a longer identifier that
                # merely starts with this macro's name
                star = re.compile(re.escape('*%s' % macro) + r'(?!\w)')
                if star.search(txt):
                    toreplace = '*%s' % macro
                    # callable replacement: value is inserted literally,
                    # without backslash/group interpretation
                    replaced = star.sub(lambda m: '*%s' % value, txt)
                elif '(%s)' % macro in txt:
                    toreplace = '(%s)' % macro
                    replaced = txt.replace(toreplace, '(%s)' % value)

            if replaced is None:
                continue
            log("macro", txt, "replaced", replaced, toreplace, value)
            if replaced != txt:
                # only real progress justifies another pass
                again = True
                txt = replaced
    log(" processed", txt)
    return txt
1023
1024
def get_ws(line):
    """Split *line* (a str) into leading whitespace and the rest.

    Returns a ``(ws, rest)`` tuple in which ``ws`` is the run of whitespace
    characters at the start of *line* and ``rest`` is everything after it;
    ``ws + rest`` reproduces *line*.
    """
    rest = line.lstrip()
    indent_len = len(line) - len(rest)
    return line[:indent_len], rest
1034
1035
def asm_process():
    """Command-line filter: rewrite SVP64 instructions in an assembly file.

    Usage: ``pysvp64asm [infile | -] [outfile | -]``

    With no arguments, input comes from stdin and output goes to stdout.
    With two arguments each one is either a filename or ``-`` for the
    corresponding standard stream (``--`` is accepted as well, which is
    what earlier versions tested for).  Any other number of arguments
    prints the usage text on stderr and exits with status 1.

    Processing is line by line, where ``op`` is the text before the first
    ``#``, stripped:

    * a line whose ``op`` starts with ``.set`` records a macro (name and
      value separated by a comma); a ``.set`` without exactly one comma
      raises ValueError
    * a line is copied to the output unchanged unless ``op`` starts with
      ``sv.`` or ``DB[op]`` is not None
    * any other line is replaced by the ``'; '``-joined output of
      ``translate_one``, keeping the original leading whitespace and
      appending ``# <op>``
    """
    stdio_names = ('-', '--')
    args = sys.argv[1:]
    if not args:
        in_name = out_name = '-'
    elif len(args) == 2:
        in_name, out_name = args
    else:
        print("pysvp64asm [infile | -] [outfile | -]", file=sys.stderr)
        sys.exit(1)

    # read the whole input before the output is opened (and truncated):
    # infile and outfile may name the same file (in-place overwrite)
    if in_name in stdio_names:
        lines = sys.stdin.readlines()
    else:
        with open(in_name, "r") as infile:
            lines = infile.readlines()

    outfile = sys.stdout if out_name in stdio_names else open(out_name, "w")
    try:
        macros = {}  # macros which start ".set"
        isa = SVP64Asm([])
        for line in lines:
            op = line.split("#")[0].strip()
            # identify macros
            if op.startswith(".set"):
                macro = op[4:].split(",")
                (macro, value) = map(str.strip, macro)
                macros[macro] = value
            # NOTE(review): DB is indexed with the whole comment-stripped
            # line rather than just the mnemonic - confirm that this is
            # what the lookup expects
            record = DB[op]
            if not op.startswith('sv.') and record is None:
                outfile.write(line)
                continue

            (ws, _) = get_ws(line)
            translated = '; '.join(isa.translate_one(op, macros))
            outfile.write("%s%s # %s\n" % (ws, translated, op))
    finally:
        # never close the process-wide stdout, only a file opened here
        if outfile is not sys.stdout:
            outfile.close()
1079
1080
if __name__ == '__main__':
    # Developer scratch area made of hard-coded instruction lists.
    # NOTE: each plain "lst = [...]" below throws away whatever list was
    # built before it, so only the very last assignment (the single "dsld"
    # instruction) reaches SVP64Asm.  The earlier lists are presumably kept
    # as a record of cases that were tried previously.
    lst = ['slw 3, 1, 4',
           'extsw 5, 3',
           'sv.extsw 5, 3',
           'sv.cmpi 5, 1, 3, 2',
           'sv.setb 5, 31',
           'sv.isel 64.v, 3, 2, 65.v',
           'sv.setb/dm=r3/sm=1<<r3 5, 31',
           'sv.setb/m=r3 5, 31',
           'sv.setb/vec2 5, 31',
           'sv.setb/sw=8/ew=16 5, 31',
           'sv.extsw./ff=eq 5, 31',
           'sv.extsw./satu/sz/dz/sm=r3/dm=r3 5, 31',
           'sv.extsw./pr=eq 5.v, 31',
           'sv.add. 5.v, 2.v, 1.v',
           'sv.add./m=r3 5.v, 2.v, 1.v',
           ]
    # extends the list above (which is then discarded by the next "lst =")
    lst += [
        'sv.stw 5.v, 4(1.v)',
        'sv.ld 5.v, 4(1.v)',
        'setvl. 2, 3, 4, 0, 1, 1',
        'sv.setvl. 2, 3, 4, 0, 1, 1',
    ]
    lst = [
        "sv.stfsu 0.v, 16(4.v)",
    ]
    lst = [
        "sv.stfsu/els 0.v, 16(4)",
    ]
    lst = [
        'sv.add./mr 5.v, 2.v, 1.v',
    ]
    # macro table; not reassigned afterwards, so this is what gets passed
    # to SVP64Asm at the bottom of the block
    macros = {'win2': '50', 'win': '60'}
    lst = [
        'sv.addi win2.v, win.v, -1',
        'sv.add./mrr 5.v, 2.v, 1.v',
        #'sv.lhzsh 5.v, 11(9.v), 15',
        #'sv.lwzsh 5.v, 11(9.v), 15',
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',
    ]
    lst = [
        #'sv.fmadds 0.v, 8.v, 16.v, 4.v',
        #'sv.ffadds 0.v, 8.v, 4.v',
        'svremap 11, 0, 1, 2, 3, 2, 1',
        'svshape 8, 1, 1, 1, 0',
        'svshape 8, 1, 1, 1, 1',
    ]
    lst = [
        #'sv.lfssh 4.v, 11(8.v), 15',
        #'sv.lwzsh 4.v, 11(8.v), 15',
        #'sv.svstep. 2.v, 4, 0',
        #'sv.fcfids. 48.v, 64.v',
        'sv.fcoss. 80.v, 0.v',
        'sv.fcoss. 20.v, 0.v',
    ]
    lst = [
        'sv.bc/all 3,12,192',
        'sv.bclr/vsbi 3,81.v,192',
        'sv.ld 5.v, 4(1.v)',
        'sv.svstep. 2.v, 4, 0',
    ]
    lst = [
        'maxs 3,12,5',
        'maxs. 3,12,5',
        'avgadd 3,12,5',
        'absdu 3,12,5',
        'absds 3,12,5',
        'absdacu 3,12,5',
        'absdacs 3,12,5',
        'cprop 3,12,5',
        'svindex 0,0,1,0,0,0,0',
    ]
    lst = [
        'sv.svstep./m=r3 2.v, 4, 0',
        'ternlogi 0,0,0,0x5',
        'fmvis 5,65535',
        'fmvis 5,1',
        'fmvis 5,2',
        'fmvis 5,4',
        'fmvis 5,8',
        'fmvis 5,16',
        'fmvis 5,32',
        'fmvis 5,64',
        'fmvis 5,32768',
    ]
    lst = [
        'sv.andi. *80, *80, 1',
        'sv.ffmadds. 6.v, 2.v, 4.v, 6.v',  # incorrectly inserted 32-bit op
        'sv.ffmadds 6.v, 2.v, 4.v, 6.v',  # correctly converted to .long
        'svshape2 8, 1, 31, 7, 1, 1',
        'sv.ld 5.v, 4(1.v)',
        'sv.stw 5.v, 4(1.v)',
        'sv.bc/all 3,12,192',
        'pcdec. 0,0,0,0',
    ]
    # the list that is actually used
    lst = [
        #"sv.cmp/ff=gt *0,*1,*2,0",
        "dsld 5,4,5,3",

    ]
    isa = SVP64Asm(lst, macros=macros)
    # iterating the SVP64Asm instance supplies the strings joined here -
    # presumably the translated output lines (TODO confirm)
    log("list:\n", "\n\t".join(list(isa)))
    # running svp64.py is designed to test hard-coded lists
    # (above) - which strictly speaking should all be unit tests.
    # if you need to actually do assembler translation at the
    # commandline use "pysvp64asm" - see setup.py
    # XXX NO. asm_process()