2cf113e56ad2463ad91bd472a26e28094d8a41d6
[openpower-isa.git] / src / openpower / decoder / isa / caller.py
1 # SPDX-License-Identifier: LGPLv3+
2 # Copyright (C) 2020, 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
3 # Copyright (C) 2020 Michael Nolan
4 # Funded by NLnet http://nlnet.nl
5 """core of the python-based POWER9 simulator
6
7 this is part of a cycle-accurate POWER9 simulator. its primary purpose is
8 not speed, it is for both learning and educational purposes, as well as
9 a method of verifying the HDL.
10
11 related bugs:
12
13 * https://bugs.libre-soc.org/show_bug.cgi?id=424
14 """
15
16 from collections import namedtuple
17 from copy import deepcopy
18 from functools import wraps
19 import os
20 import errno
21 import struct
22 from openpower.syscalls import ppc_flags
23 import sys
24 from elftools.elf.elffile import ELFFile # for isinstance
25
26 from nmigen.sim import Settle
27 import openpower.syscalls
28 from openpower.consts import (MSRb, PIb, # big-endian (PowerISA versions)
29 SVP64CROffs, SVP64MODEb)
30 from openpower.decoder.helpers import (ISACallerHelper, ISAFPHelpers, exts,
31 gtu, undefined, copy_assign_rhs)
32 from openpower.decoder.isa.mem import Mem, MemMMap, MemException, LoadedELF
33 from openpower.decoder.isa.radixmmu import RADIX
34 from openpower.decoder.isa.svshape import SVSHAPE
35 from openpower.decoder.isa.svstate import SVP64State
36 from openpower.decoder.orderedset import OrderedSet
37 from openpower.decoder.power_enums import (FPTRANS_INSNS, CRInSel, CROutSel,
38 In1Sel, In2Sel, In3Sel, LDSTMode,
39 MicrOp, OutSel, SVMode,
40 SVP64LDSTmode, SVP64PredCR,
41 SVP64PredInt, SVP64PredMode,
42 SVP64RMMode, SVPType, XER_bits,
43 insns, spr_byname, spr_dict,
44 BFP_FLAG_NAMES)
45 from openpower.insndb.core import SVP64Instruction
46 from openpower.decoder.power_svp64 import SVP64RM, decode_extra
47 from openpower.decoder.selectable_int import (FieldSelectableInt,
48 SelectableInt, selectconcat,
49 EFFECTIVELY_UNLIMITED)
50 from openpower.consts import DEFAULT_MSR
51 from openpower.fpscr import FPSCRState
52 from openpower.xer import XERState
53 from openpower.util import LogType, log
54
# mnemonics collected under the name LDST_UPDATE_INSNS
LDST_UPDATE_INSNS = [
    'ldu', 'lwzu', 'lbzu', 'lhzu', 'lhau', 'lfsu', 'lfdu',
    'stwu', 'stbu', 'sthu', 'stfsu', 'stfdu', 'stdu',
]


# per-instruction record: the callable plus the register-name groups
instruction_info = namedtuple(
    'instruction_info',
    ['func', 'read_regs', 'uninit_regs', 'write_regs',
     'special_regs', 'op_fields', 'form', 'asmregs'])

# SPR name -> SPR number for the SPRs that are looked up by number
special_sprs = dict(LR=8, CTR=9, TAR=815, XER=1, VRSAVE=256)


# rrright.  this is here basically because the compiler pywriter returns
# results in a specific priority order.  to make sure regs match up they
# need partial sorting.  sigh.
REG_SORT_ORDER = {
    # TODO (lkcl): adjust other registers that should be in a particular order
    # probably CA, CA32, and CR
    **dict.fromkeys((
        "FRT", "FRA", "FRB", "FRC", "FRS",
        "RT", "RA", "RB", "RC", "RS",
        "BI", "CR", "LR", "CTR", "TAR", "MSR",
        "SVSTATE", "SVSHAPE0", "SVSHAPE1", "SVSHAPE2", "SVSHAPE3",
        "CA", "CA32",
    ), 0),

    "FPSCR": 1,

    "overflow": 7,  # should definitely be last
    "CR0": 8,  # likewise
}

# floating-point register operand names
fregs = ['FR' + suffix for suffix in 'ABCST']
110
111
def get_masked_reg(regs, base, offs, ew_bits):
    """read one ew_bits-wide element out of a file of 64-bit registers

    regs is indexed by register number, base is the first register of
    the vector, offs is the element index and ew_bits is the element
    width in bits.  elements are packed 64//ew_bits per register, with
    element 0 in the least-significant bits.  returns the element,
    masked down to ew_bits bits.
    """
    # row = which 64-bit register, col = which element inside it
    row, col = divmod(offs, 64 // ew_bits)
    element_mask = (1 << ew_bits) - 1
    # bring the wanted element down to the LSB, then drop everything above
    return (regs[base + row] >> (col * ew_bits)) & element_mask
124
125
def set_masked_reg(regs, base, offs, ew_bits, value):
    """write one ew_bits-wide element into a file of 64-bit registers

    same addressing as get_masked_reg: element offs of the vector that
    starts at register base, 64//ew_bits elements per register.  only
    the addressed element changes; value is truncated to ew_bits bits.
    regs is modified in place, nothing is returned.
    """
    # row = which 64-bit register, col = which element inside it
    row, col = divmod(offs, 64 // ew_bits)
    shift = col * ew_bits
    element_mask = (1 << ew_bits) - 1
    target = base + row
    # clear the destination element, keep every other element intact
    preserved = regs[target] & ~(element_mask << shift)
    # truncate the new value and drop it into the cleared slot
    regs[target] = preserved | ((value & element_mask) << shift)
141
142
def create_args(reglist, extra=None):
    """de-duplicate reglist and order it by REG_SORT_ORDER

    duplicates are dropped via OrderedSet, then the names are sorted
    (stably) on their REG_SORT_ORDER priority, names without an entry
    counting as priority 0.  when extra is not None it is placed in
    front of the result.  always returns a new list.
    """
    ordered = sorted(OrderedSet(reglist),
                     key=lambda name: REG_SORT_ORDER.get(name, 0))
    if extra is None:
        return ordered
    return [extra] + ordered
149
150
def create_full_args(*, read_regs, special_regs, uninit_regs, write_regs,
                     extra=None):
    """build the complete argument list for an instruction

    the register names are concatenated in the order read, uninit,
    write, special and then handed to create_args (which de-duplicates
    and sorts them); extra is passed straight through.
    """
    combined = list(read_regs)
    combined.extend(uninit_regs)
    combined.extend(write_regs)
    combined.extend(special_regs)
    return create_args(combined, extra=extra)
155
156
def is_ffirst_mode(dec2):
    """generator: True when dec2.rm_dec.mode equals SVP64RMMode.FFIRST

    yields the mode signal so that whatever drives the generator can
    supply its current value; the comparison result is the generator's
    return value (use with "yield from").
    """
    current_mode = yield dec2.rm_dec.mode
    ffirst = SVP64RMMode.FFIRST.value
    return current_mode == ffirst
160
161
class GPR(dict):
    """register file: a dict of register number -> 64-bit SelectableInt

    calling the object reads a register (or, for elwidth < 64, one
    packed element of a register); write() stores a register or element.
    """

    def __init__(self, decoder, isacaller, svstate, regfile):
        """copy regfile[0..len-1] in as 64-bit SelectableInts

        decoder, isacaller and svstate are only stored (as sd, isacaller
        and svstate); decoder is used by _get_regnum.
        """
        dict.__init__(self)
        self.sd = decoder
        self.isacaller = isacaller
        self.svstate = svstate
        # indexed access (not iteration): regfile only has to support
        # len() and [i]
        for i in range(len(regfile)):
            self[i] = SelectableInt(regfile[i], 64)

    def __call__(self, ridx, is_vec=False, offs=0, elwidth=64):
        """read register ridx+offs, or element offs at the given elwidth

        for elwidth == 64 the stored SelectableInt itself is returned.
        otherwise elements are packed 64//elwidth per register starting
        at ridx, and a new SelectableInt of width elwidth is returned.
        is_vec is only logged.
        """
        if isinstance(ridx, SelectableInt):
            ridx = ridx.value
        if elwidth == 64:
            return self[ridx+offs]
        # rrrright.  start by breaking down into row/col, based on elwidth
        gpr_offs = offs // (64//elwidth)
        gpr_col = offs % (64//elwidth)
        # now select the 64-bit register, but get its value (easier)
        val = self[ridx+gpr_offs].value
        # now shift down and mask out (>> binds tighter than &)
        val = val >> (gpr_col*elwidth) & ((1 << elwidth)-1)
        # finally, return a SelectableInt at the required elwidth
        log("GPR call", ridx, "isvec", is_vec, "offs", offs,
            "elwid", elwidth, "offs/col", gpr_offs, gpr_col, "val", hex(val))
        return SelectableInt(val, elwidth)

    def set_form(self, form):
        """record the instruction form name used by _get_regnum"""
        self.form = form

    def write(self, rnum, value, is_vec=False, elwidth=64):
        """store value into a register, or into one element of it

        rnum is a register number (int or SelectableInt), or a tuple
        (rnum, base, offs) selecting element offs of the vector starting
        at register base.  value is truncated to elwidth bits; the other
        elements of the destination register are preserved.  is_vec is
        only logged.
        """
        # get internal value
        if isinstance(rnum, SelectableInt):
            rnum = rnum.value
        if isinstance(value, SelectableInt):
            value = value.value
        # compatibility...
        if isinstance(rnum, tuple):
            rnum, base, offs = rnum
        else:
            base, offs = rnum, 0
        # rrrright.  start by breaking down into row/col, based on elwidth
        gpr_offs = offs // (64//elwidth)
        gpr_col = offs % (64//elwidth)
        # compute the mask based on elwidth
        mask = (1 << elwidth)-1
        # now select the 64-bit register, but get its value (easier)
        val = self[base+gpr_offs].value
        # clear the element that is about to be replaced
        val = val & ~(mask << (gpr_col*elwidth))
        # truncate the new value to the element width
        value = value & mask
        # OR the new value in, shifted up
        val |= value << (gpr_col*elwidth)
        # finally put the value into the regfile.  dict.__setitem__ is
        # called directly, so the "GPR setitem" log line is not emitted
        log("GPR write", base, "isvec", is_vec, "offs", offs,
            "elwid", elwidth, "offs/col", gpr_offs, gpr_col, "val", hex(val),
            "@", base+gpr_offs)
        dict.__setitem__(self, base+gpr_offs, SelectableInt(val, 64))

    def __setitem__(self, rnum, value):
        """store value under rnum (a SelectableInt rnum is unwrapped)"""
        # rnum = rnum.value # only SelectableInt allowed
        log("GPR setitem", rnum, value)
        if isinstance(rnum, SelectableInt):
            rnum = rnum.value
        dict.__setitem__(self, rnum, value)

    def getz(self, rnum, rvalue=None):
        """read with "register 0 reads as zero" behaviour

        when rnum == 0 a zero SelectableInt is returned (same width as
        rvalue if that is given, else 64 bits).  otherwise rvalue is
        returned if given, else the stored register.
        """
        # rnum = rnum.value # only SelectableInt allowed
        log("GPR getzero?", rnum, rvalue)
        if rvalue is not None:
            if rnum == 0:
                return SelectableInt(0, rvalue.bits)
            return rvalue
        if rnum == 0:
            return SelectableInt(0, 64)
        return self[rnum]

    def _get_regnum(self, attr):
        """look attr up on the decoder's signal-form for the current form

        requires set_form() to have been called first.
        """
        getform = self.sd.sigforms[self.form]
        rnum = getattr(getform, attr)
        return rnum

    def ___getitem__(self, attr):
        """ XXX currently not used

        NOTE(review): three leading underscores, so this does NOT
        override dict.__getitem__.  it also reads self.regfile, which
        this class never assigns.
        """
        rnum = self._get_regnum(attr)
        log("GPR getitem", attr, rnum)
        return self.regfile[rnum]

    def dump(self, printout=True):
        """return the values of registers 0..len(self)-1 as a list of ints

        when printout is true the values are also logged, eight per
        line.  a final short row is logged as-is when the register count
        is not a multiple of eight.
        """
        res = [self[i].value for i in range(len(self))]
        if printout:
            for i in range(0, len(res), 8):
                # slicing (rather than indexing i+0..i+7) copes with a
                # register count that is not a multiple of 8
                s = ' '.join("%08x" % v for v in res[i:i+8])
                log("reg", "%2d" % i, s, kind=LogType.InstrInOuts)
        return res
263
264
class SPR(dict):
    """special-purpose register file, a dict keyed by SPR name or number

    names listed in special_sprs are stored under their SPR number;
    every other SPR is stored under its name.  integer keys are first
    converted to a name through spr_dict.
    """

    def __init__(self, dec2, initial_sprs=None, gpr=None):
        """populate from initial_sprs (mapping of name/number -> value)

        values that are not already SelectableInt are wrapped at the
        width given by the SPR's spr_dict / spr_byname entry.
        initial_sprs=None means "no initial SPRs".
        """
        self.sd = dec2
        self.gpr = gpr  # for SVSHAPE[0-3]
        dict.__init__(self)
        # None sentinel instead of a shared mutable {} default
        if initial_sprs is None:
            initial_sprs = {}
        for key, v in initial_sprs.items():
            if isinstance(key, SelectableInt):
                key = key.value
            key = special_sprs.get(key, key)
            if isinstance(key, int):
                info = spr_dict[key]
            else:
                info = spr_byname[key]
            if not isinstance(v, SelectableInt):
                v = SelectableInt(v, info.length)
            self[key] = v

    def __getitem__(self, key):
        """return the SPR for key, creating it as zero on first access

        key may be a name, a number or a SelectableInt.  HSRR0 / HSRR1
        are redirected to SRR0 / SRR1.
        """
        #log("get spr", key)
        #log("dict", self.items())
        # if key in special_sprs get the special spr, otherwise return key
        if isinstance(key, SelectableInt):
            key = key.value
        if isinstance(key, int):
            key = spr_dict[key].SPR
        key = special_sprs.get(key, key)
        if key == 'HSRR0':  # HACK!
            key = 'SRR0'
        if key == 'HSRR1':  # HACK!
            key = 'SRR1'
        if key in self:
            res = dict.__getitem__(self, key)
        else:
            # not set yet: create a zero of the SPR's declared width
            if isinstance(key, int):
                info = spr_dict[key]
            else:
                info = spr_byname[key]
            self[key] = SelectableInt(0, info.length)
            res = dict.__getitem__(self, key)
        #log("spr returning", key, res)
        return res

    def __setitem__(self, key, value):
        """store value under the normalised key

        writes to HSRR0 / HSRR1 are mirrored into SRR0 / SRR1 (and are
        then also stored under their own name).  the value stored under
        key 1 (XER in special_sprs) is wrapped in XERState, and
        SVSHAPE0-3 values are wrapped in SVSHAPE.
        """
        if isinstance(key, SelectableInt):
            key = key.value
        if isinstance(key, int):
            key = spr_dict[key].SPR
            log("spr key", key)
        key = special_sprs.get(key, key)
        if key == 'HSRR0':  # HACK!
            self.__setitem__('SRR0', value)
        if key == 'HSRR1':  # HACK!
            self.__setitem__('SRR1', value)
        if key == 1:
            value = XERState(value)
        if key in ('SVSHAPE0', 'SVSHAPE1', 'SVSHAPE2', 'SVSHAPE3'):
            value = SVSHAPE(value, self.gpr)
        log("setting spr", key, value)
        dict.__setitem__(self, key, value)

    def __call__(self, ridx):
        """calling the object is the same as indexing it"""
        return self[ridx]

    def dump(self, printout=True):
        """return [(name, value), ...] for every stored SPR

        numeric keys are translated back to names through spr_dict.
        when printout is true each pair is also printed to stdout.
        """
        res = []
        keys = list(self.keys())
        # keys.sort()
        for k in keys:
            sprname = spr_dict.get(k, None)
            if sprname is None:
                sprname = k
            else:
                sprname = sprname.SPR
            res.append((sprname, self[k].value))
        if printout:
            for sprname, value in res:
                print(" ", sprname, hex(value))
        return res
343
344
class PC:
    """program counter pair: CIA (current) and NIA (next) addresses,
    both held as 64-bit SelectableInts
    """

    def __init__(self, pc_init=0):
        self.CIA = SelectableInt(pc_init, 64)
        # only true for v3.0B!
        self.NIA = self.CIA + SelectableInt(4, 64)

    def update_nia(self, is_svp64):
        """recompute NIA from CIA: +8 when is_svp64 is true, else +4"""
        step = 4
        if is_svp64:
            step = 8
        self.NIA = self.CIA + SelectableInt(step, 64)

    def update(self, namespace, is_svp64):
        """updates the program counter (PC) by 4 if v3.0B mode or 8 if SVP64

        the new CIA is taken from namespace['NIA'] (narrowed to 64 bits);
        the resulting CIA and NIA are written back into namespace.
        """
        next_address = namespace['NIA']
        self.CIA = next_address.narrow(64)
        self.update_nia(is_svp64)
        namespace['CIA'] = self.CIA
        namespace['NIA'] = self.NIA
360 namespace['NIA'] = self.NIA
361
362
363 # CR register fields
364 # See PowerISA Version 3.0 B Book 1
365 # Section 2.3.1 Condition Register pages 30 - 31
class CRFields:
    """the condition register plus its eight 4-bit fields

    cr is the underlying 64-bit register; crl[i] is a
    FieldSelectableInt over bits 32+4*i .. 35+4*i of it.
    """
    # bit positions inside one CR field
    LT = FL = 0  # negative, less than, floating-point less than
    GT = FG = 1  # positive, greater than, floating-point greater than
    EQ = FE = 2  # equal, floating-point equal
    SO = FU = 3  # summary overflow, floating-point unordered

    def __init__(self, init=0):
        # rev_cr = int('{:016b}'.format(initial_cr)[::-1], 2)
        # self.cr = FieldSelectableInt(self._cr, list(range(32, 64)))
        self.cr = SelectableInt(init, 64)  # underlying reg
        # field-selectable versions of Condition Register TODO check bitranges?
        self.crl = [
            FieldSelectableInt(self.cr, tuple(range(32 + 4*i, 36 + 4*i)))
            for i in range(8)
        ]
381 self.crl.append(_cr)
382
383
384 # decode SVP64 predicate integer to reg number and invert
def get_predint(gpr, mask):
    """turn an SVP64 integer-predicate selector into a predicate bitmask

    gpr is called to read registers 3, 10 and 30 (always, whichever
    selector is given).  returns all 64 bits set for ALWAYS, a single
    bit at position (r3 & 63) for R3_UNARY, the register value for
    R3 / R10 / R30, and its Python bitwise inverse (a negative int) for
    the _N variants.  an unrecognised selector returns None.
    """
    r3 = gpr(3)
    r10 = gpr(10)
    r30 = gpr(30)
    log("get_predint", mask, SVP64PredInt.ALWAYS.value)
    if mask == SVP64PredInt.ALWAYS.value:
        return 0xffff_ffff_ffff_ffff  # 64 bits of 1
    if mask == SVP64PredInt.R3_UNARY.value:
        return 1 << (r3.value & 0b111111)
    # (register, plain selector, inverted selector), tested in this order
    choices = (
        (r3, SVP64PredInt.R3, SVP64PredInt.R3_N),
        (r10, SVP64PredInt.R10, SVP64PredInt.R10_N),
        (r30, SVP64PredInt.R30, SVP64PredInt.R30_N),
    )
    for reg, plain, inverted in choices:
        if mask == plain.value:
            return reg.value
        if mask == inverted.value:
            return ~reg.value
    return None
406
407
408 # decode SVP64 predicate CR to reg number and invert status
def _get_predcr(mask):
    """decode an SVP64 CR-predicate selector

    returns (bit index within the CR field, value that bit must have):
    LT/GE test bit 0, GT/LE bit 1, EQ/NE bit 2, SO/NS bit 3, with the
    first of each pair wanting 1 and the second wanting 0.  an
    unrecognised selector returns None.
    """
    decode_table = (
        (SVP64PredCR.LT, (0, 1)),
        (SVP64PredCR.GE, (0, 0)),
        (SVP64PredCR.GT, (1, 1)),
        (SVP64PredCR.LE, (1, 0)),
        (SVP64PredCR.EQ, (2, 1)),
        (SVP64PredCR.NE, (2, 0)),
        (SVP64PredCR.SO, (3, 1)),
        (SVP64PredCR.NS, (3, 0)),
    )
    for selector, decoded in decode_table:
        if mask == selector.value:
            return decoded
    return None
426
427
428 # read individual CR fields (0..VL-1), extract the required bit
429 # and construct the mask
def get_predcr(crl, mask, vl):
    """build a predicate bitmask from CR fields

    for each element i in 0..vl-1 the CR field crl[i+SVP64CROffs.CRPred]
    is examined; bit i of the result is set when the field bit picked by
    the selector (see _get_predcr) has the wanted value.
    """
    bit_index, wanted = _get_predcr(mask)
    return sum(
        1 << i
        for i in range(vl)
        if crl[i + SVP64CROffs.CRPred][bit_index].value == wanted
    )
438
439
440 # TODO, really should just be using PowerDecoder2
def get_idx_map(dec2, name):
    """generator: which decoder input port (1, 2 or 3) supplies name

    yields the three input-select signals and read_reg1.data, then
    returns the port number whose selector matches the register name,
    or None when no port matches.  for RA / RA_OR_ZERO the answer is 1
    whenever in1_sel is RA or RA_OR_ZERO.
    """
    op = dec2.dec.op
    in1_sel = yield op.in1_sel
    in2_sel = yield op.in2_sel
    in3_sel = yield op.in3_sel
    # still sampled so the sequence of yields stays the same, although
    # the value cannot change the outcome
    yield dec2.e.read_reg1.data
    # identify which regnames map to in1/2/3
    if name in ('RA', 'RA_OR_ZERO'):
        if in1_sel in (In1Sel.RA.value, In1Sel.RA_OR_ZERO.value):
            return 1
        return None
    if name == 'RB':
        if in2_sel == In2Sel.RB.value:
            return 2
        if in3_sel == In3Sel.RB.value:
            return 3
        return None
    # XXX TODO, RC doesn't exist yet!
    if name == 'RC':
        if in3_sel == In3Sel.RC.value:
            return 3
        return None
    if name in ('EA', 'RS'):
        if in1_sel == In1Sel.RS.value:
            return 1
        if in2_sel == In2Sel.RS.value:
            return 2
        if in3_sel == In3Sel.RS.value:
            return 3
        return None
    if name == 'FRA':
        if in1_sel == In1Sel.FRA.value:
            return 1
        if in3_sel == In3Sel.FRA.value:
            return 3
        return None
    if name == 'FRB':
        if in2_sel == In2Sel.FRB.value:
            return 2
        return None
    if name == 'FRC':
        if in3_sel == In3Sel.FRC.value:
            return 3
        return None
    if name == 'FRS':
        if in1_sel == In1Sel.FRS.value:
            return 1
        if in3_sel == In3Sel.FRS.value:
            return 3
        return None
    if name == 'FRT':
        if in1_sel == In1Sel.FRT.value:
            return 1
        return None
    if name == 'RT':
        if in1_sel == In1Sel.RT.value:
            return 1
        return None
    return None
493
494
495 # TODO, really should just be using PowerDecoder2
def get_idx_in(dec2, name, ewmode=False):
    """generator: register number and is-vector flag for input name

    returns (None, False) when name does not map to any input port (see
    get_idx_map).  otherwise returns (reg, isvec) for the matching port,
    where reg is read_regN.data, or the tuple (data, base, offs) when
    ewmode is true.
    """
    port = yield from get_idx_map(dec2, name)
    if port is None:
        return None, False
    op = dec2.dec.op
    in1_sel = yield op.in1_sel
    in2_sel = yield op.in2_sel
    in3_sel = yield op.in3_sel
    # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec)
    read_ports = (dec2.e.read_reg1, dec2.e.read_reg2, dec2.e.read_reg3)
    regs = []
    for read_port in read_ports:
        regs.append((yield read_port.data))
    if ewmode:
        # all three bases first, then all three offsets
        bases = []
        for read_port in read_ports:
            bases.append((yield read_port.base))
        offsets = []
        for read_port in read_ports:
            offsets.append((yield read_port.offs))
        regs = list(zip(regs, bases, offsets))
    in1, in2, in3 = regs

    in1_isvec = yield dec2.in1_isvec
    in2_isvec = yield dec2.in2_isvec
    in3_isvec = yield dec2.in3_isvec
    log("get_idx_in in1", name, in1_sel, In1Sel.RA.value,
        in1, in1_isvec)
    log("get_idx_in in2", name, in2_sel, In2Sel.RB.value,
        in2, in2_isvec)
    log("get_idx_in in3", name, in3_sel, In3Sel.RS.value,
        in3, in3_isvec)
    log("get_idx_in FRS in3", name, in3_sel, In3Sel.FRS.value,
        in3, in3_isvec)
    log("get_idx_in FRB in2", name, in2_sel, In2Sel.FRB.value,
        in2, in2_isvec)
    log("get_idx_in FRC in3", name, in3_sel, In3Sel.FRC.value,
        in3, in3_isvec)
    by_port = {
        1: (in1, in1_isvec),
        2: (in2, in2_isvec),
        3: (in3, in3_isvec),
    }
    return by_port.get(port, (None, False))
541
542
543 # TODO, really should just be using PowerDecoder2
def get_cr_in(dec2, name):
    """generator: CR register number and is-vector flag for CR input name

    only 'BI' is recognised, and only when the decoder's cr_in selector
    is CRInSel.BI; anything else returns (None, False).
    """
    op = dec2.dec.op
    in_sel = yield op.cr_in
    in_bitfield = yield dec2.dec_cr_in.cr_bitfield.data
    sv_cr_in = yield op.sv_cr_in
    spec = yield dec2.crin_svdec.spec
    sv_override = yield dec2.dec_cr_in.sv_override
    # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec)
    in1 = yield dec2.e.read_cr1.data
    cr_isvec = yield dec2.cr_in_isvec
    log("get_cr_in", in_sel, CROutSel.CR0.value, in1, cr_isvec)
    for label, shown in ((" sv_cr_in", sv_cr_in),
                         (" cr_bf", in_bitfield),
                         (" spec", spec),
                         (" override", sv_override)):
        log(label, shown)
    # identify which regnames map to in / o2
    if name == 'BI' and in_sel == CRInSel.BI.value:
        return in1, cr_isvec
    log("get_cr_in not found", name)
    return None, False
565
566
567 # TODO, really should just be using PowerDecoder2
def get_cr_out(dec2, name):
    """generator: CR register number and is-vector flag for CR output name

    BF, CR0 and CR1 are matched against the decoder's cr_out selector.
    failing that, when rm_dec.RC1 is set, CR0 / CR1 are reported as
    implicit vectors numbered 0 / 1.  otherwise returns (None, False).
    """
    op = dec2.dec.op
    out_sel = yield op.cr_out
    out_bitfield = yield dec2.dec_cr_out.cr_bitfield.data
    sv_cr_out = yield op.sv_cr_out
    spec = yield dec2.crout_svdec.spec
    sv_override = yield dec2.dec_cr_out.sv_override
    # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec)
    out = yield dec2.e.write_cr.data
    o_isvec = yield dec2.cr_out_isvec
    log("get_cr_out", out_sel, CROutSel.CR0.value, out, o_isvec)
    for label, shown in ((" sv_cr_out", sv_cr_out),
                         (" cr_bf", out_bitfield),
                         (" spec", spec),
                         (" override", sv_override)):
        log(label, shown)
    # identify which regnames map to out / o2
    # (CR1: these are not actually calculated correctly)
    if name in ('BF', 'CR0', 'CR1'):
        if out_sel == getattr(CROutSel, name).value:
            return out, o_isvec
    # check RC1 set? if so return implicit vector, this is a REAL bad hack
    RC1 = yield dec2.rm_dec.RC1
    if RC1:
        log("get_cr_out RC1 mode")
        if name == 'CR0':
            return 0, True  # XXX TODO: offset CR0 from SVSTATE SPR
        if name == 'CR1':
            return 1, True  # XXX TODO: offset CR1 from SVSTATE SPR
    # nope - not found.
    log("get_cr_out not found", name)
    return None, False
604
605
606 # TODO, really should just be using PowerDecoder2
def get_out_map(dec2, name):
    """generator: True when the decoder's primary output is register name

    RA, RT_OR_ZERO, FRA, FRS and FRT match the OutSel member of the same
    name.  RT matches OutSel.RT, and also OutSel.RT_OR_ZERO provided
    write_reg.data is non-zero.  every other name gives False.
    """
    op = dec2.dec.op
    out_sel = yield op.out_sel
    # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec)
    out = yield dec2.e.write_reg.data
    # identify which regnames map to out / o2
    if name == 'RT':
        if out_sel == OutSel.RT.value:
            return True
        return out_sel == OutSel.RT_OR_ZERO.value and out != 0
    if name in ('RA', 'RT_OR_ZERO', 'FRA', 'FRS', 'FRT'):
        return out_sel == getattr(OutSel, name).value
    return False
634
635
636 # TODO, really should just be using PowerDecoder2
def get_idx_out(dec2, name, ewmode=False):
    """generator: register number and is-vector flag for output name

    returns (reg, o_isvec) when get_out_map says name is the primary
    output, else (None, False).  reg is write_reg.data, or the tuple
    (data, base, offs) when ewmode is true.
    """
    op = dec2.dec.op
    out_sel = yield op.out_sel
    # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec)
    out = yield dec2.e.write_reg.data
    o_isvec = yield dec2.o_isvec
    if ewmode:
        # offs is sampled before base
        offs = yield dec2.e.write_reg.offs
        base = yield dec2.e.write_reg.base
        out = (out, base, offs)
    # identify which regnames map to out / o2
    matched = yield from get_out_map(dec2, name)
    if not matched:
        log("get_idx_out not found", name, out_sel, out, o_isvec)
        return None, False
    log("get_idx_out", name, out_sel, out, o_isvec)
    return out, o_isvec
654
655
656 # TODO, really should just be using PowerDecoder2
def get_out2_map(dec2, name):
    """generator: True when the decoder's second output is register name

    always False unless write_ea.ok is set.  EA / RA match when the op
    has an "upd" field equal to LDSTMode.update; RS / FRS match when
    dec2.implicit_rs is set.
    """
    # check first if register is activated for write
    op = dec2.dec.op
    out_sel = yield op.out_sel
    out = yield dec2.e.write_ea.data
    out_ok = yield dec2.e.write_ea.ok
    if not out_ok:
        return False

    if name in ('EA', 'RA') and hasattr(op, "upd"):
        # update mode LD/ST uses read-reg A also as an output
        upd = yield op.upd
        log("get_idx_out2", upd, LDSTMode.update.value,
            out_sel, OutSel.RA.value,
            out)
        if upd == LDSTMode.update.value:
            return True
    if name in ('RS', 'FRS'):
        fft_en = yield dec2.implicit_rs
        if fft_en:
            log("get_idx_out2", out_sel, getattr(OutSel, name).value,
                out)
            return True
    return False
688
689
690 # TODO, really should just be using PowerDecoder2
def get_idx_out2(dec2, name, ewmode=False):
    """generator: register number and is-vector flag for 2nd output name

    returns (reg, o2_isvec) when get_out2_map says name is the second
    output, else (None, False).  reg is write_ea.data, or the tuple
    (data, base, offs) when ewmode is true.
    """
    # check first if register is activated for write
    op = dec2.dec.op
    out_sel = yield op.out_sel
    out = yield dec2.e.write_ea.data
    if ewmode:
        # offs is sampled before base
        offs = yield dec2.e.write_ea.offs
        base = yield dec2.e.write_ea.base
        out = (out, base, offs)
    o_isvec = yield dec2.o2_isvec
    matched = yield from get_out2_map(dec2, name)
    if not matched:
        return None, False
    log("get_idx_out2", name, out_sel, out, o_isvec)
    return out, o_isvec
706
707
708 class StepLoop:
709 """deals with svstate looping.
710 """
711
712 def __init__(self, svstate):
713 self.svstate = svstate
714 self.new_iterators()
715
716 def new_iterators(self):
717 self.src_it = self.src_iterator()
718 self.dst_it = self.dst_iterator()
719 self.loopend = False
720 self.new_srcstep = 0
721 self.new_dststep = 0
722 self.new_ssubstep = 0
723 self.new_dsubstep = 0
724 self.pred_dst_zero = 0
725 self.pred_src_zero = 0
726
727 def src_iterator(self):
728 """source-stepping iterator
729 """
# Generator.  Every yield produces the tuple
# (self.svstate.ssubstep, srcstep).  Between two next() calls the
# caller may have modified SVSTATE, so values are re-read from
# self.svstate rather than kept across iterations.
# Reads self.subvl, self.srcmask and self.pred_sz; none of them is
# assigned in this method, so they have to be set before the first
# next() (read_src_mask stores srcmask/pred_sz, advance_svstate_steps
# stores subvl).
# NOTE(review): indentation is not visible in this extract; the nesting
# described in the comments below should be confirmed against upstream.
# self.svstate.pack is sampled once, when the generator first runs; it
# selects which of the two loop orders is used for the whole iteration.
730 pack = self.svstate.pack
731
732 # source step
733 if pack:
734 # pack advances subvl in *outer* loop
735 while True: # outer subvl loop
736 while True: # inner vl loop
737 vl = self.svstate.vl
738 subvl = self.subvl
739 srcmask = self.srcmask
740 srcstep = self.svstate.srcstep
# despite its name the LOCAL pred_src_zero is True when the predicate
# bit for srcstep is SET; the attribute self.pred_src_zero stored just
# below is its negation.
741 pred_src_zero = ((1 << srcstep) & srcmask) != 0
742 if self.pred_sz or pred_src_zero:
743 self.pred_src_zero = not pred_src_zero
744 log(" advance src", srcstep, vl,
745 self.svstate.ssubstep, subvl)
746 # yield actual substep/srcstep
747 yield (self.svstate.ssubstep, srcstep)
748 # the way yield works these could have been modified.
749 vl = self.svstate.vl
750 subvl = self.subvl
751 srcstep = self.svstate.srcstep
752 log(" advance src check", srcstep, vl,
753 self.svstate.ssubstep, subvl, srcstep == vl-1,
754 self.svstate.ssubstep == subvl)
# end of the element loop: srcstep wraps to 0; the generator finishes
# if the sub-step is also at its end.  in this (pack) branch the
# generator returns without setting self.loopend.
755 if srcstep == vl-1: # end-point
756 self.svstate.srcstep = SelectableInt(0, 7) # reset
757 if self.svstate.ssubstep == subvl: # end-point
758 log(" advance pack stop")
759 return
760 break # exit inner loop
761 self.svstate.srcstep += SelectableInt(1, 7) # advance ss
762 subvl = self.subvl
763 if self.svstate.ssubstep == subvl: # end-point
764 self.svstate.ssubstep = SelectableInt(0, 2) # reset
765 log(" advance pack stop")
766 return
767 self.svstate.ssubstep += SelectableInt(1, 2)
768
769 else:
770 # these cannot be done as for-loops because SVSTATE may change
771 # (srcstep/substep may be modified, interrupted, subvl/vl change)
772 # but they *can* be done as while-loops as long as every SVSTATE
773 # "thing" is re-read every single time a yield gives indices
774 while True: # outer vl loop
775 while True: # inner subvl loop
776 vl = self.svstate.vl
777 subvl = self.subvl
778 srcmask = self.srcmask
779 srcstep = self.svstate.srcstep
# same inverted naming as in the pack branch: local True == bit set
780 pred_src_zero = ((1 << srcstep) & srcmask) != 0
781 if self.pred_sz or pred_src_zero:
782 self.pred_src_zero = not pred_src_zero
783 log(" advance src", srcstep, vl,
784 self.svstate.ssubstep, subvl)
785 # yield actual substep/srcstep
786 yield (self.svstate.ssubstep, srcstep)
787 if self.svstate.ssubstep == subvl: # end-point
788 self.svstate.ssubstep = SelectableInt(0, 2) # reset
789 break # exit inner loop
790 self.svstate.ssubstep += SelectableInt(1, 2)
# NOTE(review): vl is re-read here but srcstep is the value sampled
# before the yield, unlike the pack branch, which re-reads srcstep too.
791 vl = self.svstate.vl
# unlike the pack branch, finishing here also sets self.loopend = True
792 if srcstep == vl-1: # end-point
793 self.svstate.srcstep = SelectableInt(0, 7) # reset
794 self.loopend = True
795 return
796 self.svstate.srcstep += SelectableInt(1, 7) # advance srcstep
797
798 def dst_iterator(self):
799 """dest-stepping iterator
800 """
# Generator, the destination-side counterpart of src_iterator.  Every
# yield produces the tuple (self.svstate.dsubstep, dststep).
# Reads self.subvl, self.dstmask and self.pred_dz; none of them is
# assigned in this method, so they have to be set before the first
# next().
# Neither branch of this method assigns self.loopend.
# NOTE(review): indentation is not visible in this extract; the nesting
# described in the comments below should be confirmed against upstream.
# self.svstate.unpack is sampled once, when the generator first runs.
801 unpack = self.svstate.unpack
802
803 # dest step
804 if unpack:
# NOTE(review): the next comment says "pack" but this is the unpack branch
805 # pack advances subvl in *outer* loop
806 while True: # outer subvl loop
807 while True: # inner vl loop
808 vl = self.svstate.vl
809 subvl = self.subvl
810 dstmask = self.dstmask
811 dststep = self.svstate.dststep
# despite its name the LOCAL pred_dst_zero is True when the predicate
# bit for dststep is SET; self.pred_dst_zero stored below is its
# negation.
812 pred_dst_zero = ((1 << dststep) & dstmask) != 0
813 if self.pred_dz or pred_dst_zero:
814 self.pred_dst_zero = not pred_dst_zero
815 log(" advance dst", dststep, vl,
816 self.svstate.dsubstep, subvl)
817 # yield actual substep/dststep
818 yield (self.svstate.dsubstep, dststep)
819 # the way yield works these could have been modified.
# vl and dststep are re-read after the yield; subvl is not (the source
# iterator re-reads all three).
820 vl = self.svstate.vl
821 dststep = self.svstate.dststep
# NOTE(review): this log prints svstate.ssubstep, not dsubstep --
# possibly a copy/paste from src_iterator; it only affects the log.
822 log(" advance dst check", dststep, vl,
823 self.svstate.ssubstep, subvl)
824 if dststep == vl-1: # end-point
825 self.svstate.dststep = SelectableInt(0, 7) # reset
826 if self.svstate.dsubstep == subvl: # end-point
827 log(" advance unpack stop")
828 return
829 break
830 self.svstate.dststep += SelectableInt(1, 7) # advance ds
831 subvl = self.subvl
832 if self.svstate.dsubstep == subvl: # end-point
833 self.svstate.dsubstep = SelectableInt(0, 2) # reset
834 log(" advance unpack stop")
835 return
836 self.svstate.dsubstep += SelectableInt(1, 2)
837 else:
838 # these cannot be done as for-loops because SVSTATE may change
839 # (dststep/substep may be modified, interrupted, subvl/vl change)
840 # but they *can* be done as while-loops as long as every SVSTATE
841 # "thing" is re-read every single time a yield gives indices
842 while True: # outer vl loop
843 while True: # inner subvl loop
844 subvl = self.subvl
845 dstmask = self.dstmask
846 dststep = self.svstate.dststep
# same inverted naming as in the unpack branch: local True == bit set
847 pred_dst_zero = ((1 << dststep) & dstmask) != 0
848 if self.pred_dz or pred_dst_zero:
849 self.pred_dst_zero = not pred_dst_zero
850 log(" advance dst", dststep, self.svstate.vl,
851 self.svstate.dsubstep, subvl)
852 # yield actual substep/dststep
853 yield (self.svstate.dsubstep, dststep)
854 if self.svstate.dsubstep == subvl: # end-point
855 self.svstate.dsubstep = SelectableInt(0, 2) # reset
856 break
857 self.svstate.dsubstep += SelectableInt(1, 2)
858 subvl = self.subvl
859 vl = self.svstate.vl
# finishing here does NOT set self.loopend (src_iterator's equivalent
# branch does)
860 if dststep == vl-1: # end-point
861 self.svstate.dststep = SelectableInt(0, 7) # reset
862 return
863 self.svstate.dststep += SelectableInt(1, 7) # advance dststep
864
865 def src_iterate(self):
866 """source-stepping iterator
867 """
# Not a generator: one call advances the source position by one step.
# Writes self.svstate.srcstep (always, at the end), may write
# self.svstate.ssubstep and may set self.loopend = True.
# Reads self.subvl, self.srcmask and self.srcstep_skip, which are not
# assigned in this method.
# NOTE(review): indentation is not visible in this extract; the nesting
# described in the comments below should be confirmed against upstream.
868 subvl = self.subvl
869 vl = self.svstate.vl
870 pack = self.svstate.pack
871 unpack = self.svstate.unpack
872 ssubstep = self.svstate.ssubstep
# end_ssub is computed once, from the sub-step as it was on entry
873 end_ssub = ssubstep == subvl
874 end_src = self.svstate.srcstep == vl-1
875 log(" pack/unpack/subvl", pack, unpack, subvl,
876 "end", end_src,
877 "sub", end_ssub)
878 # first source step
879 srcstep = self.svstate.srcstep
880 srcmask = self.srcmask
881 if pack:
882 # pack advances subvl in *outer* loop
883 while True:
884 assert srcstep <= vl-1
885 end_src = srcstep == vl-1
# at the last element: either the whole loop is finished (sub-step also
# at its end) or the sub-step advances; srcstep wraps to 0 either way.
886 if end_src:
887 if end_ssub:
888 self.loopend = True
889 else:
890 self.svstate.ssubstep += SelectableInt(1, 2)
891 srcstep = 0 # reset
892 break
893 else:
894 srcstep += 1 # advance srcstep
# with srcstep_skip unset the very next element is accepted; otherwise
# keep advancing until an element whose srcmask bit is set (or the
# end-point above) is reached.
895 if not self.srcstep_skip:
896 break
897 if ((1 << srcstep) & srcmask) != 0:
898 break
899 else:
900 log(" sskip", bin(srcmask), bin(1 << srcstep))
901 else:
902 # advance subvl in *inner* loop
# srcstep only moves once the sub-step has reached its end; otherwise
# just the sub-step is incremented (see the final else below).
903 if end_ssub:
904 while True:
905 assert srcstep <= vl-1
906 end_src = srcstep == vl-1
907 if end_src: # end-point
908 self.loopend = True
909 srcstep = 0
910 break
911 else:
912 srcstep += 1
913 if not self.srcstep_skip:
914 break
915 if ((1 << srcstep) & srcmask) != 0:
916 break
917 else:
918 log(" sskip", bin(srcmask), bin(1 << srcstep))
919 self.svstate.ssubstep = SelectableInt(0, 2) # reset
920 else:
921 # advance ssubstep
922 self.svstate.ssubstep += SelectableInt(1, 2)
923
# the working copy of srcstep is written back as a 7-bit SelectableInt
924 self.svstate.srcstep = SelectableInt(srcstep, 7)
925 log(" advance src", self.svstate.srcstep, self.svstate.ssubstep,
926 self.loopend)
927
928 def dst_iterate(self):
929 """dest step iterator
930 """
# Not a generator: one call advances the destination position by one
# step.  Writes self.svstate.dststep (always, at the end), may write
# self.svstate.dsubstep and may set self.loopend = True.
# Reads self.subvl, self.dstmask and self.dststep_skip, which are not
# assigned in this method.
# NOTE(review): indentation is not visible in this extract; the nesting
# described in the comments below should be confirmed against upstream.
931 vl = self.svstate.vl
932 subvl = self.subvl
933 pack = self.svstate.pack
934 unpack = self.svstate.unpack
935 dsubstep = self.svstate.dsubstep
# end_dsub is computed once, from the sub-step as it was on entry
936 end_dsub = dsubstep == subvl
937 dststep = self.svstate.dststep
938 end_dst = dststep == vl-1
939 dstmask = self.dstmask
940 log(" pack/unpack/subvl", pack, unpack, subvl,
941 "end", end_dst,
942 "sub", end_dsub)
943 # now dest step
944 if unpack:
945 # unpack advances subvl in *outer* loop
946 while True:
947 assert dststep <= vl-1
948 end_dst = dststep == vl-1
# at the last element: either the whole loop is finished (sub-step also
# at its end) or the sub-step advances; dststep wraps to 0 either way.
949 if end_dst:
950 if end_dsub:
951 self.loopend = True
952 else:
953 self.svstate.dsubstep += SelectableInt(1, 2)
954 dststep = 0 # reset
955 break
956 else:
957 dststep += 1 # advance dststep
# with dststep_skip unset the very next element is accepted; otherwise
# keep advancing until an element whose dstmask bit is set (or the
# end-point above) is reached.
958 if not self.dststep_skip:
959 break
960 if ((1 << dststep) & dstmask) != 0:
961 break
962 else:
963 log(" dskip", bin(dstmask), bin(1 << dststep))
964 else:
965 # advance subvl in *inner* loop
# dststep only moves once the sub-step has reached its end; otherwise
# just the sub-step is incremented (see the final else below).
966 if end_dsub:
967 while True:
968 assert dststep <= vl-1
969 end_dst = dststep == vl-1
970 if end_dst: # end-point
971 self.loopend = True
972 dststep = 0
973 break
974 else:
975 dststep += 1
976 if not self.dststep_skip:
977 break
978 if ((1 << dststep) & dstmask) != 0:
979 break
980 else:
981 log(" dskip", bin(dstmask), bin(1 << dststep))
982 self.svstate.dsubstep = SelectableInt(0, 2) # reset
983 else:
# NOTE(review): the next comment says ssubstep; the code advances dsubstep
984 # advance ssubstep
985 self.svstate.dsubstep += SelectableInt(1, 2)
986
# the working copy of dststep is written back as a 7-bit SelectableInt
987 self.svstate.dststep = SelectableInt(dststep, 7)
988 log(" advance dst", self.svstate.dststep, self.svstate.dsubstep,
989 self.loopend)
990
991 def at_loopend(self):
992 """tells if this is the last possible element. uses the cached values
993 for src/dst-step and sub-steps
994 """
995 subvl = self.subvl
996 vl = self.svstate.vl
997 srcstep, dststep = self.new_srcstep, self.new_dststep
998 ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep
999 end_ssub = ssubstep == subvl
1000 end_dsub = dsubstep == subvl
1001 if srcstep == vl-1 and end_ssub:
1002 return True
1003 if dststep == vl-1 and end_dsub:
1004 return True
1005 return False
1006
1007 def advance_svstate_steps(self):
1008 """ advance sub/steps. note that Pack/Unpack *INVERTS* the order.
1009 TODO when Pack/Unpack is set, substep becomes the *outer* loop
1010 """
1011 self.subvl = yield self.dec2.rm_dec.rm_in.subvl
1012 if self.loopend: # huhn??
1013 return
1014 self.src_iterate()
1015 self.dst_iterate()
1016
def read_src_mask(self):
    """read/update pred_sz and src mask

    generator: yields five decoder signals (predmode, SV_Ptype,
    srcpred, dstpred, pred_sz).  stores srcmask, pred_sz, new_ssubstep
    and srcstep_skip on self.  with no predication the mask is all 64
    bits set; otherwise it comes from the destination predicate, or
    from the source predicate for P2 (twin-predicate) instructions.
    """
    # get SVSTATE VL (oh and print out some debug stuff)
    vl = self.svstate.vl
    ssubstep = self.svstate.ssubstep

    pmode = yield self.dec2.rm_dec.predmode
    sv_ptype = yield self.dec2.dec.op.SV_Ptype
    srcpred = yield self.dec2.rm_dec.srcpred
    dstpred = yield self.dec2.rm_dec.dstpred
    pred_sz = yield self.dec2.rm_dec.pred_sz

    twin_pred = sv_ptype == SVPType.P2.value
    if pmode == SVP64PredMode.INT.value:
        srcmask = get_predint(self.gpr, dstpred)
        if twin_pred:
            srcmask = get_predint(self.gpr, srcpred)
    elif pmode == SVP64PredMode.CR.value:
        srcmask = get_predcr(self.crl, dstpred, vl)
        if twin_pred:
            srcmask = get_predcr(self.crl, srcpred, vl)
    else:
        # get predicate mask (all 64 bits)
        srcmask = 0xffff_ffff_ffff_ffff

    # work out if the ssubsteps are completed
    ssubstart = ssubstep == 0
    for label, shown in ((" pmode", pmode),
                         (" ptype", sv_ptype),
                         (" srcpred", bin(srcpred)),
                         (" srcmask", bin(srcmask)),
                         (" pred_sz", bin(pred_sz)),
                         (" ssubstart", ssubstart)):
        log(label, shown)

    # store all that above
    self.srcmask = srcmask
    self.pred_sz = pred_sz
    self.new_ssubstep = ssubstep
    log(" new ssubstep", ssubstep)
    # until the predicate mask has a "1" bit... or we run out of VL
    # let srcstep==VL be the indicator to move to next instruction
    self.srcstep_skip = not pred_sz
1060
def read_dst_mask(self):
    """Fetch the destination predicate mask and destination-zeroing flag.

    Generator, mirroring read_src_mask: queries the decoder for predmode,
    reverse_gear, SV_Ptype, dstpred and pred_dz (in that order), then
    caches ``dstmask``, ``pred_dz``, ``new_dsubstep`` and ``dststep_skip``
    on self.
    """
    vl = self.svstate.vl
    dsubstep = self.svstate.dsubstep

    pmode = yield self.dec2.rm_dec.predmode
    # reverse_gear is still requested from the decoder; its value is unused
    yield self.dec2.rm_dec.reverse_gear
    sv_ptype = yield self.dec2.dec.op.SV_Ptype
    dstpred = yield self.dec2.rm_dec.dstpred
    pred_dz = yield self.dec2.rm_dec.pred_dz

    if pmode == SVP64PredMode.INT.value:
        dstmask = get_predint(self.gpr, dstpred)
    elif pmode == SVP64PredMode.CR.value:
        dstmask = get_predcr(self.crl, dstpred, vl)
    else:
        # no predication: all 64 predicate bits enabled
        dstmask = 0xffff_ffff_ffff_ffff

    # True when no destination sub-step has been taken yet (debug only)
    dsubstart = dsubstep == 0
    for label, value in ((" pmode", pmode),
                         (" ptype", sv_ptype),
                         (" dstpred", bin(dstpred)),
                         (" dstmask", bin(dstmask)),
                         (" pred_dz", bin(pred_dz)),
                         (" dsubstart", dsubstart)):
        log(label, value)

    # cache everything for skip_dst()
    self.dstmask = dstmask
    self.pred_dz = pred_dz
    self.new_dsubstep = dsubstep
    log(" new dsubstep", dsubstep)
    # without destination-zeroing, masked-out elements are skipped
    self.dststep_skip = not pred_dz
1098
def svstate_pre_inc(self):
    """Skip srcstep/dststep past masked-out predicate bits.

    Generator. Refreshes ``self.subvl`` from the decoder, re-reads the
    source and destination masks, then runs the two skip routines.
    Sub-steps are deliberately left alone: this is purely about stepping
    over masked-out elements.
    """
    self.subvl = yield self.dec2.rm_dec.rm_in.subvl

    # refresh cached predicate state: source first, then destination
    for read_mask in (self.read_src_mask, self.read_dst_mask):
        yield from read_mask()

    # now apply the skipping, in the same src-then-dst order
    for skip in (self.skip_src, self.skip_dst):
        skip()
1111
def skip_src(self):
    """Advance the source step past masked-out predicate bits.

    Uses the values cached by read_src_mask. Results are stored in
    ``self.new_srcstep`` and ``self.pred_src_zero``; ``self.loopend`` is
    set when skipping is enabled and the mask is entirely zero.
    """
    step = self.svstate.srcstep
    mask = self.srcmask
    zeroing = self.pred_sz
    limit = self.svstate.vl

    def masked_out(idx):
        # True when predicate bit `idx` is clear
        return ((1 << idx) & mask) == 0

    if self.srcstep_skip:
        # cannot do this with sv.bc - XXX TODO
        if mask == 0:
            self.loopend = True
        # walk forward to the next enabled element, stopping at VL
        while masked_out(step) and (step != limit):
            log(" sskip", bin(1 << step))
            step += 1

    # with zeroing requested, the element is zeroed only if its bit is clear
    if zeroing:
        zeroing = masked_out(step)

    self.new_srcstep = step
    self.pred_src_zero = zeroing
    log(" new srcstep", step)
1135
def skip_dst(self):
    """Advance the destination step past masked-out predicate bits.

    Uses the values cached by read_dst_mask. Results are stored in
    ``self.new_dststep`` and ``self.pred_dst_zero``; ``self.loopend`` is
    set when skipping is enabled and the mask is entirely zero.
    """
    step = self.svstate.dststep
    mask = self.dstmask
    zeroing = self.pred_dz
    limit = self.svstate.vl

    def masked_out(idx):
        # True when predicate bit `idx` is clear
        return ((1 << idx) & mask) == 0

    if self.dststep_skip:
        # cannot do this with sv.bc - XXX TODO
        if mask == 0:
            self.loopend = True
        # walk forward to the next enabled element, stopping at VL
        while masked_out(step) and (step != limit):
            log(" dskip", bin(1 << step))
            step += 1

    # with zeroing requested, the element is zeroed only if its bit is clear
    if zeroing:
        zeroing = masked_out(step)

    self.new_dststep = step
    self.pred_dst_zero = zeroing
    log(" new dststep", step)
1158
1159
class ExitSyscallCalled(Exception):
    """Signals that the emulated program requested process exit.

    The exit status is carried as the exception's single argument.
    """
1162
1163
class SyscallEmulator(openpower.syscalls.Dispatcher):
    """Services guest (ppc64) system calls using the host operating system.

    Each ``sys_*`` method implements one syscall. Guest pointers are
    resolved through the owning ISACaller's memory object, and failures
    are reported Linux-style by returning a negative errno value.
    """

    def __init__(self, isacaller):
        """Bind to *isacaller* and set up guest->host syscall dispatch."""
        self.__isacaller = isacaller

        host = os.uname().machine
        bits = (64 if (sys.maxsize > (2**32)) else 32)
        host = openpower.syscalls.architecture(arch=host, bits=bits)

        super().__init__(guest="ppc64", host=host)

    def __call__(self, identifier, *arguments):
        """Dispatch a syscall after coercing id and arguments to int."""
        (identifier, *arguments) = map(int, (identifier, *arguments))
        return super().__call__(identifier, *arguments)

    def sys_exit_group(self, status, *rest):
        """Mark the simulator halted and raise ExitSyscallCalled(status)."""
        self.__isacaller.halted = True
        raise ExitSyscallCalled(status)

    def sys_write(self, fd, buf, count, *rest):
        """Write *count* bytes of guest memory at *buf* to host *fd*.

        Returns os.write()'s result, -EFAULT if the guest buffer cannot
        be resolved, or -errno from the host.
        """
        try:
            buf = self.__isacaller.mem.get_ctypes(buf, count, is_write=False)
        except (ValueError, MemException):
            # same convention as sys_uname/sys_openat for bad guest pointers
            return -errno.EFAULT
        try:
            return os.write(fd, buf)
        except OSError as e:
            return -e.errno

    def sys_read(self, fd, buf, count, *rest):
        """Read up to *count* bytes from host *fd* into guest memory.

        Returns os.readv()'s result, -EFAULT if the guest buffer cannot
        be resolved, or -errno from the host.
        """
        try:
            buf = self.__isacaller.mem.get_ctypes(buf, count, is_write=True)
        except (ValueError, MemException):
            # same convention as sys_uname/sys_openat for bad guest pointers
            return -errno.EFAULT
        try:
            return os.readv(fd, [buf])
        except OSError as e:
            return -e.errno

    def sys_mmap(self, addr, length, prot, flags, fd, offset, *rest):
        """Forward mmap to the memory object (is_mmap2=False)."""
        return self.__isacaller.mem.mmap_syscall(
            addr, length, prot, flags, fd, offset, is_mmap2=False)

    def sys_mmap2(self, addr, length, prot, flags, fd, offset, *rest):
        """Forward mmap2 to the memory object (is_mmap2=True)."""
        return self.__isacaller.mem.mmap_syscall(
            addr, length, prot, flags, fd, offset, is_mmap2=True)

    def sys_brk(self, addr, *rest):
        """Forward brk to the memory object."""
        return self.__isacaller.mem.brk_syscall(addr)

    def sys_munmap(self, addr, length, *rest):
        return -errno.ENOSYS  # TODO: implement

    def sys_mprotect(self, addr, length, prot, *rest):
        return -errno.ENOSYS  # TODO: implement

    def sys_pkey_mprotect(self, addr, length, prot, pkey, *rest):
        return -errno.ENOSYS  # TODO: implement

    def sys_openat(self, dirfd, pathname, flags, mode, *rest):
        """Open the path whose C string lives at guest address *pathname*.

        Returns the host file descriptor, -EFAULT if the path cannot be
        read from guest memory, or -errno from the host.
        """
        try:
            path = self.__isacaller.mem.read_cstr(pathname)
        except (ValueError, MemException):
            return -errno.EFAULT
        try:
            if dirfd == ppc_flags.AT_FDCWD:
                return os.open(path, flags, mode)
            else:
                return os.open(path, flags, mode, dir_fd=dirfd)
        except OSError as e:
            return -e.errno

    def _uname(self):
        """Return the six utsname fields (as bytes) reported to the guest.

        Only nodename comes from the host; the remaining fields are fixed
        strings describing a ppc64le Linux system.
        """
        uname = os.uname()
        sysname = b'Linux'
        nodename = uname.nodename.encode()
        release = b'5.6.0-1-powerpc64le'
        version = b'#1 SMP Debian 5.6.7-1 (2020-04-29)'
        machine = b'ppc64le'
        domainname = b''
        return sysname, nodename, release, version, machine, domainname

    def sys_uname(self, buf, *rest):
        """Fill guest *buf* with five 65-byte utsname fields (no domain)."""
        s = struct.Struct("<65s65s65s65s65s")
        try:
            buf = self.__isacaller.mem.get_ctypes(buf, s.size, is_write=True)
        except (ValueError, MemException):
            return -errno.EFAULT
        sysname, nodename, release, version, machine, domainname = \
            self._uname()
        s.pack_into(buf, 0, sysname, nodename, release, version, machine)
        return 0

    def sys_newuname(self, buf, *rest):
        """Fill guest *buf* with six utsname fields including domainname."""
        # a plain `ppc_flags.__NEW_UTS_LEN` written inside a class body is
        # name-mangled to `ppc_flags._SyscallEmulator__NEW_UTS_LEN`, which
        # does not exist; getattr() with a string literal is not mangled
        name_len = getattr(ppc_flags, "__NEW_UTS_LEN") + 1
        s = struct.Struct("<%ds%ds%ds%ds%ds%ds" % ((name_len,) * 6))
        try:
            buf = self.__isacaller.mem.get_ctypes(buf, s.size, is_write=True)
        except (ValueError, MemException):
            return -errno.EFAULT
        sysname, nodename, release, version, machine, domainname = \
            self._uname()
        s.pack_into(buf, 0,
                    sysname, nodename, release, version, machine, domainname)
        return 0
1262
1263
1264 class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
1265 # decoder2 - an instance of power_decoder2
1266 # regfile - a list of initial values for the registers
1267 # initial_{etc} - initial values for SPRs, Condition Register, Mem, MSR
1268 # respect_pc - tracks the program counter. requires initial_insns
def __init__(self, decoder2, regfile, initial_sprs=None, initial_cr=0,
             initial_mem=None, initial_msr=0,
             initial_svstate=0,
             initial_insns=None,
             fpregfile=None,
             respect_pc=False,
             disassembly=None,
             initial_pc=0,
             bigendian=False,
             mmu=False,
             icachemmu=False,
             initial_fpscr=0,
             insnlog=None,
             use_mmap_mem=False,
             use_syscall_emu=False,
             emulating_mmap=False):
    """Create the simulator state.

    decoder2: power_decoder2 instance; kept as ``self.dec2`` (and its
        ``.dec`` as ``self.decoder``) and handed to the GPR/SPR objects
    regfile / fpregfile: initial GPR / FPR values (fpregfile defaults
        to 32 zeros)
    initial_sprs: initial SPR values; deep-copied before use
    initial_cr: initial value given to CRFields
    initial_mem: initial data memory; when it is a tuple and respect_pc
        is False its first element becomes the fake PC
    initial_msr: initial MSR (explicit None selects DEFAULT_MSR)
    initial_svstate: int (or None, treated as 0) converted to SVP64State,
        otherwise used as given
    initial_insns: instruction memory contents or an ELFFile; an ELFFile
        forces use_mmap_mem and emulating_mmap on
    respect_pc: when False a separate "fake" program counter is kept
    disassembly: sequence of instruction text, keyed at 4-byte steps
    initial_pc: start address (replaced by the ELF's pc when one loads)
    bigendian: stored as ``self.bigendian``
    mmu / icachemmu: wrap data / instruction memory in RADIX
    initial_fpscr: initial FPSCR (replaced by the ELF's when one loads)
    insnlog: trace log; an object with ``write`` or a path to open
    use_mmap_mem: use MemMMap (shared by data and instructions)
    use_syscall_emu: create a SyscallEmulator (forces use_mmap_mem)
    emulating_mmap: passed through to MemMMap
    """
    if use_syscall_emu:
        self.syscall = SyscallEmulator(isacaller=self)
        if not use_mmap_mem:
            log("forcing use_mmap_mem due to use_syscall_emu active")
            use_mmap_mem = True
    else:
        self.syscall = None

    # we will eventually be able to load ELF files without use_syscall_emu
    # (e.g. the linux kernel), so do it in a separate if block
    if isinstance(initial_insns, ELFFile):
        if not use_mmap_mem:
            log("forcing use_mmap_mem due to loading an ELF file")
            use_mmap_mem = True
        if not emulating_mmap:
            log("forcing emulating_mmap due to loading an ELF file")
            emulating_mmap = True

    # trace log file for model output. if None do nothing
    self.insnlog = insnlog
    self.insnlog_is_file = hasattr(insnlog, "write")
    # anything truthy without a write() method is treated as a filename.
    # NOTE(review): no close() for this file is visible in this block
    if not self.insnlog_is_file and self.insnlog:
        self.insnlog = open(self.insnlog, "w")

    self.bigendian = bigendian
    self.halted = False
    self.is_svp64_mode = False
    self.respect_pc = respect_pc
    if initial_sprs is None:
        initial_sprs = {}
    if initial_mem is None:
        initial_mem = {}
    if fpregfile is None:
        fpregfile = [0] * 32
    if initial_insns is None:
        initial_insns = {}
        assert self.respect_pc == False, "instructions required to honor pc"
    # the parameter default is 0, so DEFAULT_MSR is only selected when
    # the caller passes None explicitly
    if initial_msr is None:
        initial_msr = DEFAULT_MSR

    log("ISACaller insns", respect_pc, initial_insns, disassembly)
    log("ISACaller initial_msr", initial_msr)

    # "fake program counter" mode (for unit testing)
    self.fake_pc = 0
    disasm_start = 0
    if not respect_pc:
        if isinstance(initial_mem, tuple):
            self.fake_pc = initial_mem[0]
            disasm_start = self.fake_pc
    else:
        disasm_start = initial_pc

    # disassembly: we need this for now (not given from the decoder)
    # keyed by address: one entry every 4 bytes from disasm_start
    self.disassembly = {}
    if disassembly:
        for i, code in enumerate(disassembly):
            self.disassembly[i*4 + disasm_start] = code

    # set up registers, instruction memory, data memory, PC, SPRs, MSR, CR
    self.svp64rm = SVP64RM()
    if initial_svstate is None:
        initial_svstate = 0
    if isinstance(initial_svstate, int):
        initial_svstate = SVP64State(initial_svstate)
    # SVSTATE, MSR and PC
    StepLoop.__init__(self, initial_svstate)
    self.msr = SelectableInt(initial_msr, 64)  # underlying reg
    self.pc = PC()
    # GPR FPR SPR registers
    initial_sprs = deepcopy(initial_sprs)  # so as not to get modified
    self.gpr = GPR(decoder2, self, self.svstate, regfile)
    self.fpr = GPR(decoder2, self, self.svstate, fpregfile)
    # initialise SPRs before MMU
    self.spr = SPR(decoder2, initial_sprs, gpr=self.gpr)

    # set up 4 dummy SVSHAPEs if they aren't already set up
    for i in range(4):
        sname = 'SVSHAPE%d' % i
        val = self.spr.get(sname, 0)
        # make sure it's an SVSHAPE -- conversion done by SPR.__setitem__
        self.spr[sname] = val
    self.last_op_svshape = False

    # "raw" memory
    if use_mmap_mem:
        # a single MemMMap serves as both data and instruction memory
        self.mem = MemMMap(row_bytes=8,
                           initial_mem=initial_mem,
                           misaligned_ok=True,
                           emulating_mmap=emulating_mmap)
        self.imem = self.mem
        lelf = self.mem.initialize(row_bytes=4, initial_mem=initial_insns)
        if isinstance(lelf, LoadedELF):  # stuff parsed from ELF
            # the ELF overrides the start PC, some GPRs and the FPSCR
            initial_pc = lelf.pc
            for k, v in lelf.gprs.items():
                self.gpr[k] = SelectableInt(v, 64)
            initial_fpscr = lelf.fpscr
        self.mem.log_fancy(kind=LogType.InstrInOuts)
    else:
        # separate data (8-byte rows) and instruction (4-byte rows) memories
        self.mem = Mem(row_bytes=8, initial_mem=initial_mem,
                       misaligned_ok=True)
        self.mem.log_fancy(kind=LogType.InstrInOuts)
        self.imem = Mem(row_bytes=4, initial_mem=initial_insns)
    # MMU mode, redirect underlying Mem through RADIX
    if mmu:
        self.mem = RADIX(self.mem, self)
        if icachemmu:
            self.imem = RADIX(self.imem, self)

    # TODO, needed here:
    # FPR (same as GPR except for FP nums)
    # 4.2.2 p124 FPSCR (definitely "separate" - not in SPR)
    # note that mffs, mcrfs, mtfsf "manage" this FPSCR
    self.fpscr = FPSCRState(initial_fpscr)

    # 2.3.1 CR (and sub-fields CR0..CR6 - CR0 SO comes from XER.SO)
    # note that mfocrf, mfcr, mtcr, mtocrf, mcrxrx "manage" CRs
    # -- Done
    # 2.3.2 LR (actually SPR #8) -- Done
    # 2.3.3 CTR (actually SPR #9) -- Done
    # 2.3.4 TAR (actually SPR #815)
    # 3.2.2 p45 XER (actually SPR #1) -- Done
    # 3.2.3 p46 p232 VRSAVE (actually SPR #256)

    # create CR then allow portions of it to be "selectable" (below)
    self.cr_fields = CRFields(initial_cr)
    self.cr = self.cr_fields.cr
    self.cr_backup = 0  # sigh, dreadful hack: for fail-first (VLi)

    # "undefined", just set to variable-bit-width int (use exts "max")
    # self.undefined = SelectableInt(0, EFFECTIVELY_UNLIMITED)

    # the namespace dict: starts with every SPR, then the entries below
    self.namespace = {}
    self.namespace.update(self.spr)
    self.namespace.update({'GPR': self.gpr,
                           'FPR': self.fpr,
                           'MEM': self.mem,
                           'SPR': self.spr,
                           'memassign': self.memassign,
                           'NIA': self.pc.NIA,
                           'CIA': self.pc.CIA,
                           'SVSTATE': self.svstate,
                           'SVSHAPE0': self.spr['SVSHAPE0'],
                           'SVSHAPE1': self.spr['SVSHAPE1'],
                           'SVSHAPE2': self.spr['SVSHAPE2'],
                           'SVSHAPE3': self.spr['SVSHAPE3'],
                           'CR': self.cr,
                           'MSR': self.msr,
                           'FPSCR': self.fpscr,
                           'undefined': undefined,
                           'mode_is_64bit': True,
                           'SO': XER_bits['SO'],
                           'XLEN': 64  # elwidth overrides
                           })

    # every name in BFP_FLAG_NAMES becomes an attribute, initially 0
    for name in BFP_FLAG_NAMES:
        setattr(self, name, 0)

    # update pc to requested start point
    self.set_pc(initial_pc)

    # field-selectable versions of Condition Register
    self.crl = self.cr_fields.crl
    for i in range(8):
        self.namespace["CR%d" % i] = self.crl[i]

    self.decoder = decoder2.dec
    self.dec2 = decoder2

    super().__init__(XLEN=self.namespace["XLEN"], FPSCR=self.fpscr)
1455
def trace(self, out):
    """Write *out* to the instruction trace log; no-op without a log."""
    sink = self.insnlog
    if sink is not None:
        sink.write(out)
1460
@property
def XLEN(self):
    """Current XLEN, as held in the namespace dict."""
    namespace = self.namespace
    return namespace["XLEN"]
1464
@property
def FPSCR(self):
    """The simulator's FPSCR object (alias of ``self.fpscr``)."""
    fpscr = self.fpscr
    return fpscr
1468
def call_trap(self, trap_addr, trap_bit):
    """Run TRAP and redirect execution to the resulting address.

    After ``TRAP(trap_addr, trap_bit)`` the computed ``self.trap_nia`` is
    installed as NIA in the namespace and the PC is updated from it, so
    the next instruction begins at the trap vector.
    """
    self.TRAP(trap_addr, trap_bit)
    namespace = self.namespace
    namespace['NIA'] = self.trap_nia
    self.pc.update(namespace, self.is_svp64_mode)
1476
def TRAP(self, trap_addr=0x700, trap_bit=PIb.TRAP):
    """Save PC and MSR (and SVSTATE in SVP64 mode), then update the MSR.

    The pseudocode can call TRAP directly, which is why the arguments
    have defaults. Code inside ISACaller should normally go through
    call_trap(), which also redirects the PC.

    trap_addr: int | SelectableInt
        the address to go to (before any modifications from `KAIVB`)
    trap_bit: int | None
        the bit in `SRR1` to set, `None` means don't set any bits.
    """
    if isinstance(trap_addr, SelectableInt):
        trap_addr = trap_addr.value
    # https://bugs.libre-soc.org/show_bug.cgi?id=859
    kaivb = self.spr['KAIVB'].value
    msr = self.namespace['MSR'].value
    log("TRAP:", hex(trap_addr), hex(msr), "kaivb", hex(kaivb))

    # SRR0 <- CIA, SRR1 <- MSR, SVSRR0 <- SVSTATE (SVP64 mode only)
    spr = self.spr
    spr['SRR0'].value = self.pc.CIA.value
    spr['SRR1'].value = msr
    if self.is_svp64_mode:
        spr['SVSRR0'] = self.namespace['SVSTATE'].value
    # new NIA: trap address combined with KAIVB (low 13 bits masked off)
    self.trap_nia = SelectableInt(trap_addr | (kaivb & ~0x1fff), 64)
    if trap_bit is not None:
        spr['SRR1'][trap_bit] = 1  # change *copy* of MSR in SRR1

    # set exception bits. TODO: this should, based on the address
    # in figure 66 p1065 V3.0B and the table figure 65 p1063 set these
    # bits appropriately. however it turns out that *for now* in all
    # cases (all trap_addrs) the exact same thing is needed.
    msr_updates = (
        (MSRb.IR, 0),
        (MSRb.DR, 0),
        (MSRb.FE0, 0),
        (MSRb.FE1, 0),
        (MSRb.EE, 0),
        (MSRb.RI, 0),
        (MSRb.SF, 1),
        (MSRb.TM, 0),
        (MSRb.VEC, 0),
        (MSRb.VSX, 0),
        (MSRb.PR, 0),
        (MSRb.FP, 0),
        (MSRb.PMM, 0),
        (MSRb.TEs, 0),
        (MSRb.TEe, 0),
        (MSRb.UND, 0),
        (MSRb.LE, 1),
    )
    for bit, value in msr_updates:
        self.msr[bit] = value
1527
def memassign(self, ea, sz, val):
    """Delegate a memory assignment to the data memory object."""
    memory = self.mem
    memory.memassign(ea, sz, val)
1530
def prep_namespace(self, insn_name, formname, op_fields, xlen):
    """Load the namespace with operand fields and XER/SVSTATE values.

    Generator: each operand field is read by yielding its decoder
    signal. Also snapshots the CR into ``self.cr_backup`` and, for
    SVP64 ``sv.bc*`` instructions only, adds the branch-mode flags.

    insn_name: instruction name (used for logging and the sv.bc test)
    formname: key into ``self.decoder.sigforms``
    op_fields: names of the operand fields to load
    xlen: value stored as namespace['XLEN']
    """
    # TODO: get field names from form in decoder*1* (not decoder2)
    # decoder2 is hand-created, and decoder1.sigform is auto-generated
    # from spec
    # then "yield" fields only from op_fields rather than hard-coded
    # list, here.
    fields = self.decoder.sigforms[formname]
    log("prep_namespace", formname, op_fields, insn_name)
    for name in op_fields:
        # CR immediates. deal with separately. needs modifying
        # pseudocode
        if self.is_svp64_mode and name in ['BI']:  # TODO, more CRs
            # BI is a 5-bit, must reconstruct the value
            regnum, is_vec = yield from get_cr_in(self.dec2, name)
            sig = getattr(fields, name)
            val = yield sig
            # low 2 LSBs (CR field selector) remain same, CR num extended
            assert regnum <= 7, "sigh, TODO, 128 CR fields"
            val = (val & 0b11) | (regnum << 2)
        elif self.is_svp64_mode and name in ['BF']:  # TODO, more CRs
            # no signal is read here: the value is the CR-out register
            # number itself
            regnum, is_vec = yield from get_cr_out(self.dec2, "BF")
            log('hack %s' % name, regnum, is_vec)
            val = regnum
        else:
            sig = getattr(fields, name)
            val = yield sig
        # these are all opcode fields involved in index-selection of CR,
        # and need to do "standard" arithmetic. CR[BA+32] for example
        # would, if using SelectableInt, only be 5-bit.
        if name in ['BF', 'BFA', 'BC', 'BA', 'BB', 'BT', 'BI']:
            self.namespace[name] = val
        else:
            # every name that reaches here went through a branch that
            # assigned `sig` in this iteration
            self.namespace[name] = SelectableInt(val, sig.width)

    # XER itself plus plain-value copies of its carry/overflow bits
    self.namespace['XER'] = self.spr['XER']
    self.namespace['CA'] = self.spr['XER'][XER_bits['CA']].value
    self.namespace['CA32'] = self.spr['XER'][XER_bits['CA32']].value
    self.namespace['OV'] = self.spr['XER'][XER_bits['OV']].value
    self.namespace['OV32'] = self.spr['XER'][XER_bits['OV32']].value
    self.namespace['XLEN'] = xlen

    # add some SVSTATE convenience variables
    vl = self.svstate.vl
    srcstep = self.svstate.srcstep
    self.namespace['VL'] = vl
    self.namespace['srcstep'] = srcstep

    # take a copy of the CR field value: if non-VLi fail-first fails
    # this is because the pseudocode writes *directly* to CR. sigh
    self.cr_backup = self.cr.value

    # sv.bc* need some extra fields
    if not self.is_svp64_mode or not insn_name.startswith("sv.bc"):
        return

    # blegh grab bits manually
    mode = yield self.dec2.rm_dec.rm_in.mode
    # convert to SelectableInt before test
    mode = SelectableInt(mode, 5)
    bc_vlset = mode[SVP64MODEb.BC_VLSET] != 0
    bc_vli = mode[SVP64MODEb.BC_VLI] != 0
    bc_snz = mode[SVP64MODEb.BC_SNZ] != 0
    bc_vsb = yield self.dec2.rm_dec.bc_vsb
    bc_ctrtest = yield self.dec2.rm_dec.bc_ctrtest
    bc_lru = yield self.dec2.rm_dec.bc_lru
    bc_gate = yield self.dec2.rm_dec.bc_gate
    sz = yield self.dec2.rm_dec.pred_sz
    # each flag is stored as a 1-bit SelectableInt; mode stays 5-bit
    self.namespace['mode'] = SelectableInt(mode, 5)
    self.namespace['ALL'] = SelectableInt(bc_gate, 1)
    self.namespace['VSb'] = SelectableInt(bc_vsb, 1)
    self.namespace['LRu'] = SelectableInt(bc_lru, 1)
    self.namespace['CTRtest'] = SelectableInt(bc_ctrtest, 1)
    self.namespace['VLSET'] = SelectableInt(bc_vlset, 1)
    self.namespace['VLI'] = SelectableInt(bc_vli, 1)
    self.namespace['sz'] = SelectableInt(sz, 1)
    self.namespace['SNZ'] = SelectableInt(bc_snz, 1)
1607
def get_kludged_op_add_ca_ov(self, inputs, inp_ca_ov):
    """Recompute carry/overflow for OP_ADD-class instructions by mnemonic.

    Generator returning ``(ca64, ca32, ov64, ov32)``. Raises
    NotImplementedError for a mnemonic that has no entry below.

    As the original author notes, this was not at all necessary: it
    laboriously duplicates the CSV files extracted from microwatt's
    source code. A-inversion is the "inv A" column, output inversion the
    "inv out" column, and carry-in (0, 1 or CA) the "cry in" column. All
    of that is available in self.instrs[ins_name].op_fields

    https://git.libre-soc.org/?p=openpower-isa.git;a=blob;f=openpower/isatables/minor_31.csv;hb=HEAD

    The immediate constants are *also* decoded correctly and placed,
    usually by DecodeIn2Imm, into operand2 as part of power_decoder2.py
    """
    def carry_out(a, b, carry_in, width):
        # carry out of the low `width` bits of a + b + carry_in
        mask = (1 << width) - 1
        return ((a & mask) + (b & mask) + carry_in) >> width

    asmcode = yield self.dec2.dec.op.asmcode
    insn = insns.get(asmcode)
    imm = yield self.dec2.dec.SI
    imm &= 0xFFFF
    CA, OV = inp_ca_ov
    vals = [i.value for i in inputs]
    # sign-extend the 16-bit immediate
    if imm & 0x8000:
        imm -= 0x10000

    if insn == "addex":
        # CA[32] aren't actually written, just generate so we have
        # something to return
        ov64 = carry_out(vals[0], vals[1], OV, 64)
        ov32 = carry_out(vals[0], vals[1], OV, 32)
        return ov64, ov32, ov64, ov32

    # operand-B producers are lazy so a missing second input is only
    # touched by the instructions that really use it
    def from_rb():
        return vals[1]

    def from_si():
        return imm

    def minus_one():
        return ~0

    def zero():
        return 0

    # (mnemonics) -> (invert A?, operand B producer, carry-in)
    recipes = (
        (("add", "addo", "addc", "addco"), (False, from_rb, 0)),
        (("addic", "addic."), (False, from_si, 0)),
        (("subf", "subfo", "subfc", "subfco"), (True, from_rb, 1)),
        (("subfic",), (True, from_si, 1)),
        (("adde", "addeo"), (False, from_rb, CA)),
        (("subfe", "subfeo"), (True, from_rb, CA)),
        (("addme", "addmeo"), (False, minus_one, CA)),
        (("addze", "addzeo"), (False, zero, CA)),
        (("subfme", "subfmeo"), (True, minus_one, CA)),
        (("subfze", "subfzeo"), (True, zero, CA)),
        (("neg", "nego"), (True, zero, 1)),
    )
    recipe = None
    for names, candidate in recipes:
        if insn in names:
            recipe = candidate
            break
    if recipe is None:
        raise NotImplementedError(
            "op_add kludge unimplemented instruction: ", asmcode, insn)

    invert_a, operand_b, ca_in = recipe
    a = ~vals[0] if invert_a else vals[0]
    b = operand_b()

    ca64 = carry_out(a, b, ca_in, 64)
    ca32 = carry_out(a, b, ca_in, 32)
    # overflow: carry into the sign bit differs from carry out of it
    ov64 = ca64 != carry_out(a, b, ca_in, 63)
    ov32 = ca32 != carry_out(a, b, ca_in, 31)
    return ca64, ca32, ov64, ov32
1697
def handle_carry_(self, inputs, output, ca, ca32, inp_ca_ov):
    """Write XER.CA / XER.CA32 where the caller has not supplied them.

    Generator. ``ca`` / ``ca32`` being not-None marks that flag as
    already written: with both set nothing is done. For OP_ADD with
    neither set, the values come from get_kludged_op_add_ca_ov() (for
    addex they are written to OV/OV32 instead). Otherwise the carry is
    derived by comparing inputs against the output.

    NOTE: on the non-kludge path ``inputs`` is modified in place
    (element 0 may be inverted, an immediate may be appended).
    NOTE(review): handle_overflow applies the same in-place changes to
    its own ``inputs``; if callers hand both the same list the effects
    would stack - confirm against the call site.
    """
    if ca is not None and ca32 is not None:
        return
    op = yield self.dec2.e.do.insn_type
    if op == MicrOp.OP_ADD.value and ca is None and ca32 is None:
        retval = yield from self.get_kludged_op_add_ca_ov(
            inputs, inp_ca_ov)
        ca, ca32, ov, ov32 = retval
        asmcode = yield self.dec2.dec.op.asmcode
        if insns.get(asmcode) == 'addex':
            # TODO: if 32-bit mode, set ov to ov32
            self.spr['XER'][XER_bits['OV']] = ov
            self.spr['XER'][XER_bits['OV32']] = ov32
            log(f"write OV/OV32 OV={ov} OV32={ov32}",
                kind=LogType.InstrInOuts)
        else:
            # TODO: if 32-bit mode, set ca to ca32
            self.spr['XER'][XER_bits['CA']] = ca
            self.spr['XER'][XER_bits['CA32']] = ca32
            log(f"write CA/CA32 CA={ca} CA32={ca32}",
                kind=LogType.InstrInOuts)
        return
    inv_a = yield self.dec2.e.do.invert_in
    if inv_a:
        inputs[0] = ~inputs[0]

    imm_ok = yield self.dec2.e.do.imm_data.ok
    if imm_ok:
        imm = yield self.dec2.e.do.imm_data.data
        inputs.append(SelectableInt(imm, 64))
    # 64-bit carry: set when any input is unsigned-greater than the output
    gts = []
    for x in inputs:
        log("gt input", x, output)
        gt = (gtu(x, output))
        gts.append(gt)
    log(gts)
    cy = 1 if any(gts) else 0
    log("CA", cy, gts)
    if ca is None:  # already written
        self.spr['XER'][XER_bits['CA']] = cy

    # 32 bit carry
    # ARGH... different for OP_ADD... *sigh*...
    op = yield self.dec2.e.do.insn_type
    if op == MicrOp.OP_ADD.value:
        # XOR of the (1 << 32) bit of the result and of each input
        res32 = (output.value & (1 << 32)) != 0
        a32 = (inputs[0].value & (1 << 32)) != 0
        if len(inputs) >= 2:
            b32 = (inputs[1].value & (1 << 32)) != 0
        else:
            b32 = False
        cy32 = res32 ^ a32 ^ b32
        log("CA32 ADD", cy32)
    else:
        # same unsigned-greater-than test on the [32:64] slices
        gts = []
        for x in inputs:
            log("input", x, output)
            log(" x[32:64]", x, x[32:64])
            log(" o[32:64]", output, output[32:64])
            gt = (gtu(x[32:64], output[32:64])) == SelectableInt(1, 1)
            gts.append(gt)
        cy32 = 1 if any(gts) else 0
    log("CA32", cy32, gts)
    if ca32 is None:  # already written
        self.spr['XER'][XER_bits['CA32']] = cy32
1763
def handle_overflow(self, inputs, output, div_overflow, inp_ca_ov):
    """Update XER.OV, XER.OV32 and the sticky XER.SO bit.

    Generator. OP_ADD takes its values from get_kludged_op_add_ca_ov().
    For everything else ``div_overflow`` (when not None) is used for
    both OV and OV32; failing that, overflow is derived from the signs
    of the first two inputs and the output. Nothing is written when
    there are fewer than two inputs and no ``div_overflow``.

    NOTE: on the non-OP_ADD path ``inputs`` is modified in place
    (element 0 may be inverted, an immediate may be appended).
    """
    op = yield self.dec2.e.do.insn_type
    if op == MicrOp.OP_ADD.value:
        retval = yield from self.get_kludged_op_add_ca_ov(
            inputs, inp_ca_ov)
        ca, ca32, ov, ov32 = retval
        # TODO: if 32-bit mode, set ov to ov32
        self.spr['XER'][XER_bits['OV']] = ov
        self.spr['XER'][XER_bits['OV32']] = ov32
        self.spr['XER'][XER_bits['SO']] |= ov
        return
    if hasattr(self.dec2.e.do, "invert_in"):
        inv_a = yield self.dec2.e.do.invert_in
        if inv_a:
            inputs[0] = ~inputs[0]

    imm_ok = yield self.dec2.e.do.imm_data.ok
    if imm_ok:
        imm = yield self.dec2.e.do.imm_data.data
        inputs.append(SelectableInt(imm, 64))
    log("handle_overflow", inputs, output, div_overflow)
    if len(inputs) < 2 and div_overflow is None:
        return

    # div overflow is different: it's returned by the pseudo-code
    # because it's more complex than can be done by analysing the output
    if div_overflow is not None:
        ov, ov32 = div_overflow, div_overflow
    # arithmetic overflow can be done by analysing the input and output
    elif len(inputs) >= 2:
        # OV (64-bit)
        # overflow = both inputs share a sign and the output's differs
        input_sgn = [exts(x.value, x.bits) < 0 for x in inputs]
        output_sgn = exts(output.value, output.bits) < 0
        ov = 1 if input_sgn[0] == input_sgn[1] and \
            output_sgn != input_sgn[0] else 0

        # OV (32-bit)
        input32_sgn = [exts(x.value, 32) < 0 for x in inputs]
        output32_sgn = exts(output.value, 32) < 0
        ov32 = 1 if input32_sgn[0] == input32_sgn[1] and \
            output32_sgn != input32_sgn[0] else 0

    # now update XER OV/OV32/SO
    so = self.spr['XER'][XER_bits['SO']]
    new_so = so | ov  # sticky overflow ORs in old with new
    self.spr['XER'][XER_bits['OV']] = ov
    self.spr['XER'][XER_bits['OV32']] = ov32
    self.spr['XER'][XER_bits['SO']] = new_so
    log(" set overflow", ov, ov32, so, new_so)
1813
def handle_comparison(self, out, cr_idx=0, overflow=None, no_so=False):
    """Derive LT/GT/EQ/SO from *out* and store them in a CR field.

    out: SelectableInt result; it is sign-extended at its own width
        before being compared against zero
    cr_idx: index into ``self.crl`` of the CR field to write
    overflow: when equal to 1, forces the SO bit of the field to 1
    no_so: when True, XER.SO is not read (important for setvl)

    Returns the CR field value that was written.
    """
    # the message must only use names that exist here: it is evaluated
    # when the assertion fails, and an undefined name would turn the
    # AssertionError into a NameError
    assert isinstance(out, SelectableInt), \
        "out zero not a SelectableInt %s" % repr(out)
    log("handle_comparison", out.bits, hex(out.value))
    # TODO - XXX *processor* in 32-bit mode (would compare exts(out, 32))
    # https://bugs.libre-soc.org/show_bug.cgi?id=424
    out = exts(out.value, out.bits)
    log("handle_comparison exts", hex(out))
    # create the three main CR flags, EQ GT LT
    zero = SelectableInt(out == 0, 1)
    positive = SelectableInt(out > 0, 1)
    negative = SelectableInt(out < 0, 1)
    # get (or not) XER.SO. for setvl this is important *not* to read SO
    if no_so:
        # NOTE(review): elsewhere in this file the arguments are
        # (value, bits), so this is value=1 with bits=0 - confirm that
        # a zero-width SO is really what is intended here
        SO = SelectableInt(1, 0)
    else:
        SO = self.spr['XER'][XER_bits['SO']]
    log("handle_comparison SO", SO.value,
        "overflow", overflow,
        "zero", zero.value,
        "+ve", positive.value,
        "-ve", negative.value)
    # alternative overflow checking (setvl mainly at the moment)
    if overflow is not None and overflow == 1:
        SO = SelectableInt(1, 1)
    # create the four CR field values and set the required CR field
    cr_field = selectconcat(negative, positive, zero, SO)
    log("handle_comparison cr_field", self.cr, cr_idx, cr_field)
    self.crl[cr_idx].eq(cr_field)
    return cr_field
1847
def set_pc(self, pc_val):
    """Point the simulator at *pc_val*.

    The value is stored as a 64-bit NIA in the namespace, then the PC
    object is updated from the namespace.
    """
    namespace = self.namespace
    namespace['NIA'] = SelectableInt(pc_val, 64)
    self.pc.update(namespace, self.is_svp64_mode)
1851
def get_next_insn(self):
    """Fetch the 4-byte instruction at the current program counter.

    The address is CIA when ``respect_pc`` is set, otherwise the fake
    PC. Returns ``(pc, instruction)``; raises KeyError when instruction
    memory holds nothing at that address.
    """
    pc = self.pc.CIA.value if self.respect_pc else self.fake_pc
    ins = self.imem.ld(pc, 4, False, True, instr_fetch=True)
    if ins is not None:
        return pc, ins
    raise KeyError("no instruction at 0x%x" % pc)
1863
def setup_one(self):
    """Fetch the next instruction and set it up (generator)."""
    fetched = self.get_next_insn()
    pc, insn = fetched
    yield from self.setup_next_insn(pc, insn)
1869
# cache since it's really slow to construct
# (class-level, so one Prefix object is shared and reused on every decode)
__PREFIX_CACHE = SVP64Instruction.Prefix(SelectableInt(value=0, bits=32))

def __decode_prefix(self, opcode):
    """Return the shared Prefix object, reloaded with *opcode*.

    The cached object is overwritten in place, so the returned prefix is
    only meaningful until the next call.
    """
    pfx = self.__PREFIX_CACHE
    pfx.storage.eq(opcode)
    return pfx
1877
def setup_next_insn(self, pc, ins):
    """Feed instruction *ins* at address *pc* to the decoder.

    Generator yielding simulator commands. Sets the decoder inputs,
    settles, then tests the opcode for an SVP64 prefix and records the
    outcome in ``self.is_svp64_mode``. For a prefixed instruction the
    following word (at pc+4) is fetched and presented to the decoder
    together with the prefix's RM field.
    """
    self._pc = pc
    log("setup: 0x%x 0x%x %s" % (pc, ins & 0xffffffff, bin(ins)))
    log("CIA NIA", self.respect_pc, self.pc.CIA.value, self.pc.NIA.value)

    # drive the decoder inputs (sv_rm cleared until a prefix is found)
    yield self.dec2.sv_rm.eq(0)
    yield self.dec2.dec.raw_opcode_in.eq(ins & 0xffffffff)
    yield self.dec2.dec.bigendian.eq(self.bigendian)
    yield self.dec2.state.msr.eq(self.msr.value)
    yield self.dec2.state.pc.eq(pc)
    if self.svstate is not None:
        yield self.dec2.state.svstate.eq(self.svstate.value)

    # SVP64. first, check if the opcode is EXT001, and SVP64 id bits set
    yield Settle()
    opcode = yield self.dec2.dec.opcode_in
    opcode = SelectableInt(value=opcode, bits=32)
    pfx = self.__decode_prefix(opcode)
    log("prefix test: opcode:", pfx.PO, bin(pfx.PO), pfx.id)
    self.is_svp64_mode = bool((pfx.PO == 0b000001) and (pfx.id == 0b11))
    self.pc.update_nia(self.is_svp64_mode)
    # set SVP64 decode
    yield self.dec2.is_svp64_mode.eq(self.is_svp64_mode)
    self.namespace['NIA'] = self.pc.NIA
    self.namespace['SVSTATE'] = self.svstate
    if not self.is_svp64_mode:
        return

    # in SVP64 mode. decode/print out svp64 prefix, get v3.0B instruction
    log("svp64.rm", bin(pfx.rm))
    log(" svstate.vl", self.svstate.vl)
    log(" svstate.mvl", self.svstate.maxvl)
    # the word after the prefix is the instruction proper
    ins = self.imem.ld(pc+4, 4, False, True, instr_fetch=True)
    log(" svsetup: 0x%x 0x%x %s" % (pc+4, ins & 0xffffffff, bin(ins)))
    yield self.dec2.dec.raw_opcode_in.eq(ins & 0xffffffff)  # v3.0B suffix
    yield self.dec2.sv_rm.eq(int(pfx.rm))  # svp64 prefix
    yield Settle()
1917
def execute_one(self):
    """Execute the instruction that was last set up (generator).

    The instruction name is taken from the supplied disassembly when
    there is one, otherwise from get_assembly_name(). A MemException
    whose first argument is 'unaligned' or 'invalid' is converted into a
    trap, after which this returns immediately; any other MemException
    is re-raised. On normal completion the trace log is appended to,
    register state is logged, and the fake PC (if in use) moves on by 4.
    """
    # get the disassembly code for this instruction
    if not self.disassembly:
        code = yield from self.get_assembly_name()
    else:
        offs, dbg = 0, ""
        if self.is_svp64_mode:
            # the disassembly lookup is made at _pc+4 in SVP64 mode
            offs, dbg = 4, "svp64 "
        code = self.disassembly[self._pc+offs]
        log(" %s sim-execute" % dbg, hex(self._pc), code)
    # the instruction name is the text before the first space
    opname = code.split(' ')[0]
    try:
        yield from self.call(opname)  # execute the instruction
    except MemException as e:  # check for memory errors
        if e.args[0] == 'unaligned':  # alignment error
            # run a Trap but set DAR first
            print("memory unaligned exception, DAR", e.dar, repr(e))
            self.spr['DAR'] = SelectableInt(e.dar, 64)
            self.call_trap(0x600, PIb.PRIV)  # 0x600, privileged
            return
        elif e.args[0] == 'invalid':  # invalid
            # run a Trap but set DAR first
            log("RADIX MMU memory invalid error, mode %s" % e.mode)
            if e.mode == 'EXECUTE':
                # XXX TODO: must set a few bits in SRR1,
                # see microwatt loadstore1.vhdl
                # if m_in.segerr = '0' then
                # v.srr1(47 - 33) := m_in.invalid;
                # v.srr1(47 - 35) := m_in.perm_error; -- noexec fault
                # v.srr1(47 - 44) := m_in.badtree;
                # v.srr1(47 - 45) := m_in.rc_error;
                # v.intr_vec := 16#400#;
                # else
                # v.intr_vec := 16#480#;
                self.call_trap(0x400, PIb.PRIV)  # 0x400, privileged
            else:
                self.call_trap(0x300, PIb.PRIV)  # 0x300, privileged
            return
        # not supported yet:
        raise e  # ... re-raise

    # append to the trace log file
    self.trace(" # %s\n" % code)

    log("gprs after code", code)
    self.gpr.dump()
    # log every CR field in binary
    crs = []
    for i in range(len(self.crl)):
        crs.append(bin(self.crl[i].asint()))
    log("crs", " ".join(crs))
    log("vl,maxvl", self.svstate.vl, self.svstate.maxvl)

    # don't use this except in special circumstances
    if not self.respect_pc:
        self.fake_pc += 4

    log("execute one, CIA NIA", hex(self.pc.CIA.value),
        hex(self.pc.NIA.value))
1978
def get_assembly_name(self):
    """Rebuild the assembly mnemonic of the decoded instruction.

    Generator: reads decoder signals via ``yield`` and returns the
    mnemonic string.  The base name comes from ``insns[asmcode]``; a
    trailing ".", "l" and "a" are appended according to the Rc, LK and
    AA information, and mfcr/mtcrf are renamed to their "o" variants
    when ``get_spr_msb`` reports the bit set.
    """
    # TODO, asmregs is from the spec, e.g. add RT,RA,RB
    # see http://bugs.libre-riscv.org/show_bug.cgi?id=282
    do = self.dec2.e.do
    dec_insn = yield do.insn
    insn_1_11 = yield do.insn[1:11]
    asmcode = yield self.dec2.dec.op.asmcode
    int_op = yield self.dec2.dec.op.internal_op
    log("get assembly name asmcode", asmcode, int_op,
        hex(dec_insn), bin(insn_1_11))
    asmop = insns.get(asmcode, None)

    # sigh reconstruct the assembly instruction name
    ov_en = ov_ok = False
    if hasattr(do, "oe"):
        ov_en = yield do.oe.oe
        ov_ok = yield do.oe.ok
    rc_en = rc_ok = False
    if hasattr(do, "rc"):
        rc_en = yield do.rc.rc
        rc_ok = yield do.rc.ok
    # annoying: ignore rc_ok if RC1 is set (for creating *assembly name*)
    RC1 = yield self.dec2.rm_dec.RC1
    if RC1:
        rc_en = rc_ok = False
    log("ov %d en %d rc %d en %d op %d" %
        (ov_ok, ov_en, rc_ok, rc_en, int_op))

    # grrrr have to special-case MUL op (see DecodeOE): for those the
    # "." is added without looking at the existing name
    if int_op in [MicrOp.OP_MUL_H64.value, MicrOp.OP_MUL_H32.value]:
        log("mul op")
        add_dot = rc_en & rc_ok
    else:
        # don't add "." to "andis."
        add_dot = (not asmop.endswith(".")) and (rc_en & rc_ok)
    if add_dot:
        asmop += "."

    if hasattr(do, "lk"):
        lk = yield do.lk
        if lk:
            asmop += "l"
    log("int_op", int_op)
    if int_op in [MicrOp.OP_B.value, MicrOp.OP_BC.value]:
        AA = yield self.dec2.dec.fields.FormI.AA[0:-1]
        log("AA", AA)
        if AA:
            asmop += "a"

    spr_msb = yield from self.get_spr_msb()
    if int_op == MicrOp.OP_MFCR.value:
        asmop = 'mfocrf' if spr_msb else 'mfcr'
    # XXX TODO: for whatever weird reason this doesn't work
    # https://bugs.libre-soc.org/show_bug.cgi?id=390
    if int_op == MicrOp.OP_MTCRF.value:
        asmop = 'mtocrf' if spr_msb else 'mtcrf'
    return asmop
2043
def reset_remaps(self):
    """Reset the stored REMAP state: loop-ends all zero, indices 0..3."""
    self.remap_loopends = [0 for _ in range(4)]
    self.remap_idxs = list(range(4))
2047
def get_remap_indices(self):
    """WARNING, this function stores remap_idxs and remap_loopends
    in the class for later use.  this to avoid problems with yield

    Steps the iterator of each of the four SVSHAPE SPRs up to the
    current step, records the index and loop-end value reached in
    ``self.remap_idxs`` / ``self.remap_loopends``, and returns the list
    of ``(shape, iterator)`` pairs.
    """
    # go through all iterators in lock-step, advance to next remap_idx
    srcstep, dststep, ssubstep, dsubstep = self.get_src_dststeps()
    self.reset_remaps()
    # get four SVSHAPEs. here we are hard-coding
    shapes = [self.spr['SVSHAPE%d' % n] for n in range(4)]
    # set up the iterators
    remaps = [(svshape, svshape.get_iterator()) for svshape in shapes]

    dbg = []
    for i, (shape, remap) in enumerate(remaps):
        # zero is "disabled" (the value is still overwritten below)
        if shape.value == 0x0:
            self.remap_idxs[i] = 0
        # pick src or dststep depending on reg num (0-2=in, 3-4=out)
        step = dststep if i in (3, 4) else srcstep
        # this is terrible. O(N^2) looking for the match. but hey.
        # remap_idx/loopends deliberately outlive the loop: they hold
        # the entry at position `step`, or the last one produced
        for pos, (remap_idx, loopends) in enumerate(remap):
            if pos == step:
                break
        self.remap_idxs[i] = remap_idx
        self.remap_loopends[i] = loopends
        dbg.append((i, step, remap_idx, loopends))
    for (i, step, remap_idx, loopends) in dbg:
        log("SVSHAPE %d idx, end" % i, step, remap_idx, bin(loopends))
    return remaps
2084
def get_spr_msb(self):
    """Generator: return True when bit 20 of the decoded instruction is set."""
    insn = yield self.dec2.e.do.insn
    # sigh - XFF.spr[-1]?
    bit20 = insn & (1 << 20)
    return bit20 != 0
2088
def call(self, name, syscall_emu_active=False):
    """call(opcode) - the primary execution point for instructions

    Generator: reads and writes decoder signals with ``yield`` and
    waits for combinatorial settling with ``yield Settle()``.

    :param name: mnemonic of the instruction to execute; surrounding
                 whitespace is stripped.
    :param syscall_emu_active: True on the nested calls made while
                 emulating ``sc``/``scv``, so that the emulation is not
                 entered a second time.

    Order of operations, as implemented below:

    * bail out if halted; trap (0x700) on a privileged instruction when
      MSR.PR is set; ``attn`` halts the simulator
    * optional system-call emulation (``self.syscall``)
    * check the requested name against the decoded one, trap (0x700)
      if they disagree and the instruction is not whitelisted
    * work out element widths, look up the pseudocode function, set up
      the namespace, SVP64 stepping and REMAP
    * collect the inputs, run the pseudocode, then handle carry,
      overflow, Rc=1, fail-first, register write-back and NIA update
    """
    self.last_st_addr = None  # reset the last known store address
    self.last_ld_addr = None  # etc.

    ins_name = name.strip()  # remove spaces if not already done so
    if self.halted:
        log("halted - not executing", ins_name)
        return

    # TODO, asmregs is from the spec, e.g. add RT,RA,RB
    # see http://bugs.libre-riscv.org/show_bug.cgi?id=282
    asmop = yield from self.get_assembly_name()
    log("call", ins_name, asmop,
        kind=LogType.InstrInOuts)

    # sv.setvl is *not* a loop-function. sigh
    log("is_svp64_mode", self.is_svp64_mode, asmop)

    # check privileged
    int_op = yield self.dec2.dec.op.internal_op
    spr_msb = yield from self.get_spr_msb()

    instr_is_privileged = False
    if int_op in [MicrOp.OP_ATTN.value,
                  MicrOp.OP_MFMSR.value,
                  MicrOp.OP_MTMSR.value,
                  MicrOp.OP_MTMSRD.value,
                  # TODO: OP_TLBIE
                  MicrOp.OP_RFID.value]:
        instr_is_privileged = True
    if int_op in [MicrOp.OP_MFSPR.value,
                  MicrOp.OP_MTSPR.value] and spr_msb:
        instr_is_privileged = True

    log("is priv", instr_is_privileged, hex(self.msr.value),
        self.msr[MSRb.PR])
    # check MSR priv bit and whether op is privileged: if so, throw trap
    if instr_is_privileged and self.msr[MSRb.PR] == 1:
        self.call_trap(0x700, PIb.PRIV)
        return

    # check halted condition
    if ins_name == 'attn':
        self.halted = True
        return

    # User mode system call emulation consists of several steps:
    # 1. Detect whether instruction is sc or scv.
    # 2. Call the HDL implementation which invokes trap.
    # 3. Reroute the guest system call to host system call.
    # 4. Force return from the interrupt as if we had guest OS.
    if ((asmop in ("sc", "scv")) and
            (self.syscall is not None) and
            not syscall_emu_active):
        # Memoize PC and trigger an interrupt
        if self.respect_pc:
            pc = self.pc.CIA.value
        else:
            pc = self.fake_pc
        yield from self.call(asmop, syscall_emu_active=True)

        # Reroute the syscall to host OS
        identifier = self.gpr(0)
        arguments = map(self.gpr, range(3, 9))
        result = self.syscall(identifier, *arguments)
        self.gpr.write(3, result, False, self.namespace["XLEN"])

        # Return from interrupt
        yield from self.call("rfid", syscall_emu_active=True)
        return
    elif ((name in ("rfid", "hrfid")) and syscall_emu_active):
        asmop = "rfid"

    # check illegal instruction
    illegal = False
    if ins_name not in ['mtcrf', 'mtocrf']:
        illegal = ins_name != asmop

    # list of instructions not being supported by binutils (.long)
    dotstrp = asmop[:-1] if asmop[-1] == '.' else asmop
    if dotstrp in [*FPTRANS_INSNS,
                   *LDST_UPDATE_INSNS,
                   'ffmadds', 'fdmadds', 'ffadds',
                   'minmax',
                   "brh", "brw", "brd",
                   'setvl', 'svindex', 'svremap', 'svstep',
                   'svshape', 'svshape2',
                   'ternlogi', 'bmask', 'cprop', 'gbbd',
                   'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd',
                   'fmvis', 'fishmv', 'pcdec', "maddedu", "divmod2du",
                   "dsld", "dsrd", "maddedus",
                   "sadd", "saddw", "sadduw",
                   "cffpr", "cffpro",
                   "mffpr", "mffprs",
                   "ctfpr", "ctfprs",
                   "mtfpr", "mtfprs",
                   "maddsubrs", "maddrs", "msubrs",
                   "cfuged", "cntlzdm", "cnttzdm", "pdepd", "pextd",
                   "setbc", "setbcr", "setnbc", "setnbcr",
                   ]:
        illegal = False
        ins_name = dotstrp

    # match against instructions treated as nop, see nop below
    if asmop.startswith("dcbt"):
        illegal = False
        ins_name = "nop"

    # branch-conditional redirects to sv.bc
    if asmop.startswith('bc') and self.is_svp64_mode:
        ins_name = 'sv.%s' % ins_name

    # ld-immediate-with-pi mode redirects to ld-with-postinc
    ldst_imm_postinc = False
    if 'u' in ins_name and self.is_svp64_mode:
        ldst_pi = yield self.dec2.rm_dec.ldst_postinc
        if ldst_pi:
            ins_name = ins_name.replace("u", "up")
            ldst_imm_postinc = True
            log(" enable ld/st postinc", ins_name)

    log(" post-processed name", dotstrp, ins_name, asmop)

    # illegal instructions call TRAP at 0x700
    if illegal:
        print("illegal", ins_name, asmop)
        self.call_trap(0x700, PIb.ILLEG)
        print("name %s != %s - calling ILLEGAL trap, PC: %x" %
              (ins_name, asmop, self.pc.CIA.value))
        return

    # this is for setvl "Vertical" mode: if set true,
    # srcstep/dststep is explicitly advanced. mode says which SVSTATE to
    # test for Rc=1 end condition. 3 bits of all 3 loops are put into CR0
    self.allow_next_step_inc = False
    self.svstate_next_mode = 0

    # nop has to be supported, we could let the actual op calculate
    # but PowerDecoder has a pattern for nop
    if ins_name == 'nop':
        self.update_pc_next()
        return

    # get elwidths, defaults to 64
    xlen = 64
    ew_src = 64
    ew_dst = 64
    if self.is_svp64_mode:
        ew_src = yield self.dec2.rm_dec.ew_src
        ew_dst = yield self.dec2.rm_dec.ew_dst
        ew_src = 8 << (3-int(ew_src))  # convert to bitlength
        ew_dst = 8 << (3-int(ew_dst))  # convert to bitlength
        xlen = max(ew_src, ew_dst)
        log("elwidth", ew_src, ew_dst)
    log("XLEN:", self.is_svp64_mode, xlen)

    # look up instruction in ISA.instrs, prepare namespace
    if ins_name == 'pcdec':  # grrrr yes there are others ("stbcx." etc.)
        info = self.instrs[ins_name+"."]
    elif asmop[-1] == '.' and asmop in self.instrs:
        info = self.instrs[asmop]
    else:
        info = self.instrs[ins_name]
    yield from self.prep_namespace(ins_name, info.form, info.op_fields,
                                   xlen)

    # dict retains order
    inputs = dict.fromkeys(create_full_args(
        read_regs=info.read_regs, special_regs=info.special_regs,
        uninit_regs=info.uninit_regs, write_regs=info.write_regs))

    # preserve order of register names
    write_without_special_regs = OrderedSet(info.write_regs)
    write_without_special_regs -= OrderedSet(info.special_regs)
    input_names = create_args([
        *info.read_regs, *info.uninit_regs, *write_without_special_regs])
    log("input names", input_names)

    # get SVP64 entry for the current instruction
    sv_rm = self.svp64rm.instrs.get(ins_name)
    if sv_rm is not None:
        dest_cr, src_cr, src_byname, dest_byname = decode_extra(sv_rm)
    else:
        dest_cr, src_cr, src_byname, dest_byname = False, False, {}, {}
    log("sv rm", sv_rm, dest_cr, src_cr, src_byname, dest_byname)

    # see if srcstep/dststep need skipping over masked-out predicate bits
    # svstep also needs advancement because it calls SVSTATE_NEXT.
    # but the remaps get computed just after pre_inc moves them on
    # with remap_set_steps substituting for PowerDecoder2 not doing it,
    # and SVSTATE_NEXT not being able to use yield, the preinc on
    # svstep is necessary for now.
    self.reset_remaps()
    if (self.is_svp64_mode or ins_name in ['svstep']):
        yield from self.svstate_pre_inc()
    if self.is_svp64_mode:
        pre = yield from self.update_new_svstate_steps()
        if pre:
            self.svp64_reset_loop()
            self.update_nia()
            self.update_pc_next()
            return
        # every later use of these is guarded by is_svp64_mode
        srcstep, dststep, ssubstep, dsubstep = self.get_src_dststeps()
        pred_dst_zero = self.pred_dst_zero
        pred_src_zero = self.pred_src_zero
        vl = self.svstate.vl
        subvl = yield self.dec2.rm_dec.rm_in.subvl

    # VL=0 in SVP64 mode means "do nothing: skip instruction"
    if self.is_svp64_mode and vl == 0:
        self.pc.update(self.namespace, self.is_svp64_mode)
        log("SVP64: VL=0, end of call", self.namespace['CIA'],
            self.namespace['NIA'], kind=LogType.InstrInOuts)
        return

    # for when SVREMAP is active, using pre-arranged schedule.
    # note: modifying PowerDecoder2 needs to "settle"
    remap_en = self.svstate.SVme
    persist = self.svstate.RMpst
    active = (persist or self.last_op_svshape) and remap_en != 0
    if self.is_svp64_mode:
        yield self.dec2.remap_active.eq(remap_en if active else 0)
        yield Settle()
    if persist or self.last_op_svshape:
        remaps = self.get_remap_indices()
    if self.is_svp64_mode and (persist or self.last_op_svshape):
        yield from self.remap_set_steps(remaps)
    # after that, settle down (combinatorial) to let Vector reg numbers
    # work themselves out
    yield Settle()
    if self.is_svp64_mode:
        remap_active = yield self.dec2.remap_active
    else:
        remap_active = False
    log("remap active", bin(remap_active), self.is_svp64_mode)

    # LDST does *not* allow elwidth overrides on RA (Effective Address).
    # this has to be detected. XXX TODO: RB for ldst-idx *may* need
    # conversion (to 64-bit) also.
    # see write reg this *HAS* to also override XLEN to 64 on LDST/Update
    sv_mode = yield self.dec2.rm_dec.sv_mode
    is_ldst = (sv_mode in [SVMode.LDST_IDX.value, SVMode.LDST_IMM.value]
               and self.is_svp64_mode)
    log("is_ldst", sv_mode, is_ldst)

    # main input registers (RT, RA ...)
    # NOTE: the loop variable shadows the `name` parameter, which is
    # not needed again after this point.
    for name in input_names:
        if name == "overflow":
            inputs[name] = SelectableInt(0, 1)
        elif name == "FPSCR":
            inputs[name] = self.FPSCR
        elif name in ("CA", "CA32", "OV", "OV32"):
            inputs[name] = self.spr['XER'][XER_bits[name]]
        elif name == "CR0":
            # exact match: `name in "CR0"` was a substring test and
            # would also have accepted e.g. "CR" or "R"
            inputs[name] = self.crl[0]
        elif name in spr_byname:
            inputs[name] = self.spr[name]
        elif is_ldst and name == 'RA':
            regval = (yield from self.get_input(name, ew_src, 64))
            log("EA (RA) regval name", name, regval)
            inputs[name] = regval
        else:
            regval = (yield from self.get_input(name, ew_src, xlen))
            log("regval name", name, regval)
            inputs[name] = regval

    # arrrrgh, awful hack, to get _RT into namespace
    if ins_name in ['setvl', 'svstep']:
        regname = "_RT"
        RT = yield self.dec2.dec.RT
        self.namespace[regname] = SelectableInt(RT, 5)
        if RT == 0:
            self.namespace["RT"] = SelectableInt(0, 5)
            regnum, is_vec = yield from get_idx_out(self.dec2, "RT")
            # report the register that was actually looked up (RT),
            # not whatever the input loop above happened to end on
            log('hack input reg %s %s' % ("RT", str(regnum)), is_vec)

    # in SVP64 mode for LD/ST work out immediate
    # XXX TODO: replace_ds for DS-Form rather than D-Form.
    # use info.form to detect
    if self.is_svp64_mode and not ldst_imm_postinc:
        yield from self.check_replace_d(info, remap_active)

    # "special" registers
    for special in info.special_regs:
        if special in special_sprs:
            inputs[special] = self.spr[special]
        else:
            inputs[special] = self.namespace[special]

    # clear trap (trap) NIA
    self.trap_nia = None

    # check if this was an sv.bc* and create an indicator that
    # this is the last check to be made as a loop. combined with
    # the ALL/ANY mode we can early-exit. note that BI (to test)
    # is an input so there is no termination if BI is scalar
    # (because early-termination is for *output* scalars)
    if self.is_svp64_mode and ins_name.startswith("sv.bc"):
        end_loop = srcstep == vl-1 or dststep == vl-1
        self.namespace['end_loop'] = SelectableInt(end_loop, 1)

    # snapshot CA/OV before the pseudocode can change XER
    inp_ca_ov = (self.spr['XER'][XER_bits['CA']].value,
                 self.spr['XER'][XER_bits['OV']].value)

    for k, v in inputs.items():
        if v is None:
            v = SelectableInt(0, self.XLEN)
        # prevent pseudo-code from modifying input registers
        v = copy_assign_rhs(v)
        if isinstance(v, SelectableInt):
            v.ok = False
        inputs[k] = v

    # execute actual instruction here (finally)
    log("inputs", inputs)
    inputs = list(inputs.values())
    results = info.func(self, *inputs)
    output_names = create_args(info.write_regs)
    outs = {}
    # record .ok before anything after the pseudo-code can modify it
    outs_ok = {}
    for out, n in zip(results or [], output_names):
        outs[n] = out
        outs_ok[n] = True
        if isinstance(out, SelectableInt):
            outs_ok[n] = out.ok
    log("results", outs)
    log("results ok", outs_ok)

    # "inject" decorator takes namespace from function locals: we need to
    # overwrite NIA being overwritten (sigh)
    if self.trap_nia is not None:
        self.namespace['NIA'] = self.trap_nia

    log("after func", self.namespace['CIA'], self.namespace['NIA'])

    # check if op was a LD/ST so that debugging can check the
    # address
    if int_op in [MicrOp.OP_STORE.value,
                  ]:
        self.last_st_addr = self.mem.last_st_addr
    if int_op in [MicrOp.OP_LOAD.value,
                  ]:
        self.last_ld_addr = self.mem.last_ld_addr
    log("op", int_op, MicrOp.OP_STORE.value, MicrOp.OP_LOAD.value,
        self.last_st_addr, self.last_ld_addr)

    # detect if CA/CA32 already in outputs (sra*, basically)
    ca = outs.get("CA")
    ca32 = outs.get("CA32")

    log("carry already done?", ca, ca32, output_names)
    # soc test_pipe_caller tests don't have output_carry
    has_output_carry = hasattr(self.dec2.e.do, "output_carry")
    carry_en = has_output_carry and (yield self.dec2.e.do.output_carry)
    if carry_en:
        yield from self.handle_carry_(
            inputs, results[0], ca, ca32, inp_ca_ov=inp_ca_ov)

    # get output named "overflow" and "CR0"
    overflow = outs.get('overflow')
    cr0 = outs.get('CR0')
    cr1 = outs.get('CR1')

    # soc test_pipe_caller tests don't have oe
    has_oe = hasattr(self.dec2.e.do, "oe")
    # yeah just no. not in parallel processing
    if has_oe and not self.is_svp64_mode:
        # detect if overflow was in return result
        ov_en = yield self.dec2.e.do.oe.oe
        ov_ok = yield self.dec2.e.do.oe.ok
        log("internal overflow", ins_name, overflow, "en?", ov_en, ov_ok)
        if ov_en & ov_ok:
            yield from self.handle_overflow(
                inputs, results[0], overflow, inp_ca_ov=inp_ca_ov)

    # only do SVP64 dest predicated Rc=1 if dest-pred is not enabled
    rc_en = False
    if not self.is_svp64_mode or not pred_dst_zero:
        if hasattr(self.dec2.e.do, "rc"):
            rc_en = yield self.dec2.e.do.rc.rc
    # don't do Rc=1 for svstep it is handled explicitly.
    # XXX TODO: now that CR0 is supported, sort out svstep's pseudocode
    # to write directly to CR0 instead of in ISACaller. hooyahh.
    if rc_en and ins_name not in ['svstep']:
        if outs_ok.get('FPSCR', False):
            FPSCR = outs['FPSCR']
        else:
            FPSCR = self.FPSCR
        yield from self.do_rc_ov(
            ins_name, results[0], overflow, cr0, cr1, FPSCR)

    # check failfirst
    ffirst_hit = False, False
    if self.is_svp64_mode:
        sv_mode = yield self.dec2.rm_dec.sv_mode
        is_cr = sv_mode == SVMode.CROP.value
        chk = rc_en or is_cr
        if outs_ok.get('CR', False):
            # early write so check_ffirst can see value
            self.namespace['CR'].eq(outs['CR'])
        ffirst_hit = (yield from self.check_ffirst(info, chk, srcstep))

    # any modified return results?
    yield from self.do_outregs(
        info, outs, carry_en, ffirst_hit, ew_dst, outs_ok)

    # check if a FP Exception occurred. TODO for DD-FFirst, check VLi
    # and raise the exception *after* if VLi=1 but if VLi=0 then
    # truncate and make the exception "disappear".
    if self.FPSCR.FEX and (self.msr[MSRb.FE0] or self.msr[MSRb.FE1]):
        self.call_trap(0x700, PIb.FP)
        return

    yield from self.do_nia(asmop, ins_name, rc_en, ffirst_hit)
2506
def check_ffirst(self, info, rc_en, srcstep):
    """fail-first mode: checks a bit of Rc Vector, truncates VL

    Generator.  Returns the pair ``(hit, vli)``: ``(False, False)``
    when fail-first does not apply or the tested CR bit does not
    trigger it, otherwise ``(True, vli)`` after ``SVSTATE.vl`` has been
    set to ``srcstep + vli`` and pushed to the decoder.
    """
    rm_dec = self.dec2.rm_dec
    rm_mode = yield rm_dec.mode
    ff_inv = yield rm_dec.inv
    cr_bit = yield rm_dec.cr_sel
    RC1 = yield rm_dec.RC1
    vli_ = yield rm_dec.vli  # VL inclusive if truncated
    log(" ff rm_mode", rc_en, rm_mode, SVP64RMMode.FFIRST.value)
    log(" inv", ff_inv)
    log(" RC1", RC1)
    log(" vli", vli_)
    log(" cr_bit", cr_bit)
    log(" rc_en", rc_en)
    ffirst = yield from is_ffirst_mode(self.dec2)
    if not (rc_en and ffirst):
        return False, False

    # get the CR vector, do BO-test
    log("asmregs", info.asmregs[0], info.write_regs)
    writes_bf = 'CR' in info.write_regs and 'BF' in info.asmregs[0]
    crf = 'BF' if writes_bf else "CR0"
    regnum, is_vec = yield from get_cr_out(self.dec2, crf)
    crtest = self.crl[regnum]
    ffirst_hit = crtest[cr_bit] != ff_inv
    log("cr test", crf, regnum, int(crtest), crtest, cr_bit, ff_inv)
    log("cr test?", ffirst_hit)
    if not ffirst_hit:
        return False, False

    # Fail-first activated, truncate VL
    self.svstate.vl = srcstep + SelectableInt(int(vli_), 7)
    yield self.dec2.state.svstate.eq(self.svstate.value)
    yield Settle()  # let decoder update
    return True, vli_
2542
def do_rc_ov(self, ins_name, result, overflow, cr0, cr1, FPSCR):
    """Write the Rc=1 condition-register field for an instruction.

    Generator.  The decoder's ``cr_out`` selects CR1 or CR0.  For CR1
    an explicit ``cr1`` is written as-is, otherwise one is built from
    the FPSCR FX/FEX/VX/OX bits.  For CR0 an explicit ``cr0`` is
    written as-is, otherwise ``handle_comparison`` derives it from
    ``result``.
    """
    cr_out = yield self.dec2.op.cr_out
    rc_reg = "CR1" if cr_out == CROutSel.CR1.value else "CR0"
    regnum, is_vec = yield from get_cr_out(self.dec2, rc_reg)
    # hang on... for `setvl` actually you want to test SVSTATE.VL
    is_setvl = ins_name in ('svstep', 'setvl')
    if is_setvl:
        result = SelectableInt(result.vl, 64)
    # else:
    #     overflow = None # do not override overflow except in setvl

    if rc_reg == "CR1":
        if cr1 is not None:
            log("explicit cr1", cr1)
        else:
            # FX, FEX, VX, OX packed from bit 3 down to bit 0
            cr1 = ((int(FPSCR.FX) << 3) | (int(FPSCR.FEX) << 2) |
                   (int(FPSCR.VX) << 1) | int(FPSCR.OX))
            log("default fp cr1", cr1)
        self.crl[regnum].eq(cr1)
        return

    if cr0 is not None:
        # otherwise we just blat CR0 into the required regnum
        log("explicit cr0", cr0)
        self.crl[regnum].eq(cr0)
        return

    # if there was not an explicit CR0 in the pseudocode,
    # do implicit Rc=1
    c = self.handle_comparison(result, regnum, overflow, no_so=is_setvl)
    log("implicit cr0", c)
2576
def do_outregs(self, info, outs, ca_en, ffirst_hit, ew_dst, outs_ok):
    """Write back the outputs of an instruction via ``check_write``.

    Generator.  ``ffirst_hit`` is the ``(hit, vli)`` pair from
    ``check_ffirst``: nothing is written when fail-first hit and
    ``vli`` is false.  Outputs whose ``outs_ok`` entry is false are
    skipped.
    """
    hit, vli = ffirst_hit
    # write out any regs for this instruction, but only if fail-first is ok
    # XXX TODO: allow CR-vector to be written out even if ffirst fails
    if hit and not vli:
        # restore the CR value on non-VLI failfirst (from sv.cmp and others
        # which write directly to CR in the pseudocode (gah, what a mess)
        # self.cr.value = self.cr_backup
        return
    for name, output in outs.items():
        if outs_ok[name]:
            yield from self.check_write(info, name, output, ca_en, ew_dst)
        else:
            log("skipping writing output with .ok=False", name, output)
2591
def do_nia(self, asmop, ins_name, rc_en, ffirst_hit):
    """Decide whether the PC moves on, and move it if so.

    Generator.  When fail-first hit, the SVP64 loop is reset and the
    PC is always advanced; otherwise ``check_step_increment`` decides.
    """
    hit, _vli = ffirst_hit
    if hit:
        self.svp64_reset_loop()
        self.update_pc_next()
        return
    # check advancement of src/dst/sub-steps and if PC needs updating
    advance = yield from self.check_step_increment(rc_en, asmop, ins_name)
    if advance:
        self.update_pc_next()
2603
def check_replace_d(self, info, remap_active):
    """Replace the D / DS immediate in the namespace for SVP64 LD/ST.

    Generator.  The immediate is read from the D (or DS, times 4)
    field and sign-extended from 16 bits.  In unit-stride mode it
    becomes ``imm + offsmul * data_len``, in element-stride mode
    ``imm * offsmul``; in any other mode nothing is changed.  The
    result is stored as a 32-bit ``SelectableInt`` under ``'D'`` (or
    ``'DS'``, shifted right by 2) in ``self.namespace``.
    """
    ldstmode = yield self.dec2.rm_dec.ldstmode
    vl = self.svstate.vl  # not used further in this function
    subvl = yield self.dec2.rm_dec.rm_in.subvl
    srcstep = self.new_srcstep
    dststep = self.new_dststep
    ssubstep = self.new_ssubstep
    dsubstep = self.new_dsubstep
    is_ds_form = info.form == 'DS'
    if is_ds_form:
        # DS-Form, multiply by 4 then knock 2 bits off after
        raw = yield self.dec2.dec.fields.FormDS.DS[0:14] * 4
    else:
        raw = yield self.dec2.dec.fields.FormD.D[0:16]
    imm = exts(raw, 16)  # sign-extend to integer

    # get the right step. LD is from srcstep, ST is dststep
    op = yield self.dec2.e.do.insn_type
    offsmul = 0
    if op == MicrOp.OP_LOAD.value:
        if remap_active:
            offsmul = yield self.dec2.in1_step
            log("D-field REMAP src", imm, offsmul, ldstmode)
        else:
            offsmul = (srcstep * (subvl+1)) + ssubstep
            log("D-field src", imm, offsmul, ldstmode)
    elif op == MicrOp.OP_STORE.value:
        # XXX NOTE! no bit-reversed STORE! this should not ever be used
        offsmul = (dststep * (subvl+1)) + dsubstep
        log("D-field dst", imm, offsmul, ldstmode)

    if ldstmode == SVP64LDSTmode.UNITSTRIDE.value:
        # Unit-Strided LD/ST adds offset*width to immediate
        ldst_len = yield self.dec2.e.do.data_len
        imm = SelectableInt(imm + offsmul * ldst_len, 32)
    elif ldstmode == SVP64LDSTmode.ELSTRIDE.value:
        # Element-strided multiplies the immediate by element step
        imm = SelectableInt(imm * offsmul, 32)
    else:
        # no update / replacement of the constant in the pseudocode
        return

    ldst_ra_vec = yield self.dec2.rm_dec.ldst_ra_vec
    ldst_imz_in = yield self.dec2.rm_dec.ldst_imz_in
    log("LDSTmode", SVP64LDSTmode(ldstmode),
        offsmul, imm, ldst_ra_vec, ldst_imz_in)
    # new replacement D... errr.. DS
    if is_ds_form:
        # TODO: assert 2 LSBs are zero?
        log("DS-Form, TODO, assert 2 LSBs zero?", bin(imm.value))
        imm.value = imm.value >> 2
        self.namespace['DS'] = imm
    else:
        self.namespace['D'] = imm
2654
def get_input(self, name, ew_src, xlen):
    """Read the value of input register ``name`` for the pseudocode.

    Generator.  The register number is looked up through the decoder
    (inputs first, then the two outputs) and also stored in the
    namespace as ``_<name>``.  Returns a ``SelectableInt``: the FPR or
    GPR contents, or zero when in SVP64 mode with ``pred_src_zero`` set.
    """
    # using PowerDecoder2, first, find the decoder index.
    # (mapping name RA RB RC RS to in1, in2, in3)
    regnum, is_vec = yield from get_idx_in(self.dec2, name, True)
    if regnum is None:
        # doing this is not part of svp64, it's because output
        # registers, to be modified, need to be in the namespace.
        regnum, is_vec = yield from get_idx_out(self.dec2, name, True)
    if regnum is None:
        regnum, is_vec = yield from get_idx_out2(self.dec2, name, True)

    if isinstance(regnum, tuple):
        regnum, base, offs = regnum
    else:
        base, offs = regnum, 0  # temporary HACK

    # in case getting the register number is needed, _RA, _RB
    # (HACK: only in straight non-svp64-mode for now, or elwidth == 64)
    if not self.is_svp64_mode or ew_src == 64:
        ns_regnum = regnum
    else:
        # FIXME: we're trying to access a sub-register, plain register
        # numbers don't work for that. for now, just pass something that
        # can be compared to 0 and probably will cause an error if misused.
        # see https://bugs.libre-soc.org/show_bug.cgi?id=1221
        ns_regnum = regnum * 10000
    self.namespace["_" + name] = ns_regnum

    if self.is_svp64_mode and self.pred_src_zero:
        log('zero input reg %s %s' % (name, str(regnum)), is_vec)
        return SelectableInt(0, ew_src)

    log('reading reg %s %s' % (name, str(regnum)), is_vec)
    if name in fregs:
        fval = self.fpr(base, is_vec, offs, ew_src)
        reg_val = SelectableInt(fval)
        assert ew_src == self.XLEN, "TODO fix elwidth conversion"
        self.trace("r:FPR:%d:%d:%d " % (base, offs, ew_src))
        log("read fp reg %d/%d: 0x%x" % (base, offs, reg_val.value),
            kind=LogType.InstrInOuts)
    elif name is not None:
        gval = self.gpr(base, is_vec, offs, ew_src)
        reg_val = SelectableInt(gval.value, bits=xlen)
        self.trace("r:GPR:%d:%d:%d " % (base, offs, ew_src))
        log("read int reg %d/%d: 0x%x" % (base, offs, reg_val.value),
            kind=LogType.InstrInOuts)
    return reg_val
2702
def remap_set_steps(self, remaps):
    """remap_set_steps sets up the in1/2/3 and out1/2 steps.
    they work in concert with PowerDecoder2 at the moment,
    there is no HDL implementation of REMAP.  therefore this
    function, because ISACaller still uses PowerDecoder2,
    will *explicitly* write the dec2.XX_step values.  this has
    to get sorted out.

    Generator.  ``remaps`` is the list of ``(shape, iterator)`` pairs
    from ``get_remap_indices``; the indices written are taken from
    ``self.remap_idxs``.
    """
    # just some convenient debug info
    for sname in ('SVSHAPE%d' % n for n in range(4)):
        shape = self.spr[sname]
        log(sname, bin(shape.value))
        log(" lims", shape.lims)
        log(" mode", shape.mode)
        log(" skip", shape.skip)

    # set up the list of steps to remap: each entry pairs a decoder
    # step signal with the SVSTATE field selecting its SVSHAPE
    selectors = [self.svstate.mi0, self.svstate.mi1, self.svstate.mi2,
                 self.svstate.mo0, self.svstate.mo1]
    signals = [self.dec2.in1_step,  # RA
               self.dec2.in2_step,  # RB
               self.dec2.in3_step,  # RC
               self.dec2.o_step,    # RT
               self.dec2.o2_step,   # EA
               ]
    steps = [[sig, sel] for sig, sel in zip(signals, selectors)]
    if False:  # TODO
        rnames = ['RA', 'RB', 'RC', 'RT', 'RS']
        for i, reg in enumerate(rnames):
            idx = yield from get_idx_map(self.dec2, reg)
            if idx is None:
                idx = yield from get_idx_map(self.dec2, "F"+reg)
            if idx == 1:  # RA
                steps[i][0] = self.dec2.in1_step
            elif idx == 2:  # RB
                steps[i][0] = self.dec2.in2_step
            elif idx == 3:  # RC
                steps[i][0] = self.dec2.in3_step
            log("remap step", i, reg, idx, steps[i][1])

    remap_idxs = self.remap_idxs
    report = []
    # now cross-index the required SHAPE for each of 3-in 2-out regs
    rnames = ['RA', 'RB', 'RC', 'RT', 'EA']
    for i, (dstep, shape_idx) in enumerate(steps):
        shape, _remap = remaps[shape_idx]
        remap_idx = remap_idxs[shape_idx]
        # zero is "disabled"
        if shape.value == 0x0:
            continue
        # now set the actual requested step to the current index
        if dstep is not None:
            yield dstep.eq(remap_idx)
        # debug printout info
        report.append((shape.mode, hex(shape.value), dstep,
                       i, rnames[i], shape_idx, remap_idx))
    for entry in report:
        log("shape remap", entry)
2764
def check_write(self, info, name, output, carry_en, ew_dst):
    """Write one named pseudocode output back to simulator state.

    Generator.  Handles, in this order: ``overflow``/``CR0`` (ignored
    here), FPSCR, the CA/CA32 XER bits (only when ``carry_en``), the
    instruction's special registers, and finally FPR/GPR destinations
    located through the decoder.
    """
    if name in ('overflow', 'CR0'):  # ignore, done already (above)
        return
    if isinstance(output, int):
        output = SelectableInt(output, EFFECTIVELY_UNLIMITED)

    # write FPSCR
    if name == 'FPSCR':
        log("write FPSCR 0x%x" % (output.value))
        self.FPSCR.eq(output)
        return

    # write carry flags
    if name in ['CA', 'CA32']:
        if not carry_en:
            log("NOT writing %s to XER" % name, output)
            return
        log("writing %s to XER" % name, output)
        log("write XER %s 0x%x" % (name, output.value))
        self.spr['XER'][XER_bits[name]] = output.value
        return

    # write special SPRs
    if name in info.special_regs:
        log('writing special %s' % name, output, special_sprs)
        log("write reg %s 0x%x" % (name, output.value),
            kind=LogType.InstrInOuts)
        if name in special_sprs:
            self.spr[name] = output
        else:
            self.namespace[name].eq(output)
        if name == 'MSR':
            log('msr written', hex(self.msr.value))
        return

    # find out1/out2 PR/FPR
    regnum, is_vec = yield from get_idx_out(self.dec2, name, True)
    if regnum is None:
        regnum, is_vec = yield from get_idx_out2(self.dec2, name, True)
    if regnum is None:
        # temporary hack for not having 2nd output
        regnum = yield getattr(self.decoder, name)
        is_vec = False

    is_fp = name in fregs
    # convenient debug prefix
    reg_prefix = 'f' if is_fp else 'r'
    # check zeroing due to predicate bit being zero
    if self.is_svp64_mode and self.pred_dst_zero:
        log('zeroing reg %s %s' % (str(regnum), str(output)), is_vec)
        output = SelectableInt(0, EFFECTIVELY_UNLIMITED)
    log("write reg %s%s 0x%x ew %d" % (reg_prefix, str(regnum),
                                       output.value, ew_dst),
        kind=LogType.InstrInOuts)
    # zero-extend to 64 bit before storing (should use EXT oh well)
    if output.bits > 64:
        output = SelectableInt(output.value, 64)
    # regnum is unpacked as a 3-tuple here
    rnum, base, offset = regnum
    if is_fp:
        self.fpr.write(regnum, output, is_vec, ew_dst)
        self.trace("w:FPR:%d:%d:%d " % (rnum, offset, ew_dst))
        return

    # LDST/Update does *not* allow elwidths on RA (Effective Address).
    # this has to be detected, and overridden. see get_input (related)
    sv_mode = yield self.dec2.rm_dec.sv_mode
    ldst_modes = [SVMode.LDST_IDX.value, SVMode.LDST_IMM.value]
    is_ldst = (sv_mode in ldst_modes and self.is_svp64_mode)
    if is_ldst and name in ['EA', 'RA']:
        op = self.dec2.dec.op
        if hasattr(op, "upd"):
            # update mode LD/ST uses read-reg A also as an output
            upd = yield op.upd
            log("write is_ldst is_update", sv_mode, is_ldst, upd)
            if upd == LDSTMode.update.value:
                ew_dst = 64  # override for RA (EA) to 64-bit

    self.gpr.write(regnum, output, is_vec, ew_dst)
    self.trace("w:GPR:%d:%d:%d " % (rnum, offset, ew_dst))
2843
def check_step_increment(self, rc_en, asmop, ins_name):
    """step (or reset) the SVSTATE src/dst steps once an instruction is done.

    this is a generator and has to be driven with "yield from".

    * rc_en: when true, CR0 is also updated on the svstep paths
    * asmop: assembler name of the instruction just executed
    * ins_name: instruction name, handed on to svstate_post_inc

    returns True, or whatever svstate_post_inc returned (that function
    returns False while its sub-PC loop is still running).
    """
    # no svstep requested: either the ordinary SVP64 sub-PC loop, or
    # (scalar instruction) just note whether this was a REMAP setup op
    if not self.allow_next_step_inc:
        if self.is_svp64_mode:
            return (yield from self.svstate_post_inc(ins_name))

        # XXX only in non-SVP64 mode!
        # record state of whether the current operation was an svshape,
        # OR svindex!
        # to be able to know if it should apply in the next instruction.
        # also (if going to use this instruction) should disable ability
        # to interrupt in between. sigh.
        self.last_op_svshape = asmop in ('svremap', 'svindex', 'svshape2')
        return True

    nia_update = True
    log("SVSTATE_NEXT: inc requested, mode",
        self.svstate_next_mode, self.allow_next_step_inc)
    yield from self.svstate_pre_inc()
    at_end = yield from self.update_new_svstate_steps()
    if at_end:
        # reset at end of loop including exit Vertical Mode
        log("SVSTATE_NEXT: end of loop, reset")
        self.svp64_reset_loop()
        self.svstate.vfirst = 0
        self.update_nia()
        if rc_en:
            self.handle_comparison(SelectableInt(0, 64))  # CR0
        return True

    if self.allow_next_step_inc == 2:
        log("SVSTATE_NEXT: read")
        nia_update = (yield from self.svstate_post_inc(ins_name))
    else:
        log("SVSTATE_NEXT: post-inc")

    # called for its effect on the cached remap state; the return value
    # is not needed here
    self.get_remap_indices()
    # the value read is unused: the read itself is kept so that the
    # sequence of simulator commands stays exactly as it was
    yield self.dec2.rm_dec.rm_in.subvl
    if self.allow_next_step_inc != 2:
        yield from self.advance_svstate_steps()

    # set CR0 (if Rc=1) based on end
    endtest = 1 if self.at_loopend() else 0
    if rc_en:
        # see if svstep was requested, if so, which SVSTATE
        endings = 0b111
        if self.svstate_next_mode > 0:
            shape_idx = self.svstate_next_mode.value-1
            endings = self.remap_loopends[shape_idx]
        cr_field = SelectableInt((~endings) << 1 | endtest, 4)
        log("svstep Rc=1, CR0", cr_field, endtest)
        self.crl[0].eq(cr_field)  # CR0
    if endtest:
        # reset at end of loop including exit Vertical Mode
        log("SVSTATE_NEXT: after increments, reset")
        self.svp64_reset_loop()
        self.svstate.vfirst = 0
    return nia_update
2911
def SVSTATE_NEXT(self, mode, submode):
    """implements the "svstep" pseudo-op: requests a step to the next
    element and returns a 7-bit value selected by mode.

    * mode 1-4: returns self.remap_idxs[mode-1]
    * mode 5-8: returns SVSTATE srcstep, dststep, ssubstep or dsubstep
      respectively, and clears the remembered mode back to zero
    * anything else: returns zero

    submode+1 is stored in allow_next_step_inc.

    WARNING: this function relies on information created EARLIER,
    in the middle of a yield, yet it is itself *NOT* called from a
    yield (it is called from compiled pseudocode).
    """
    self.allow_next_step_inc = submode.value + 1
    log("SVSTATE_NEXT mode", mode, submode, self.allow_next_step_inc)
    self.svstate_next_mode = mode

    # modes 1..4 select one of the cached remap indices
    if self.svstate_next_mode > 0 and self.svstate_next_mode < 5:
        shape_idx = self.svstate_next_mode.value-1
        return SelectableInt(self.remap_idxs[shape_idx], 7)

    # modes 5..8 read one of the SVSTATE step fields
    step_fields = ((5, 'srcstep'), (6, 'dststep'),
                   (7, 'ssubstep'), (8, 'dsubstep'))
    for code, field in step_fields:
        if self.svstate_next_mode == code:
            self.svstate_next_mode = 0
            return SelectableInt(getattr(self.svstate, field), 7)

    return SelectableInt(0, 7)
2940
def get_src_dststeps(self):
    """returns the cached "new" steps as a 4-tuple, in the order
    (srcstep, dststep, ssubstep, dsubstep)
    """
    steps = (self.new_srcstep, self.new_dststep)
    substeps = (self.new_ssubstep, self.new_dsubstep)
    return steps + substeps
2946
def update_svstate_namespace(self, overwrite_svstate=True):
    """publishes SVSTATE to the namespace and to the decoder.

    this is a generator and has to be driven with "yield from".

    * overwrite_svstate: when true, the four cached new_* steps are
      first copied into SVSTATE
    """
    if overwrite_svstate:
        # note, do not get the bit-reversed srcstep here!
        new_steps = (self.new_srcstep, self.new_dststep,
                     self.new_ssubstep, self.new_dsubstep)
        fields = ('srcstep', 'dststep', 'ssubstep', 'dsubstep')
        # update SVSTATE with the new steps
        for field, step in zip(fields, new_steps):
            setattr(self.svstate, field, step)

    svstate = self.svstate
    self.namespace['SVSTATE'] = svstate
    yield self.dec2.state.svstate.eq(svstate.value)
    yield Settle()  # let decoder update
2961
def update_new_svstate_steps(self, overwrite_svstate=True):
    """refreshes SVSTATE (see update_svstate_namespace) then reports
    whether the end of the loop has been reached.

    this is a generator and has to be driven with "yield from".

    returns True if self.loopend is set. otherwise returns the result
    of testing whether the source side (ssubstep == subvl and
    srcstep == vl) or the destination side (dsubstep == subvl and
    dststep == vl) has hit its end.
    """
    yield from self.update_svstate_namespace(overwrite_svstate)

    svstate = self.svstate
    rm_dec = self.dec2.rm_dec
    srcstep, dststep = svstate.srcstep, svstate.dststep
    ssubstep, dsubstep = svstate.ssubstep, svstate.dsubstep
    pack, unpack = svstate.pack, svstate.unpack
    vl = svstate.vl
    sv_mode = yield rm_dec.sv_mode
    subvl = yield rm_dec.rm_in.subvl
    rm_mode = yield rm_dec.mode
    ff_inv = yield rm_dec.inv
    cr_bit = yield rm_dec.cr_sel

    # debug dump, one line per item
    for label, item in ((" srcstep", srcstep),
                        (" dststep", dststep),
                        (" pack", pack),
                        (" unpack", unpack),
                        (" ssubstep", ssubstep),
                        (" dsubstep", dsubstep),
                        (" vl", vl),
                        (" subvl", subvl),
                        (" rm_mode", rm_mode),
                        (" sv_mode", sv_mode),
                        (" inv", ff_inv),
                        (" cr_bit", cr_bit)):
        log(label, item)

    # check if end reached (srcstep is allowed to overrun)
    # nothing needs doing (TODO zeroing): just do next instruction
    if self.loopend:
        return True
    src_at_end = (ssubstep == subvl and srcstep == vl)
    if src_at_end:
        return src_at_end
    return (dsubstep == subvl and dststep == vl)
2995
def svstate_post_inc(self, insn_name, vf=0):
    """post-instruction handling of the SVP64 sub-PC loop.

    this is a generator and has to be driven with "yield from".

    * insn_name: name of the instruction. names starting with "sv.bc"
      get branch-specific treatment
    * vf: when zero (the default) and SVSTATE.vfirst is 1, only the
      program counter is dealt with and the steps are left alone

    returns False when the PC must not be updated by the caller
    (the loop is still running, or the PC was already updated here),
    True otherwise.
    """
    # check if SV "Vertical First" mode is enabled
    vfirst = self.svstate.vfirst
    log(" SV Vertical First", vf, vfirst)
    if not vf and vfirst == 1:
        # SV Branch-Conditional required to be as-if-vector
        # because there *is* no destination register
        # (SV normally only terminates on 1st scalar reg written
        # except in [slightly-misnamed] mapreduce mode)
        ffirst = yield from is_ffirst_mode(self.dec2)
        if insn_name.startswith("sv.bc") or ffirst:
            self.update_pc_next()
            return False
        self.update_nia()
        return True

    # check if it is the SVSTATE.src/dest step that needs incrementing
    # this is our Sub-Program-Counter loop from 0 to VL-1
    # XXX twin predication TODO
    svstate = self.svstate
    rm_dec = self.dec2.rm_dec
    vl = svstate.vl
    subvl = yield rm_dec.rm_in.subvl
    mvl = svstate.maxvl
    srcstep, dststep = svstate.srcstep, svstate.dststep
    ssubstep, dsubstep = svstate.ssubstep, svstate.dsubstep
    pack, unpack = svstate.pack, svstate.unpack
    rm_mode = yield rm_dec.mode
    reverse_gear = yield rm_dec.reverse_gear
    sv_ptype = yield self.dec2.dec.op.SV_Ptype
    out_vec = not (yield self.dec2.no_out_vec)
    in_vec = not (yield self.dec2.no_in_vec)
    # the mode is read a second time: retained so that the sequence
    # of simulator reads is unchanged
    rm_mode = yield rm_dec.mode

    # debug dump, one line per entry
    for entry in ((" svstate.vl", vl),
                  (" svstate.mvl", mvl),
                  (" rm.subvl", subvl),
                  (" svstate.srcstep", srcstep),
                  (" svstate.dststep", dststep),
                  (" svstate.ssubstep", ssubstep),
                  (" svstate.dsubstep", dsubstep),
                  (" svstate.pack", pack),
                  (" svstate.unpack", unpack),
                  (" mode", rm_mode),
                  (" reverse", reverse_gear),
                  (" out_vec", out_vec),
                  (" in_vec", in_vec),
                  (" sv_ptype", sv_ptype, sv_ptype == SVPType.P2.value),
                  (" rm_mode", rm_mode)):
        log(*entry)

    # check if this was an sv.bc* and if so did it succeed
    if self.is_svp64_mode and insn_name.startswith("sv.bc"):
        end_loop = self.namespace['end_loop']
        log("branch %s end_loop" % insn_name, end_loop)
        if end_loop.value:
            self.svp64_reset_loop()
            self.update_pc_next()
            return False

    # check if srcstep needs incrementing by one, stop PC advancing
    # but for 2-pred both src/dest have to be checked.
    # XXX this might not be true! it may just be LD/ST
    is_twin_pred = (sv_ptype == SVPType.P2.value)
    svp64_is_vector = (out_vec or in_vec) if is_twin_pred else out_vec

    # also if data-dependent fail-first is used, only in_vec is tested,
    # allowing *scalar destinations* to be used as an accumulator.
    # effectively this implies /mr (mapreduce mode) is 100% on with ddffirst
    # see https://bugs.libre-soc.org/show_bug.cgi?id=1183#c16
    ffirst = yield from is_ffirst_mode(self.dec2)
    if ffirst:
        svp64_is_vector = in_vec

    # loops end at the first "hit" (source or dest)
    yield from self.advance_svstate_steps()
    loopend = self.loopend
    log("loopend", svp64_is_vector, loopend)
    still_looping = svp64_is_vector and not loopend
    if not still_looping:
        # reset loop to zero and update NIA
        self.svp64_reset_loop()
        self.update_nia()
        return True

    # still looping: publish the advanced SVSTATE
    self.namespace['SVSTATE'] = self.svstate

    # not an SVP64 branch, so fix PC (NIA==CIA) for next loop
    # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64)
    # this way we keep repeating the same instruction (with new steps)
    pc = self.pc
    pc.NIA.eq(pc.CIA)
    self.namespace['NIA'] = pc.NIA
    log("end of sub-pc call", self.namespace['CIA'], self.namespace['NIA'])
    return False  # DO NOT allow PC update whilst Sub-PC loop running
3089
def update_pc_next(self):
    """updates the program counter from the namespace, then logs the
    resulting CIA, NIA and SVSTATE namespace entries
    """
    ns = self.namespace
    self.pc.update(ns, self.is_svp64_mode)
    log("end of call", ns['CIA'], ns['NIA'], ns['SVSTATE'])
3097
def svp64_reset_loop(self):
    """zeroes all four SVSTATE steps, clears self.loopend, and
    republishes SVSTATE in the namespace
    """
    svstate = self.svstate
    for field in ('srcstep', 'dststep', 'ssubstep', 'dsubstep'):
        setattr(svstate, field, 0)
    self.loopend = False
    log(" svstate.srcstep loop end (PC to update)")
    self.namespace['SVSTATE'] = self.svstate
3106
def update_nia(self):
    """asks the PC object to update NIA (telling it whether SVP64 mode
    is active), then copies the result into the namespace
    """
    pc = self.pc
    pc.update_nia(self.is_svp64_mode)
    self.namespace['NIA'] = pc.NIA
3110
3111
def inject():
    """Decorator factory.

    the returned decorator "injects" variables into the decorated
    function's global namespace, taken from the *dictionary* in
    self.namespace (self being the first positional argument of the
    call). it therefore becomes possible to make it look like a whole
    stack of variables, which would otherwise need "self." in front of
    them (*and* would need adding to the instance), simply "appear" in
    the function.

    "self.namespace['SI']" for example becomes accessible as just "SI"
    inside the decorated function.

    note that the injected names are never removed again, and that
    after each call self.namespace is rebound to the function's globals
    dictionary, so names the function assigns as globals show up in
    self.namespace afterwards.
    """
    def variable_injector(func):
        @wraps(func)
        def decorator(*args, **kwargs):
            func_globals = func.__globals__
            simulator = args[0]

            context = simulator.namespace  # variables to be injected
            log("globals before", context.keys())
            func_globals.update(context)
            result = func(*args, **kwargs)
            log("globals after", func_globals['CIA'], func_globals['NIA'])
            log("args[0]", simulator.namespace['CIA'],
                simulator.namespace['NIA'],
                simulator.namespace['SVSTATE'])
            if 'end_loop' in func_globals:
                log("args[0] end_loop", func_globals['end_loop'])
            # deliberately no restore of the previous globals: the
            # (now updated) globals dict becomes the namespace
            simulator.namespace = func_globals

            return result

        return decorator

    return variable_injector