move two big step/loop functions into separate class out of ISACaller
[openpower-isa.git] / src / openpower / decoder / isa / caller.py
1 # SPDX-License-Identifier: LGPLv3+
2 # Copyright (C) 2020, 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
3 # Copyright (C) 2020 Michael Nolan
4 # Funded by NLnet http://nlnet.nl
5 """core of the python-based POWER9 simulator
6
7 this is part of a cycle-accurate POWER9 simulator. its primary purpose is
8 not speed, it is for both learning and educational purposes, as well as
9 a method of verifying the HDL.
10
11 related bugs:
12
13 * https://bugs.libre-soc.org/show_bug.cgi?id=424
14 """
15
16 import re
17 from nmigen.sim import Settle, Delay
18 from functools import wraps
19 from copy import copy, deepcopy
20 from openpower.decoder.orderedset import OrderedSet
21 from openpower.decoder.selectable_int import (
22 FieldSelectableInt,
23 SelectableInt,
24 selectconcat,
25 )
26 from openpower.decoder.power_insn import SVP64Instruction
27 from openpower.decoder.power_enums import (spr_dict, spr_byname, XER_bits,
28 insns, MicrOp,
29 In1Sel, In2Sel, In3Sel,
30 OutSel, CRInSel, CROutSel, LDSTMode,
31 SVP64RMMode, SVP64PredMode,
32 SVP64PredInt, SVP64PredCR,
33 SVP64LDSTmode, FPTRANS_INSNS)
34
35 from openpower.decoder.power_enums import SVPtype
36
37 from openpower.decoder.helpers import (exts, gtu, ltu, undefined,
38 ISACallerHelper, ISAFPHelpers)
39 from openpower.consts import PIb, MSRb # big-endian (PowerISA versions)
40 from openpower.consts import (SVP64MODE,
41 SVP64CROffs,
42 )
43 from openpower.decoder.power_svp64 import SVP64RM, decode_extra
44
45 from openpower.decoder.isa.radixmmu import RADIX
46 from openpower.decoder.isa.mem import Mem, swap_order, MemException
47 from openpower.decoder.isa.svshape import SVSHAPE
48 from openpower.decoder.isa.svstate import SVP64State
49
50
51 from openpower.util import LogKind, log
52
53 from collections import namedtuple
54 import math
55 import sys
56
# record type describing one instruction; one slot per name listed below
instruction_info = namedtuple(
    'instruction_info',
    [
        'func',
        'read_regs',
        'uninit_regs',
        'write_regs',
        'special_regs',
        'op_fields',
        'form',
        'asmregs',
    ],
)
60
# SPR name -> SPR number, for the SPRs that the SPR class (below) stores
# under their number instead of their name
special_sprs = dict(LR=8, CTR=9, TAR=815, XER=1, VRSAVE=256)
67
68
# sort weight per register name, used as the sort key by create_args():
# everything is weight 0 (so keeps its incoming relative order) except
# "overflow", which sorts after all the others.
# TODO (lkcl): adjust other registers that should be in a particular order
# probably CA, CA32, and CR
REG_SORT_ORDER = dict.fromkeys(
    (
        "FRT", "FRA", "FRB", "FRC", "FRS",
        "RT", "RA", "RB", "RC", "RS",
        "BI",
        "CR", "LR", "CTR", "TAR", "MSR",
        "SVSTATE", "SVSHAPE0", "SVSHAPE1", "SVSHAPE2", "SVSHAPE3",
        "CA", "CA32",
    ),
    0,
)
REG_SORT_ORDER["overflow"] = 7  # should definitely be last
99
# operand names of the FRx (floating-point register) family.
# NOTE(review): the code that consumes this list is outside the visible
# part of the file - presumably it routes these operands to the FPR
# regfile rather than the GPR one; confirm against the caller.
fregs = ['FRA', 'FRB', 'FRC', 'FRS', 'FRT']
101
102
def create_args(reglist, extra=None):
    """Build an argument-name list from a list of register names.

    Duplicates are dropped (first occurrence wins, via OrderedSet), then
    the names are stably sorted by their REG_SORT_ORDER weight (unknown
    names weigh 0).  If ``extra`` is given it is placed in front of the
    sorted names.
    """
    ordered = sorted(OrderedSet(reglist),
                     key=lambda regname: REG_SORT_ORDER.get(regname, 0))
    if extra is None:
        return ordered
    return [extra] + ordered
109
110
class GPR(dict):
    """Register file kept as a dict of register number -> SelectableInt.

    Every initial value in ``regfile`` is wrapped in a 64-bit
    SelectableInt.  Writes go through __setitem__, which logs the access
    and accepts either a plain number or a SelectableInt as the register
    number.  Reads use the plain dict lookup (or __call__, which unwraps
    a SelectableInt index first).
    """

    def __init__(self, decoder, isacaller, svstate, regfile):
        """
        decoder   - kept as self.sd; _get_regnum reads its ``sigforms``
        isacaller - kept as self.isacaller (not used inside this class)
        svstate   - kept as self.svstate (not used inside this class)
        regfile   - sequence of initial register values, index = regnum
        """
        dict.__init__(self)
        self.sd = decoder
        self.isacaller = isacaller
        self.svstate = svstate
        # goes through __setitem__ below, so each initial write is logged
        for regnum, initial in enumerate(regfile):
            self[regnum] = SelectableInt(initial, 64)

    def __call__(self, ridx):
        """Return register ``ridx`` (plain number or SelectableInt)."""
        if isinstance(ridx, SelectableInt):
            ridx = ridx.value
        return self[ridx]

    def set_form(self, form):
        """Record the instruction form name used later by _get_regnum."""
        self.form = form

    def __setitem__(self, rnum, value):
        """Store ``value`` in register ``rnum``, unwrapping a SelectableInt
        register number to its plain value first."""
        # rnum = rnum.value # only SelectableInt allowed
        log("GPR setitem", rnum, value)
        if isinstance(rnum, SelectableInt):
            rnum = rnum.value
        dict.__setitem__(self, rnum, value)

    def getz(self, rnum):
        """Return register ``rnum``, except that register number 0 reads
        as a fresh 64-bit zero instead of the stored value."""
        # rnum = rnum.value # only SelectableInt allowed
        log("GPR getzero?", rnum)
        if rnum == 0:
            return SelectableInt(0, 64)
        return self[rnum]

    def _get_regnum(self, attr):
        """Look up field ``attr`` on the decoder's form object for the
        form previously given to set_form()."""
        getform = self.sd.sigforms[self.form]
        rnum = getattr(getform, attr)
        return rnum

    def ___getitem__(self, attr):
        """ XXX currently not used

        NOTE(review): triple underscore, so this does not override dict
        item access; it also reads self.regfile, which nothing in this
        class assigns.
        """
        rnum = self._get_regnum(attr)
        log("GPR getitem", attr, rnum)
        return self.regfile[rnum]

    def dump(self, printout=True):
        """Return the values of registers 0..len(self)-1 as a list and,
        when ``printout`` is true, print them eight per row in hex.

        A final row with fewer than eight registers is printed short
        (the original fixed-width inner loop raised IndexError whenever
        the register count was not a multiple of eight).
        """
        res = [self[i].value for i in range(len(self))]
        if printout:
            for i in range(0, len(res), 8):
                s = ' '.join("%08x" % regval for regval in res[i:i+8])
                print("reg", "%2d" % i, s)
        return res
166
167
class SPR(dict):
    """Special Purpose Register store, a dict of SPR key -> value.

    Keys may be given as an SPR number, an SPR name or a SelectableInt.
    They are normalised on every access: a number is turned into its
    name via spr_dict, then names listed in special_sprs are turned back
    into their number.  So the SPRs in special_sprs are stored under
    their number and every other SPR under its name.
    """

    def __init__(self, dec2, initial_sprs=None):
        """
        dec2         - kept as self.sd (not used inside this class)
        initial_sprs - optional mapping of SPR key -> initial value;
                       values that are not already SelectableInt are
                       wrapped using the SPR's declared length.
                       (None, the default, means "no initial SPRs"; it
                       replaces a shared mutable ``{}`` default.)
        """
        self.sd = dec2
        dict.__init__(self)
        if initial_sprs is None:
            initial_sprs = {}
        for key, v in initial_sprs.items():
            if isinstance(key, SelectableInt):
                key = key.value
            key = special_sprs.get(key, key)
            # fetch the SPR description so that its bit-length is known
            if isinstance(key, int):
                info = spr_dict[key]
            else:
                info = spr_byname[key]
            if not isinstance(v, SelectableInt):
                v = SelectableInt(v, info.length)
            self[key] = v

    def __getitem__(self, key):
        """Return the SPR for ``key``; an SPR that has never been written
        is created on the spot as a zero of the SPR's declared length."""
        log("get spr", key)
        log("dict", self.items())
        # if key in special_sprs get the special spr, otherwise return key
        if isinstance(key, SelectableInt):
            key = key.value
        if isinstance(key, int):
            key = spr_dict[key].SPR
        key = special_sprs.get(key, key)
        # reads of HSRR0/1 are redirected to SRR0/1
        if key == 'HSRR0':  # HACK!
            key = 'SRR0'
        if key == 'HSRR1':  # HACK!
            key = 'SRR1'
        if key in self:
            res = dict.__getitem__(self, key)
        else:
            if isinstance(key, int):
                info = spr_dict[key]
            else:
                info = spr_byname[key]
            dict.__setitem__(self, key, SelectableInt(0, info.length))
            res = dict.__getitem__(self, key)
        log("spr returning", key, res)
        return res

    def __setitem__(self, key, value):
        """Store ``value`` under the normalised form of ``key``.

        A write to HSRR0/HSRR1 is first mirrored into SRR0/SRR1 and then
        also stored under the HSRR key itself.
        """
        if isinstance(key, SelectableInt):
            key = key.value
        if isinstance(key, int):
            key = spr_dict[key].SPR
            log("spr key", key)
        key = special_sprs.get(key, key)
        if key == 'HSRR0':  # HACK!
            self.__setitem__('SRR0', value)
        if key == 'HSRR1':  # HACK!
            self.__setitem__('SRR1', value)
        log("setting spr", key, value)
        dict.__setitem__(self, key, value)

    def __call__(self, ridx):
        """Same as indexing: spr(ridx) is spr[ridx]."""
        return self[ridx]

    def dump(self, printout=True):
        """Return a list of (name, value) for every stored SPR, in storage
        order, optionally printing each one in hex."""
        res = []
        keys = list(self.keys())
        # keys.sort()
        for k in keys:
            # numeric keys are shown by name; anything spr_dict does not
            # know is shown as the key itself
            sprname = spr_dict.get(k, None)
            if sprname is None:
                sprname = k
            else:
                sprname = sprname.SPR
            res.append((sprname, self[k].value))
        if printout:
            for sprname, value in res:
                print(" ", sprname, hex(value))
        return res
241
242
class PC:
    """Program counter pair: CIA (current instruction address) and NIA
    (next instruction address), both 64-bit SelectableInt."""

    def __init__(self, pc_init=0):
        self.CIA = SelectableInt(pc_init, 64)
        # only true for v3.0B!
        self.NIA = self.CIA + SelectableInt(4, 64)

    def update_nia(self, is_svp64):
        """Recompute NIA from CIA: +8 for an SVP64 instruction, else +4."""
        if is_svp64:
            step = 8
        else:
            step = 4
        self.NIA = self.CIA + SelectableInt(step, 64)

    def update(self, namespace, is_svp64):
        """Advance the PC: CIA takes the namespace's NIA (narrowed to 64
        bits), NIA is recomputed from it (by 4 if v3.0B mode or 8 if
        SVP64), and both are written back into the namespace.
        """
        next_addr = namespace['NIA']
        self.CIA = next_addr.narrow(64)
        self.update_nia(is_svp64)
        namespace['CIA'] = self.CIA
        namespace['NIA'] = self.NIA
259
260
# Condition Register and its eight 4-bit fields
# See PowerISA Version 3.0 B Book 1
# Section 2.3.1 Condition Register pages 30 - 31
class CRFields:
    # bit index inside one 4-bit CR field (integer name = FP name)
    LT = FL = 0  # negative, less than, floating-point less than
    GT = FG = 1  # positive, greater than, floating-point greater than
    EQ = FE = 2  # equal, floating-point equal
    SO = FU = 3  # summary overflow, floating-point unordered

    def __init__(self, init=0):
        # the underlying register: 64 bits wide, CR itself sits in 32..63
        self.cr = SelectableInt(init, 64)
        # field-selectable views, crl[i] covering bits 32+4*i .. 35+4*i
        # of the underlying register.  TODO check bitranges?
        self.crl = [
            FieldSelectableInt(self.cr,
                               tuple(range(32 + 4 * fld, 36 + 4 * fld)))
            for fld in range(8)
        ]
280
# decode SVP64 predicate integer to reg number and invert
def get_predint(gpr, mask):
    """Return the integer predicate mask selected by ``mask``.

    gpr  - callable register file: gpr(n) returns register n, whose
           ``.value`` is used
    mask - one of the SVP64PredInt enum *values*

    ALWAYS gives 64 one-bits; R3_UNARY gives a single bit at position
    (r3 & 0x3f); R3/R10/R30 give the register value; the _N variants
    give Python's ``~`` of the register value (so, assuming ``.value``
    is a plain int, a negative number rather than a 64-bit-masked one).
    Any other ``mask`` falls off the end and returns None.

    Registers are only read for the case that is actually selected (the
    original fetched r10 and r30 into unused locals on every call).
    """
    log("get_predint", mask, SVP64PredInt.ALWAYS.value)
    if mask == SVP64PredInt.ALWAYS.value:
        return 0xffff_ffff_ffff_ffff  # 64 bits of 1
    if mask == SVP64PredInt.R3_UNARY.value:
        return 1 << (gpr(3).value & 0b111111)
    if mask == SVP64PredInt.R3.value:
        return gpr(3).value
    if mask == SVP64PredInt.R3_N.value:
        return ~gpr(3).value
    if mask == SVP64PredInt.R10.value:
        return gpr(10).value
    if mask == SVP64PredInt.R10_N.value:
        return ~gpr(10).value
    if mask == SVP64PredInt.R30.value:
        return gpr(30).value
    if mask == SVP64PredInt.R30_N.value:
        return ~gpr(30).value
    # unknown selector: keep the historical "no result" behaviour
    return None
304
# decode SVP64 predicate CR to reg number and invert status
def _get_predcr(mask):
    """Translate an SVP64PredCR enum *value* into (bit index within a CR
    field, bit value that counts as "predicate true").

    Returns None when ``mask`` matches none of the known selectors.
    """
    decode_table = (
        (SVP64PredCR.LT, (0, 1)),
        (SVP64PredCR.GE, (0, 0)),
        (SVP64PredCR.GT, (1, 1)),
        (SVP64PredCR.LE, (1, 0)),
        (SVP64PredCR.EQ, (2, 1)),
        (SVP64PredCR.NE, (2, 0)),
        (SVP64PredCR.SO, (3, 1)),
        (SVP64PredCR.NS, (3, 0)),
    )
    # first match wins, compared in the same order as listed
    for selector, decoded in decode_table:
        if mask == selector.value:
            return decoded
    return None
325
# read individual CR fields (0..VL-1), extract the required bit
# and construct the mask
def get_predcr(crl, mask, vl):
    """Build a predicate bitmask from CR fields.

    crl  - indexable collection of CR fields
    mask - SVP64PredCR enum value choosing which bit to test and whether
           it must be 1 or 0
    vl   - number of elements; bit i of the result is set when the
           chosen bit of CR field (i + SVP64CROffs.CRPred) has the
           wanted value
    """
    bit_idx, wanted = _get_predcr(mask)
    first_field = SVP64CROffs.CRPred
    # every contribution is a distinct power of two, so adding them
    # gives the same result as OR-ing them together
    return sum(1 << elt for elt in range(vl)
               if crl[elt + first_field][bit_idx].value == wanted)
338
339
# TODO, really should just be using PowerDecoder2
def get_pdecode_idx_in(dec2, name):
    """Generator: find which decoded read port carries operand ``name``.

    Must be driven with "yield from" inside a simulator process: each
    "yield <signal>" hands the signal to the simulator and receives its
    current value back.

    dec2 - second-stage decoder object whose signals are sampled
    name - operand name ('RA', 'RA_OR_ZERO', 'RB', 'RC', 'RS', 'FRA',
           'FRB', 'FRC' or 'FRS')

    Returns (register number, is-vector flag) taken from the first read
    port whose selector matches ``name``, or (None, False) if none does.
    Asserts for 'RC'.
    """
    op = dec2.dec.op
    # which operand each of the three read ports has been assigned
    in1_sel = yield op.in1_sel
    in2_sel = yield op.in2_sel
    in3_sel = yield op.in3_sel
    # get the IN1/2/3 from the decoder (includes SVP64 remap and isvec)
    in1 = yield dec2.e.read_reg1.data
    in2 = yield dec2.e.read_reg2.data
    in3 = yield dec2.e.read_reg3.data
    in1_isvec = yield dec2.in1_isvec
    in2_isvec = yield dec2.in2_isvec
    in3_isvec = yield dec2.in3_isvec
    log("get_pdecode_idx_in in1", name, in1_sel, In1Sel.RA.value,
        in1, in1_isvec)
    log("get_pdecode_idx_in in2", name, in2_sel, In2Sel.RB.value,
        in2, in2_isvec)
    log("get_pdecode_idx_in in3", name, in3_sel, In3Sel.RS.value,
        in3, in3_isvec)
    log("get_pdecode_idx_in FRS in3", name, in3_sel, In3Sel.FRS.value,
        in3, in3_isvec)
    log("get_pdecode_idx_in FRB in2", name, in2_sel, In2Sel.FRB.value,
        in2, in2_isvec)
    log("get_pdecode_idx_in FRC in3", name, in3_sel, In3Sel.FRC.value,
        in3, in3_isvec)
    # identify which regnames map to in1/2/3
    if name == 'RA' or name == 'RA_OR_ZERO':
        if (in1_sel == In1Sel.RA.value or
                (in1_sel == In1Sel.RA_OR_ZERO.value and in1 != 0)):
            return in1, in1_isvec
        # RA_OR_ZERO with in1 == 0 ends up here: same result is returned
        if in1_sel == In1Sel.RA_OR_ZERO.value:
            return in1, in1_isvec
    elif name == 'RB':
        # RB may arrive on port 2 or port 3
        if in2_sel == In2Sel.RB.value:
            return in2, in2_isvec
        if in3_sel == In3Sel.RB.value:
            return in3, in3_isvec
    # XXX TODO, RC doesn't exist yet!
    elif name == 'RC':
        assert False, "RC does not exist yet"
    elif name == 'RS':
        # RS may arrive on any of the three ports; first match wins
        if in1_sel == In1Sel.RS.value:
            return in1, in1_isvec
        if in2_sel == In2Sel.RS.value:
            return in2, in2_isvec
        if in3_sel == In3Sel.RS.value:
            return in3, in3_isvec
    elif name == 'FRA':
        if in1_sel == In1Sel.FRA.value:
            return in1, in1_isvec
    elif name == 'FRB':
        if in2_sel == In2Sel.FRB.value:
            return in2, in2_isvec
    elif name == 'FRC':
        if in3_sel == In3Sel.FRC.value:
            return in3, in3_isvec
    elif name == 'FRS':
        if in1_sel == In1Sel.FRS.value:
            return in1, in1_isvec
        if in3_sel == In3Sel.FRS.value:
            return in3, in3_isvec
    # name not carried by any read port
    return None, False
402
403
# TODO, really should just be using PowerDecoder2
def get_pdecode_cr_in(dec2, name):
    """Generator (use with "yield from"): look up the CR read port for
    operand ``name``.

    Samples the decoder's CR-input signals, logs them, and returns
    (CR register number, is-vector flag) when ``name`` is 'BI' and the
    CR-in selector is BI; otherwise returns (None, False).
    """
    decoded_op = dec2.dec.op
    # the order of these yields is kept exactly: each one is a
    # round-trip to the simulator
    in_sel = yield decoded_op.cr_in
    in_bitfield = yield dec2.dec_cr_in.cr_bitfield.data
    sv_cr_in = yield decoded_op.sv_cr_in
    spec = yield dec2.crin_svdec.spec
    sv_override = yield dec2.dec_cr_in.sv_override
    # the CR number from the decoder (includes SVP64 remap and isvec)
    cr_regnum = yield dec2.e.read_cr1.data
    cr_isvec = yield dec2.cr_in_isvec
    log("get_pdecode_cr_in", in_sel, CROutSel.CR0.value, cr_regnum, cr_isvec)
    log(" sv_cr_in", sv_cr_in)
    log(" cr_bf", in_bitfield)
    log(" spec", spec)
    log(" override", sv_override)
    # only BI is recognised so far
    if name == 'BI' and in_sel == CRInSel.BI.value:
        return cr_regnum, cr_isvec
    log("get_pdecode_cr_in not found", name)
    return None, False
426
427
# TODO, really should just be using PowerDecoder2
def get_pdecode_cr_out(dec2, name):
    """Generator (use with "yield from"): look up the CR write port for
    operand ``name``.

    Samples the decoder's CR-output signals, logs them, and returns
    (CR register number, is-vector flag) when ``name`` is 'CR0' or 'CR1'
    and the CR-out selector agrees; otherwise returns (None, False).
    """
    decoded_op = dec2.dec.op
    # the order of these yields is kept exactly: each one is a
    # round-trip to the simulator
    out_sel = yield decoded_op.cr_out
    out_bitfield = yield dec2.dec_cr_out.cr_bitfield.data
    sv_cr_out = yield decoded_op.sv_cr_out
    spec = yield dec2.crout_svdec.spec
    sv_override = yield dec2.dec_cr_out.sv_override
    # the CR number from the decoder (includes SVP64 remap and isvec)
    cr_regnum = yield dec2.e.write_cr.data
    o_isvec = yield dec2.o_isvec
    log("get_pdecode_cr_out", out_sel, CROutSel.CR0.value, cr_regnum, o_isvec)
    log(" sv_cr_out", sv_cr_out)
    log(" cr_bf", out_bitfield)
    log(" spec", spec)
    log(" override", sv_override)
    # identify which regnames map to the CR output
    if name == 'CR0' and out_sel == CROutSel.CR0.value:
        return cr_regnum, o_isvec
    # these are not actually calculated correctly
    if name == 'CR1' and out_sel == CROutSel.CR1.value:
        return cr_regnum, o_isvec
    log("get_pdecode_cr_out not found", name)
    return None, False
453
454
# TODO, really should just be using PowerDecoder2
def get_pdecode_idx_out(dec2, name):
    """Generator (use with "yield from"): look up the primary write port
    for operand ``name``.

    Returns (register number, is-vector flag) when the decoder's output
    selector corresponds to ``name`` ('RA', 'RT', 'RT_OR_ZERO', 'FRA' or
    'FRT'), otherwise (None, False).
    """
    op = dec2.dec.op
    out_sel = yield op.out_sel
    # get the output register from the decoder (includes SVP64 remap
    # and isvec)
    out = yield dec2.e.write_reg.data
    o_isvec = yield dec2.o_isvec
    # identify which regnames map to out / o2
    if name == 'RA':
        log("get_pdecode_idx_out", out_sel, OutSel.RA.value, out, o_isvec)
        if out_sel == OutSel.RA.value:
            return out, o_isvec
    elif name == 'RT':
        log("get_pdecode_idx_out", out_sel, OutSel.RT.value,
            OutSel.RT_OR_ZERO.value, out, o_isvec,
            dec2.dec.RT)
        if out_sel == OutSel.RT.value:
            return out, o_isvec
        # an RT_OR_ZERO selector only counts as RT for a non-zero regnum
        if out_sel == OutSel.RT_OR_ZERO.value and out != 0:
            return out, o_isvec
    elif name == 'RT_OR_ZERO':
        log("get_pdecode_idx_out", out_sel, OutSel.RT.value,
            OutSel.RT_OR_ZERO.value, out, o_isvec,
            dec2.dec.RT)
        if out_sel == OutSel.RT_OR_ZERO.value:
            return out, o_isvec
    elif name == 'FRA':
        log("get_pdecode_idx_out", out_sel, OutSel.FRA.value, out, o_isvec)
        if out_sel == OutSel.FRA.value:
            return out, o_isvec
    elif name == 'FRT':
        log("get_pdecode_idx_out", out_sel, OutSel.FRT.value,
            OutSel.FRT.value, out, o_isvec)
        if out_sel == OutSel.FRT.value:
            return out, o_isvec
    log("get_pdecode_idx_out not found", name, out_sel, out, o_isvec)
    return None, False
492
493
# TODO, really should just be using PowerDecoder2
def get_pdecode_idx_out2(dec2, name):
    """Generator (use with "yield from"): look up the *second* write port
    (the decoder's write_ea) for operand ``name``.

    Returns (register number, is-vector flag) when the port is enabled
    and either
      * ``name`` is 'RA' and the op is an update-mode load/store, or
      * ``name`` is 'FRS' and the decoder reports SVP64 FFT mode;
    otherwise (None, False).
    """
    # check first if register is activated for write
    op = dec2.dec.op
    out_sel = yield op.out_sel
    out = yield dec2.e.write_ea.data
    o_isvec = yield dec2.o2_isvec
    out_ok = yield dec2.e.write_ea.ok
    log("get_pdecode_idx_out2", name, out_sel, out, out_ok, o_isvec)
    if not out_ok:
        return None, False

    if name == 'RA':
        # not every op record has an "upd" field
        if hasattr(op, "upd"):
            # update mode LD/ST uses read-reg A also as an output
            upd = yield op.upd
            log("get_pdecode_idx_out2", upd, LDSTMode.update.value,
                out_sel, OutSel.RA.value,
                out, o_isvec)
            if upd == LDSTMode.update.value:
                return out, o_isvec
    if name == 'FRS':
        # int_op is sampled but currently unused (see the disabled
        # condition below); the yield itself is kept
        int_op = yield dec2.dec.op.internal_op
        fft_en = yield dec2.use_svp64_fft
        # if int_op == MicrOp.OP_FP_MADD.value and fft_en:
        if fft_en:
            log("get_pdecode_idx_out2", out_sel, OutSel.FRS.value,
                out, o_isvec)
            return out, o_isvec
    return None, False
524
525
class StepLoop:
    """deals with svstate looping.

    Mix-in: the methods below read self.svstate, self.dec2, self.gpr,
    self.crl and self.is_svp64_mode, none of which are created here
    (__init__ does nothing) - the class that inherits from StepLoop has
    to provide them.  Both methods are generators and must be driven
    with "yield from" inside a simulator process.
    """

    def __init__(self):
        pass

    def advance_svstate_steps(self, end_src=False, end_dst=False):
        """ advance sub/steps. note that Pack/Unpack *INVERTS* the order.
        TODO when Pack/Unpack is set, substep becomes the *outer* loop

        end_src - caller's indication that srcstep is at its last value
        end_dst - same, for dststep

        Source and destination are advanced independently, each as a
        two-level counter (step, substep) held in self.svstate.  Which
        of the two is the inner (fast) counter is decided by svstate.pack
        for the source and svstate.unpack for the destination.
        """
        subvl = yield self.dec2.rm_dec.rm_in.subvl
        pack = self.svstate.pack
        unpack = self.svstate.unpack
        ssubstep = self.svstate.ssubstep
        dsubstep = self.svstate.dsubstep
        # a substep equal to subvl is treated as the last substep
        end_ssub = ssubstep == subvl
        end_dsub = dsubstep == subvl
        log(" pack/unpack/subvl", pack, unpack, subvl,
            "end", end_src, end_dst,
            "sub", end_ssub, end_dsub)
        # first source step
        srcstep = self.svstate.srcstep  # (local is not used below)
        if pack:
            # pack advances subvl in *outer* loop
            if end_src:
                if not end_ssub:
                    self.svstate.ssubstep += SelectableInt(1, 2)
                self.svstate.srcstep = SelectableInt(0, 7)  # reset
            else:
                self.svstate.srcstep += SelectableInt(1, 7)  # advance srcstep
        else:
            # advance subvl in *inner* loop
            if end_ssub:
                if not end_src:
                    self.svstate.srcstep += SelectableInt(1, 7)
                self.svstate.ssubstep = SelectableInt(0, 2)  # reset
            else:
                self.svstate.ssubstep += SelectableInt(1, 2)  # advance ssubstep

        # now dest step
        if unpack:
            # unpack advances subvl in *outer* loop
            if end_dst:
                if not end_dsub:
                    self.svstate.dsubstep += SelectableInt(1, 2)
                self.svstate.dststep = SelectableInt(0, 7)  # reset
            else:
                self.svstate.dststep += SelectableInt(1, 7)  # advance dststep
        else:
            # advance subvl in *inner* loop
            if end_dsub:
                if not end_dst:
                    self.svstate.dststep += SelectableInt(1, 7)
                self.svstate.dsubstep = SelectableInt(0, 2)  # reset
            else:
                self.svstate.dsubstep += SelectableInt(1, 2)  # advance dsubstep
        log(" advance", self.svstate.srcstep, self.svstate.ssubstep,
            "dst", self.svstate.dststep, self.svstate.dsubstep)

    def svstate_pre_inc(self):
        """check if srcstep/dststep need to skip over masked-out predicate bits
        note that this is not supposed to do anything to substep,
        it is purely for skipping masked-out bits

        Nothing is written back to self.svstate here.  The results are
        left on self for the caller to pick up:
          self.new_srcstep, self.new_dststep   - steps after skipping
          self.new_ssubstep, self.new_dsubstep - substeps, unchanged
          self.pred_src_zero, self.pred_dst_zero - whether the element
              now pointed at is masked out while zeroing is enabled
        """
        # get SVSTATE VL (oh and print out some debug stuff)
        # yield Delay(1e-10) # make changes visible
        vl = self.svstate.vl
        subvl = yield self.dec2.rm_dec.rm_in.subvl
        srcstep = self.svstate.srcstep
        dststep = self.svstate.dststep
        ssubstep = self.svstate.ssubstep
        dsubstep = self.svstate.dsubstep
        pack = self.svstate.pack
        unpack = self.svstate.unpack
        sv_a_nz = yield self.dec2.sv_a_nz
        fft_mode = yield self.dec2.use_svp64_fft
        in1 = yield self.dec2.e.read_reg1.data
        log("SVP64: VL, subvl, srcstep, dststep, ssubstep, dsybstep, sv_a_nz, "
            "in1 fft, svp64",
            vl, subvl, srcstep, dststep, ssubstep, dsubstep,
            sv_a_nz, in1, fft_mode,
            self.is_svp64_mode)

        # get predicate mask (all 64 bits)
        # default when neither INT nor CR predication applies: all ones
        srcmask = dstmask = 0xffff_ffff_ffff_ffff

        pmode = yield self.dec2.rm_dec.predmode
        reverse_gear = yield self.dec2.rm_dec.reverse_gear
        sv_ptype = yield self.dec2.dec.op.SV_Ptype
        srcpred = yield self.dec2.rm_dec.srcpred
        dstpred = yield self.dec2.rm_dec.dstpred
        pred_src_zero = yield self.dec2.rm_dec.pred_sz
        pred_dst_zero = yield self.dec2.rm_dec.pred_dz
        # both masks come from dstpred, unless the instruction is
        # twin-predicated (P2), in which case srcmask uses srcpred
        if pmode == SVP64PredMode.INT.value:
            srcmask = dstmask = get_predint(self.gpr, dstpred)
            if sv_ptype == SVPtype.P2.value:
                srcmask = get_predint(self.gpr, srcpred)
        elif pmode == SVP64PredMode.CR.value:
            srcmask = dstmask = get_predcr(self.crl, dstpred, vl)
            if sv_ptype == SVPtype.P2.value:
                srcmask = get_predcr(self.crl, srcpred, vl)
        # work out if the substeps are at their start: skipping is only
        # attempted when the corresponding substep is zero
        ssubstart = ssubstep == 0
        dsubstart = dsubstep == 0
        log(" pmode", pmode)
        log(" pack/unpack", pack, unpack)
        log(" reverse", reverse_gear)
        log(" ptype", sv_ptype)
        log(" srcpred", bin(srcpred))
        log(" dstpred", bin(dstpred))
        log(" srcmask", bin(srcmask))
        log(" dstmask", bin(dstmask))
        log(" pred_sz", bin(pred_src_zero))
        log(" pred_dz", bin(pred_dst_zero))
        log(" ssubstart", ssubstart)
        log(" dsubstart", dsubstart)

        # okaaay, so here we simply advance srcstep (dststep is handled
        # the same way further down)
        # this can ONLY be done at the beginning of the "for" loop
        # (this is all actually a FSM so it's hell to keep track sigh)
        srcstep_skip = False
        if ssubstart:
            # until the predicate mask has a "1" bit... or we run out of VL
            # let srcstep==VL be the indicator to move to next instruction
            # (with source zeroing enabled nothing is skipped)
            if not pred_src_zero:
                srcstep_skip = True

        # srcstep-skipping opportunity identified
        if srcstep_skip:
            while (((1 << srcstep) & srcmask) == 0) and (srcstep != vl):
                log(" sskip", bin(1 << srcstep))
                srcstep += 1

        dststep_skip = False
        if dsubstart:
            # same for dststep
            if not pred_dst_zero:
                dststep_skip = True

        # dststep-skipping opportunity identified
        if dststep_skip:
            while (((1 << dststep) & dstmask) == 0) and (dststep != vl):
                log(" dskip", bin(1 << dststep))
                dststep += 1

        # now work out if the relevant mask bits require zeroing:
        # the flag stays set only if the current element is masked out
        if pred_dst_zero:
            pred_dst_zero = ((1 << dststep) & dstmask) == 0
        if pred_src_zero:
            pred_src_zero = ((1 << srcstep) & srcmask) == 0

        # store new srcstep / dststep
        self.new_srcstep, self.new_dststep = (srcstep, dststep)
        self.new_ssubstep, self.new_dsubstep = (ssubstep, dsubstep)
        self.pred_dst_zero, self.pred_src_zero = (pred_dst_zero, pred_src_zero)
        log(" new srcstep", srcstep)
        log(" new dststep", dststep)
        log(" new ssubstep", ssubstep)
        log(" new dsubstep", dsubstep)
685 log(" new dsubstep", dsubstep)
686
687
688 class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
689 # decoder2 - an instance of power_decoder2
690 # regfile - a list of initial values for the registers
691 # initial_{etc} - initial values for SPRs, Condition Register, Mem, MSR
692 # respect_pc - tracks the program counter. requires initial_insns
def __init__(self, decoder2, regfile, initial_sprs=None, initial_cr=0,
             initial_mem=None, initial_msr=0,
             initial_svstate=0,
             initial_insns=None,
             fpregfile=None,
             respect_pc=False,
             disassembly=None,
             initial_pc=0,
             bigendian=False,
             mmu=False,
             icachemmu=False):
    """Set up the complete simulator state.

    decoder2        - second-stage decoder; kept as self.dec2, and its
                      ``dec`` attribute as self.decoder
    regfile         - initial integer register values
    initial_sprs    - mapping of initial SPR values (deep-copied)
    initial_cr      - initial Condition Register value
    initial_mem     - initial data memory contents; when it is a tuple
                      (and respect_pc is false) its first item is used
                      as the fake program counter
    initial_msr     - initial MSR value
    initial_svstate - int or SVP64State (None is treated as 0)
    initial_insns   - initial instruction memory contents; when omitted
                      respect_pc must be false
    fpregfile       - initial FP register values (default: 32 zeros)
    respect_pc      - tracks the program counter, requires initial_insns
    disassembly     - optional list of disassembly text, one per
                      4-byte instruction slot
    initial_pc      - value handed to self.set_pc()
    bigendian       - stored as self.bigendian
    mmu, icachemmu  - wrap data / instruction memory in RADIX
    """

    self.bigendian = bigendian
    self.halted = False
    self.is_svp64_mode = False
    self.respect_pc = respect_pc
    if initial_sprs is None:
        initial_sprs = {}
    if initial_mem is None:
        initial_mem = {}
    if fpregfile is None:
        fpregfile = [0] * 32
    if initial_insns is None:
        initial_insns = {}
        assert self.respect_pc == False, "instructions required to honor pc"

    log("ISACaller insns", respect_pc, initial_insns, disassembly)
    log("ISACaller initial_msr", initial_msr)

    # "fake program counter" mode (for unit testing)
    self.fake_pc = 0
    disasm_start = 0
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the "else" is taken to pair with "if not respect_pc"
    if not respect_pc:
        if isinstance(initial_mem, tuple):
            self.fake_pc = initial_mem[0]
            disasm_start = self.fake_pc
    else:
        disasm_start = initial_pc

    # disassembly: we need this for now (not given from the decoder)
    # keyed by address: entry i belongs to disasm_start + 4*i
    self.disassembly = {}
    if disassembly:
        for i, code in enumerate(disassembly):
            self.disassembly[i*4 + disasm_start] = code

    # set up registers, instruction memory, data memory, PC, SPRs, MSR, CR
    self.svp64rm = SVP64RM()
    if initial_svstate is None:
        initial_svstate = 0
    if isinstance(initial_svstate, int):
        initial_svstate = SVP64State(initial_svstate)
    # SVSTATE, MSR and PC
    self.svstate = initial_svstate
    self.msr = SelectableInt(initial_msr, 64)  # underlying reg
    self.pc = PC()
    # GPR FPR SPR registers
    initial_sprs = deepcopy(initial_sprs)  # so as not to get modified
    self.gpr = GPR(decoder2, self, self.svstate, regfile)
    self.fpr = GPR(decoder2, self, self.svstate, fpregfile)
    self.spr = SPR(decoder2, initial_sprs)  # initialise SPRs before MMU

    # set up 4 dummy SVSHAPEs if they aren't already set up
    for i in range(4):
        sname = 'SVSHAPE%d' % i
        if sname not in self.spr:
            val = 0
        else:
            val = self.spr[sname].value
        # make sure it's an SVSHAPE
        self.spr[sname] = SVSHAPE(val, self.gpr)
    self.last_op_svshape = False

    # "raw" memory
    self.mem = Mem(row_bytes=8, initial_mem=initial_mem)
    self.mem.log_fancy(kind=LogKind.InstrInOuts)
    self.imem = Mem(row_bytes=4, initial_mem=initial_insns)
    # MMU mode, redirect underlying Mem through RADIX
    if mmu:
        self.mem = RADIX(self.mem, self)
        if icachemmu:
            self.imem = RADIX(self.imem, self)

    # TODO, needed here:
    # FPR (same as GPR except for FP nums)
    # 4.2.2 p124 FPSCR (definitely "separate" - not in SPR)
    # note that mffs, mcrfs, mtfsf "manage" this FPSCR
    # 2.3.1 CR (and sub-fields CR0..CR6 - CR0 SO comes from XER.SO)
    # note that mfocrf, mfcr, mtcr, mtocrf, mcrxrx "manage" CRs
    # -- Done
    # 2.3.2 LR (actually SPR #8) -- Done
    # 2.3.3 CTR (actually SPR #9) -- Done
    # 2.3.4 TAR (actually SPR #815)
    # 3.2.2 p45 XER (actually SPR #1) -- Done
    # 3.2.3 p46 p232 VRSAVE (actually SPR #256)

    # create CR then allow portions of it to be "selectable" (below)
    self.cr_fields = CRFields(initial_cr)
    self.cr = self.cr_fields.cr

    # "undefined", just set to variable-bit-width int (use exts "max")
    # self.undefined = SelectableInt(0, 256) # TODO, not hard-code 256!

    # the namespace: name -> object dict, seeded with every SPR and
    # then with the register files, memory, PC values and constants
    self.namespace = {}
    self.namespace.update(self.spr)
    self.namespace.update({'GPR': self.gpr,
                           'FPR': self.fpr,
                           'MEM': self.mem,
                           'SPR': self.spr,
                           'memassign': self.memassign,
                           'NIA': self.pc.NIA,
                           'CIA': self.pc.CIA,
                           'SVSTATE': self.svstate,
                           'SVSHAPE0': self.spr['SVSHAPE0'],
                           'SVSHAPE1': self.spr['SVSHAPE1'],
                           'SVSHAPE2': self.spr['SVSHAPE2'],
                           'SVSHAPE3': self.spr['SVSHAPE3'],
                           'CR': self.cr,
                           'MSR': self.msr,
                           'undefined': undefined,
                           'mode_is_64bit': True,
                           'SO': XER_bits['SO'],
                           'XLEN': 64  # elwidth overrides, later
                           })

    # update pc to requested start point
    self.set_pc(initial_pc)

    # field-selectable versions of Condition Register
    self.crl = self.cr_fields.crl
    for i in range(8):
        self.namespace["CR%d" % i] = self.crl[i]

    self.decoder = decoder2.dec
    self.dec2 = decoder2

    # base-class initialisation comes last and is handed the XLEN
    # that was just placed in the namespace
    super().__init__(XLEN=self.namespace["XLEN"])
829
@property
def XLEN(self):
    """Current XLEN, read from the 'XLEN' entry of self.namespace."""
    return self.namespace["XLEN"]
833
def call_trap(self, trap_addr, trap_bit):
    """calls TRAP and sets up NIA to the new execution location.
    next instruction will begin at trap_addr.

    trap_addr - passed through to TRAP()
    trap_bit  - passed through to TRAP()
    """
    self.TRAP(trap_addr, trap_bit)
    # TRAP() left the target address in self.trap_nia; make it the NIA
    # and let the PC pick it up as the new CIA
    self.namespace['NIA'] = self.trap_nia
    self.pc.update(self.namespace, self.is_svp64_mode)
841
def TRAP(self, trap_addr=0x700, trap_bit=PIb.TRAP):
    """Take an interrupt: save state into the SRRs, work out the
    handler address and put the MSR into its interrupt state.

    Saved: CIA into SRR0, the namespace's MSR into SRR1 (with
    ``trap_bit`` then set in that SRR1 copy), and in SVP64 mode
    SVSTATE into SVSRR0.  The handler address - ``trap_addr`` OR-ed
    with KAIVB less its low 13 bits - is left in self.trap_nia; NIA
    itself is not touched here.

    The default arguments exist because the pseudocode may call TRAP
    directly; from inside ISACaller prefer call_trap().
    """
    # https://bugs.libre-soc.org/show_bug.cgi?id=859
    kaivb = self.spr['KAIVB'].value
    msr = self.namespace['MSR'].value
    log("TRAP:", hex(trap_addr), hex(msr), "kaivb", hex(kaivb))

    # save the return address and the pre-interrupt MSR
    self.spr['SRR0'].value = self.pc.CIA.value
    self.spr['SRR1'].value = msr
    if self.is_svp64_mode:
        # NOTE(review): unlike SRR0/SRR1 this replaces the SPR entry with
        # the raw .value instead of assigning to its .value - confirm
        # that is intended
        self.spr['SVSRR0'] = self.namespace['SVSTATE'].value

    vector_base = kaivb & ~0x1fff
    self.trap_nia = SelectableInt(trap_addr | vector_base, 64)
    self.spr['SRR1'][trap_bit] = 1  # change *copy* of MSR in SRR1

    # set exception bits. TODO: this should, based on the address
    # in figure 66 p1065 V3.0B and the table figure 65 p1063 set these
    # bits appropriately. however it turns out that *for now* in all
    # cases (all trap_addrs) the exact same thing is needed.
    msr_on_interrupt = (
        (MSRb.IR, 0),
        (MSRb.DR, 0),
        (MSRb.FE0, 0),
        (MSRb.FE1, 0),
        (MSRb.EE, 0),
        (MSRb.RI, 0),
        (MSRb.SF, 1),
        (MSRb.TM, 0),
        (MSRb.VEC, 0),
        (MSRb.VSX, 0),
        (MSRb.PR, 0),
        (MSRb.FP, 0),
        (MSRb.PMM, 0),
        (MSRb.TEs, 0),
        (MSRb.TEe, 0),
        (MSRb.UND, 0),
        (MSRb.LE, 1),
    )
    for msr_bit, level in msr_on_interrupt:
        self.msr[msr_bit] = level
884
def memassign(self, ea, sz, val):
    """Forward a memory assignment to self.mem.memassign unchanged.

    This bound method is what __init__ places in the namespace under
    the name 'memassign'.
    """
    self.mem.memassign(ea, sz, val)
887
def prep_namespace(self, insn_name, formname, op_fields):
    """Generator (use with "yield from"): load self.namespace with
    everything one instruction needs.

    insn_name - instruction name (only used for logging and to detect
                the "sv.bc" family)
    formname  - key into self.decoder.sigforms
    op_fields - names of the opcode fields to sample into the namespace

    Also refreshes XER, CA and CA32 from the XER SPR, and sets the VL
    and srcstep convenience names from SVSTATE.
    """
    # TODO: get field names from form in decoder*1* (not decoder2)
    # decoder2 is hand-created, and decoder1.sigform is auto-generated
    # from spec
    # then "yield" fields only from op_fields rather than hard-coded
    # list, here.
    fields = self.decoder.sigforms[formname]
    log("prep_namespace", formname, op_fields, insn_name)
    for name in op_fields:
        # CR immediates. deal with separately. needs modifying
        # pseudocode
        if self.is_svp64_mode and name in ['BI']:  # TODO, more CRs
            # BI is a 5-bit, must reconstruct the value
            regnum, is_vec = yield from get_pdecode_cr_in(self.dec2, name)
            sig = getattr(fields, name)
            val = yield sig
            # low 2 LSBs (CR field selector) remain same, CR num extended
            assert regnum <= 7, "sigh, TODO, 128 CR fields"
            val = (val & 0b11) | (regnum << 2)
        else:
            sig = getattr(fields, name)
            val = yield sig
        # these are all opcode fields involved in index-selection of CR,
        # and need to do "standard" arithmetic. CR[BA+32] for example
        # would, if using SelectableInt, only be 5-bit.
        if name in ['BF', 'BFA', 'BC', 'BA', 'BB', 'BT', 'BI']:
            self.namespace[name] = val
        else:
            # every other field keeps the width of its signal
            self.namespace[name] = SelectableInt(val, sig.width)

    self.namespace['XER'] = self.spr['XER']
    # CA / CA32 go in as plain values, not as views into XER
    self.namespace['CA'] = self.spr['XER'][XER_bits['CA']].value
    self.namespace['CA32'] = self.spr['XER'][XER_bits['CA32']].value

    # add some SVSTATE convenience variables
    vl = self.svstate.vl
    srcstep = self.svstate.srcstep
    self.namespace['VL'] = vl
    self.namespace['srcstep'] = srcstep

    # sv.bc* need some extra fields
    if self.is_svp64_mode and insn_name.startswith("sv.bc"):
        # blegh grab bits manually
        mode = yield self.dec2.rm_dec.rm_in.mode
        bc_vlset = (mode & SVP64MODE.BC_VLSET) != 0
        bc_vli = (mode & SVP64MODE.BC_VLI) != 0
        bc_snz = (mode & SVP64MODE.BC_SNZ) != 0
        bc_vsb = yield self.dec2.rm_dec.bc_vsb
        bc_lru = yield self.dec2.rm_dec.bc_lru
        bc_gate = yield self.dec2.rm_dec.bc_gate
        sz = yield self.dec2.rm_dec.pred_sz
        # each flag goes into the namespace as a 1-bit value
        self.namespace['ALL'] = SelectableInt(bc_gate, 1)
        self.namespace['VSb'] = SelectableInt(bc_vsb, 1)
        self.namespace['LRu'] = SelectableInt(bc_lru, 1)
        self.namespace['VLSET'] = SelectableInt(bc_vlset, 1)
        self.namespace['VLI'] = SelectableInt(bc_vli, 1)
        self.namespace['sz'] = SelectableInt(sz, 1)
        self.namespace['SNZ'] = SelectableInt(bc_snz, 1)
946
def handle_carry_(self, inputs, outputs, already_done):
    """Generator (use with "yield from"): derive XER.CA and XER.CA32
    from an instruction's inputs and its result.

    inputs       - list of input operands; MODIFIED IN PLACE (the first
                   one is inverted when the decoder says so, and the
                   immediate, if any, is appended)
    outputs      - the result, or a list/tuple whose first item is the
                   result
    already_done - bitmask: bit 0 set means leave CA alone, bit 1 set
                   means leave CA32 alone
    """
    inv_a = yield self.dec2.e.do.invert_in
    if inv_a:
        inputs[0] = ~inputs[0]

    imm_ok = yield self.dec2.e.do.imm_data.ok
    if imm_ok:
        imm = yield self.dec2.e.do.imm_data.data
        inputs.append(SelectableInt(imm, 64))
    assert len(outputs) >= 1
    log("outputs", repr(outputs))
    if isinstance(outputs, list) or isinstance(outputs, tuple):
        output = outputs[0]
    else:
        output = outputs
    # carry-out is taken to have happened if any input is (unsigned)
    # greater than the result
    gts = []
    for x in inputs:
        log("gt input", x, output)
        gt = (gtu(x, output))
        gts.append(gt)
    log(gts)
    cy = 1 if any(gts) else 0
    log("CA", cy, gts)
    if not (1 & already_done):
        self.spr['XER'][XER_bits['CA']] = cy

    log("inputs", already_done, inputs)
    # 32 bit carry
    # ARGH... different for OP_ADD... *sigh*...
    op = yield self.dec2.e.do.insn_type
    if op == MicrOp.OP_ADD.value:
        # for add: XOR of the (1 << 32) bit of the result and of the
        # first two inputs (a missing second input counts as 0)
        res32 = (output.value & (1 << 32)) != 0
        a32 = (inputs[0].value & (1 << 32)) != 0
        if len(inputs) >= 2:
            b32 = (inputs[1].value & (1 << 32)) != 0
        else:
            b32 = False
        cy32 = res32 ^ a32 ^ b32
        log("CA32 ADD", cy32)
    else:
        # otherwise the same greater-than test as above, applied to
        # the [32:64] slice of each input and of the result
        gts = []
        for x in inputs:
            log("input", x, output)
            log(" x[32:64]", x, x[32:64])
            log(" o[32:64]", output, output[32:64])
            gt = (gtu(x[32:64], output[32:64])) == SelectableInt(1, 1)
            gts.append(gt)
        cy32 = 1 if any(gts) else 0
        log("CA32", cy32, gts)
    if not (2 & already_done):
        self.spr['XER'][XER_bits['CA32']] = cy32
998
def handle_overflow(self, inputs, outputs, div_overflow):
    """update XER.OV, XER.OV32 and XER.SO for the instruction just executed.

    this is a generator: decoder fields are read with "yield".

    inputs       -- list of operands.  modified in place: inputs[0] is
                    bit-inverted if the decoder has (and sets) invert_in,
                    and the immediate is appended if imm_data.ok is set
    outputs      -- results of the instruction; outputs[0] is examined
    div_overflow -- if not None, used directly as both OV and OV32
                    (returned by the pseudo-code because it is more complex
                    than can be done by analysing the output)

    nothing is written if there are fewer than two operands and
    div_overflow is None.
    """
    do = self.dec2.e.do
    if hasattr(do, "invert_in"):
        invert_a = yield do.invert_in
        if invert_a:
            inputs[0] = ~inputs[0]

    has_imm = yield do.imm_data.ok
    if has_imm:
        imm_data = yield do.imm_data.data
        inputs.append(SelectableInt(imm_data, 64))
    assert len(outputs) >= 1
    log("handle_overflow", inputs, outputs, div_overflow)

    have_div = div_overflow is not None
    if not have_div and len(inputs) < 2:
        return

    if have_div:
        # div overflow is different: take the pseudo-code's word for it
        ov = ov32 = div_overflow
    else:
        # arithmetic overflow: the first two operands have the same sign
        # and the result has the other one.  (at least two operands are
        # guaranteed here by the early return above)
        result = outputs[0]

        # OV (64-bit)
        in_neg = [exts(x.value, x.bits) < 0 for x in inputs]
        out_neg = exts(result.value, result.bits) < 0
        ov = int(in_neg[0] == in_neg[1] and out_neg != in_neg[0])

        # OV (32-bit)
        in_neg32 = [exts(x.value, 32) < 0 for x in inputs]
        out_neg32 = exts(result.value, 32) < 0
        ov32 = int(in_neg32[0] == in_neg32[1] and out_neg32 != in_neg32[0])

    # now update XER OV/OV32/SO
    old_so = self.spr['XER'][XER_bits['SO']]
    new_so = old_so | ov  # sticky overflow ORs in old with new
    self.spr['XER'][XER_bits['OV']] = ov
    self.spr['XER'][XER_bits['OV32']] = ov32
    self.spr['XER'][XER_bits['SO']] = new_so
    log(" set overflow", ov, ov32, old_so, new_so)
1041
def handle_comparison(self, outputs, cr_idx=0, overflow=None, no_so=False):
    """set a CR field from the sign of a result (Rc=1 style).

    outputs  -- sequence whose first item (a SelectableInt) is tested
    cr_idx   -- index into self.crl of the CR field to write
    overflow -- if equal to 1, forces the SO bit of the field to 1
    no_so    -- if True, XER.SO is not read (important for setvl)

    the field is the concatenation of: negative, positive, zero, SO.
    """
    result = outputs[0]
    assert isinstance(result, SelectableInt), \
        "out zero not a SelectableInt %s" % repr(outputs)
    log("handle_comparison", result.bits, hex(result.value))
    # TODO - XXX *processor* in 32-bit mode
    # https://bugs.libre-soc.org/show_bug.cgi?id=424
    # if is_32bit:
    #     o32 = exts(out.value, 32)
    #     print ("handle_comparison exts 32 bit", hex(o32))
    signed = exts(result.value, result.bits)
    log("handle_comparison exts", hex(signed))

    # the three main CR flags: LT, GT, EQ
    negative = SelectableInt(signed < 0, 1)
    positive = SelectableInt(signed > 0, 1)
    zero = SelectableInt(signed == 0, 1)

    # get (or not) XER.SO.  for setvl this is important *not* to read SO
    # NOTE(review): SelectableInt(1, 0) passes 1 as the first argument and
    # 0 as the second, where every other call here passes (value, bits) --
    # presumably SelectableInt(0, 1) was meant.  confirm before changing.
    SO = SelectableInt(1, 0) if no_so else self.spr['XER'][XER_bits['SO']]
    log("handle_comparison SO overflow", SO, overflow)

    # alternative overflow checking (setvl mainly at the moment)
    if overflow is not None and overflow == 1:
        SO = SelectableInt(1, 1)

    # create the CR field value and set the required CR field
    cr_field = selectconcat(negative, positive, zero, SO)
    log("handle_comparison cr_field", self.cr, cr_idx, cr_field)
    self.crl[cr_idx].eq(cr_field)
1071
def set_pc(self, pc_val):
    """put pc_val (as a 64-bit SelectableInt) into the namespace as NIA,
    then let the PC state update itself from the namespace
    """
    next_addr = SelectableInt(pc_val, 64)
    self.namespace['NIA'] = next_addr
    self.pc.update(self.namespace, self.is_svp64_mode)
1075
def get_next_insn(self):
    """fetch the next instruction word from instruction memory.

    the address is CIA when respect_pc is set, otherwise fake_pc.
    returns (address, instruction); raises KeyError if the memory
    returns None for that address.
    """
    pc = self.pc.CIA.value if self.respect_pc else self.fake_pc
    ins = self.imem.ld(pc, 4, False, True, instr_fetch=True)
    if ins is not None:
        return pc, ins
    raise KeyError("no instruction at 0x%x" % pc)
1087
def setup_one(self):
    """fetch the next instruction and set the decoder up for it (generator)
    """
    fetched = self.get_next_insn()
    pc, insn = fetched
    yield from self.setup_next_insn(pc, insn)
1093
def setup_next_insn(self, pc, ins):
    """present an instruction to the decoder and detect SVP64 prefixes.

    this is a generator: decoder signals are set and read with "yield".

    pc  -- address of the instruction (remembered in self._pc)
    ins -- the word fetched from that address; only the low 32 bits
           are handed to the decoder

    sets self.is_svp64_mode.  when the word is an SVP64 prefix, the word
    at pc+4 is fetched and given to the decoder as the actual instruction,
    together with the prefix's RM field.
    """
    self._pc = pc
    word = ins & 0xffffffff
    log("setup: 0x%x 0x%x %s" % (pc, word, bin(ins)))
    log("CIA NIA", self.respect_pc, self.pc.CIA.value, self.pc.NIA.value)

    dec2 = self.dec2
    yield dec2.sv_rm.eq(0)
    yield dec2.dec.raw_opcode_in.eq(word)
    yield dec2.dec.bigendian.eq(self.bigendian)
    yield dec2.state.msr.eq(self.msr.value)
    yield dec2.state.pc.eq(pc)
    if self.svstate is not None:
        yield dec2.state.svstate.eq(self.svstate.value)

    # SVP64.  first, check if the opcode is EXT001, and SVP64 id bits set
    yield Settle()
    raw_opcode = yield dec2.dec.opcode_in
    pfx = SVP64Instruction.Prefix(SelectableInt(value=raw_opcode, bits=32))
    log("prefix test: opcode:", pfx.po, bin(pfx.po), pfx.id)
    self.is_svp64_mode = bool((pfx.po == 0b000001) and (pfx.id == 0b11))
    self.pc.update_nia(self.is_svp64_mode)
    # set SVP64 decode
    yield dec2.is_svp64_mode.eq(self.is_svp64_mode)
    self.namespace['NIA'] = self.pc.NIA
    self.namespace['SVSTATE'] = self.svstate

    if self.is_svp64_mode:
        # in SVP64 mode.  decode/print out svp64 prefix, get v3.0B instruction
        log("svp64.rm", bin(pfx.rm))
        log(" svstate.vl", self.svstate.vl)
        log(" svstate.mvl", self.svstate.maxvl)
        suffix = self.imem.ld(pc+4, 4, False, True, instr_fetch=True)
        log(" svsetup: 0x%x 0x%x %s" % (pc+4, suffix & 0xffffffff,
                                        bin(suffix)))
        yield dec2.dec.raw_opcode_in.eq(suffix & 0xffffffff)  # v3.0B suffix
        yield dec2.sv_rm.eq(int(pfx.rm))  # svp64 prefix
        yield Settle()
1133
def execute_one(self):
    """execute the instruction that was previously set up (generator).

    the instruction name is the first space-separated word of either the
    matching entry of the disassembly listing (when there is a listing)
    or the name reconstructed from the decoder.  a MemException whose
    first argument is 'unaligned' or 'invalid' is turned into a trap and
    ends execution of this instruction; any other one is re-raised.
    """
    # in SVP64 mode the listing entry is looked up 4 bytes further on
    if self.is_svp64_mode:
        listing_addr, banner = self._pc+4, " svp64 sim-execute"
    else:
        listing_addr, banner = self._pc, "sim-execute"

    # get the disassembly code for this instruction
    if self.disassembly:
        code = self.disassembly[listing_addr]
    else:
        code = yield from self.get_assembly_name()
    log(banner, hex(self._pc), code)

    opname = code.split(' ')[0]
    try:
        yield from self.call(opname)  # execute the instruction
    except MemException as e:  # check for memory errors
        reason = e.args[0]
        if reason == 'unaligned':  # alignment error
            # run a Trap but set DAR first
            print("memory unaligned exception, DAR", e.dar)
            self.spr['DAR'] = SelectableInt(e.dar, 64)
            self.call_trap(0x600, PIb.PRIV)  # 0x600, privileged
            return
        if reason == 'invalid':  # invalid
            log("RADIX MMU memory invalid error, mode %s" % e.mode)
            # XXX TODO (EXECUTE mode): must set a few bits in SRR1,
            # see microwatt loadstore1.vhdl
            # if m_in.segerr = '0' then
            #   v.srr1(47 - 33) := m_in.invalid;
            #   v.srr1(47 - 35) := m_in.perm_error; -- noexec fault
            #   v.srr1(47 - 44) := m_in.badtree;
            #   v.srr1(47 - 45) := m_in.rc_error;
            #   v.intr_vec := 16#400#;
            # else
            #   v.intr_vec := 16#480#;
            vector = 0x400 if e.mode == 'EXECUTE' else 0x300
            self.call_trap(vector, PIb.PRIV)  # privileged
            return
        # not supported yet:
        raise e  # ... re-raise

    # don't use this except in special circumstances
    if not self.respect_pc:
        self.fake_pc += 4

    log("execute one, CIA NIA", hex(self.pc.CIA.value),
        hex(self.pc.NIA.value))
1187
def get_assembly_name(self):
    """reconstruct the assembly mnemonic from the decoder state (generator).

    starts from the insns entry for the decoder's asmcode, then appends
    "." (Rc), "l" (LK) and "a" (AA on OP_B / OP_BC) according to decoder
    fields, and finally overrides the name for OP_MFCR and OP_MTCRF
    depending on get_spr_msb().  returns the resulting string.
    """
    # TODO, asmregs is from the spec, e.g. add RT,RA,RB
    # see http://bugs.libre-riscv.org/show_bug.cgi?id=282
    do = self.dec2.e.do
    op = self.dec2.dec.op
    dec_insn = yield do.insn
    insn_1_11 = yield do.insn[1:11]
    asmcode = yield op.asmcode
    int_op = yield op.internal_op
    log("get assembly name asmcode", asmcode, int_op,
        hex(dec_insn), bin(insn_1_11))
    asmop = insns.get(asmcode, None)

    # sigh reconstruct the assembly instruction name
    ov_en = ov_ok = False
    if hasattr(do, "oe"):
        ov_en = yield do.oe.oe
        ov_ok = yield do.oe.ok
    rc_en = rc_ok = False
    if hasattr(do, "rc"):
        rc_en = yield do.rc.rc
        rc_ok = yield do.rc.ok
    log("ov %d en %d rc %d en %d op %d" %
        (ov_ok, ov_en, rc_ok, rc_en, int_op))

    # grrrr have to special-case MUL op (see DecodeOE): for those the
    # "." is added regardless; for everything else it is only added when
    # not already present (don't add "." to "andis.")
    is_mul = int_op in [MicrOp.OP_MUL_H64.value, MicrOp.OP_MUL_H32.value]
    if is_mul:
        log("mul op")
    if (is_mul or not asmop.endswith(".")) and (rc_en & rc_ok):
        asmop += "."

    if hasattr(do, "lk"):
        lk = yield do.lk
        if lk:
            asmop += "l"
    log("int_op", int_op)
    if int_op in [MicrOp.OP_B.value, MicrOp.OP_BC.value]:
        AA = yield self.dec2.dec.fields.FormI.AA[0:-1]
        log("AA", AA)
        if AA:
            asmop += "a"

    spr_msb = yield from self.get_spr_msb()
    if int_op == MicrOp.OP_MFCR.value:
        asmop = 'mfocrf' if spr_msb else 'mfcr'
    # XXX TODO: for whatever weird reason this doesn't work
    # https://bugs.libre-soc.org/show_bug.cgi?id=390
    if int_op == MicrOp.OP_MTCRF.value:
        asmop = 'mtocrf' if spr_msb else 'mtcrf'
    return asmop
1247
def get_remap_indices(self):
    """WARNING, this function stores remap_idxs and remap_loopends
    in the class for later use.  this to avoid problems with yield

    for each of the four SVSHAPE SPRs, an iterator is obtained and stepped
    until its position equals the current step; the (index, loopends) pair
    found there is recorded in self.remap_idxs[i] / self.remap_loopends[i].

    returns the list of (shape, iterator) pairs.  note the iterators have
    been partly consumed by the search.
    """
    # go through all iterators in lock-step, advance to next remap_idx
    srcstep, dststep, ssubstep, dsubstep = self.get_src_dststeps()
    # get four SVSHAPEs.  here we are hard-coding
    shape_names = ('SVSHAPE0', 'SVSHAPE1', 'SVSHAPE2', 'SVSHAPE3')
    shapes = [self.spr[spr_name] for spr_name in shape_names]
    # set up the iterators
    remaps = [(shape, shape.get_iterator()) for shape in shapes]

    self.remap_loopends = [0] * 4
    self.remap_idxs = [0, 1, 2, 3]
    report = []
    for i, (shape, remap) in enumerate(remaps):
        # zero is "disabled"
        if shape.value == 0x0:
            self.remap_idxs[i] = 0
        # pick src or dststep depending on reg num (0-2=in, 3-4=out)
        step = dststep if i in (3, 4) else srcstep
        # this is terrible.  O(N^2) looking for the match.  but hey.
        # (remap_idx / loopends deliberately outlive the loop)
        for position, (remap_idx, loopends) in enumerate(remap):
            if position == step:
                break
        self.remap_idxs[i] = remap_idx
        self.remap_loopends[i] = loopends
        report.append((i, step, remap_idx, loopends))
    for (i, step, remap_idx, loopends) in report:
        log("SVSHAPE %d idx, end" % i, step, remap_idx, bin(loopends))
    return remaps
1285
def get_spr_msb(self):
    """read the decoded instruction word (generator) and return True if
    its (1 << 20) bit is set, False otherwise
    """
    insn_word = yield self.dec2.e.do.insn
    bit20 = 1 << 20
    return (insn_word & bit20) != 0  # sigh - XFF.spr[-1]?
1289
def call(self, name):
    """call(opcode) - the primary execution point for instructions

    this is a generator: decoder signals are read and set with "yield".

    name -- the instruction name (surrounding spaces are stripped)

    in order, this:
    * does nothing if the simulator is halted
    * traps (0x700) on privileged instructions when MSR.PR is 1
    * halts on 'attn'
    * traps (0x700) if the name does not match the one reconstructed from
      the decoder (with exceptions for a list of instructions)
    * prepares the namespace, SVSTATE steps and REMAP steps
    * reads the input registers and calls the instruction's function
    * handles carry, overflow, Rc=1 comparison and register write-back
    * finally lets check_step_increment decide whether the PC advances
    """
    self.last_st_addr = None  # reset the last known store address
    self.last_ld_addr = None  # etc.

    ins_name = name.strip()  # remove spaces if not already done so
    if self.halted:
        log("halted - not executing", ins_name)
        return

    # TODO, asmregs is from the spec, e.g. add RT,RA,RB
    # see http://bugs.libre-riscv.org/show_bug.cgi?id=282
    asmop = yield from self.get_assembly_name()
    log("call", ins_name, asmop)

    # sv.setvl is *not* a loop-function. sigh
    log("is_svp64_mode", self.is_svp64_mode, asmop)

    # check privileged
    int_op = yield self.dec2.dec.op.internal_op
    spr_msb = yield from self.get_spr_msb()

    instr_is_privileged = False
    if int_op in [MicrOp.OP_ATTN.value,
                  MicrOp.OP_MFMSR.value,
                  MicrOp.OP_MTMSR.value,
                  MicrOp.OP_MTMSRD.value,
                  # TODO: OP_TLBIE
                  MicrOp.OP_RFID.value]:
        instr_is_privileged = True
    # mfspr/mtspr are only treated as privileged when get_spr_msb() is set
    if int_op in [MicrOp.OP_MFSPR.value,
                  MicrOp.OP_MTSPR.value] and spr_msb:
        instr_is_privileged = True

    log("is priv", instr_is_privileged, hex(self.msr.value),
        self.msr[MSRb.PR])
    # check MSR priv bit and whether op is privileged: if so, throw trap
    if instr_is_privileged and self.msr[MSRb.PR] == 1:
        self.call_trap(0x700, PIb.PRIV)
        return

    # check halted condition
    if ins_name == 'attn':
        self.halted = True
        return

    # check illegal instruction: the requested name must match the name
    # reconstructed from the decoder (mtcrf / mtocrf are exempt)
    illegal = False
    if ins_name not in ['mtcrf', 'mtocrf']:
        illegal = ins_name != asmop

    # list of instructions not being supported by binutils (.long)
    # for these the decoder's name (minus any trailing ".") is trusted
    dotstrp = asmop[:-1] if asmop[-1] == '.' else asmop
    if dotstrp in [*FPTRANS_INSNS,
                   'ffmadds', 'fdmadds', 'ffadds',
                   'mins', 'maxs', 'minu', 'maxu',
                   'setvl', 'svindex', 'svremap', 'svstep',
                   'svshape', 'svshape2',
                   'grev', 'ternlogi', 'bmask', 'cprop',
                   'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd',
                   'fmvis', 'fishmv',
                   ]:
        illegal = False
        ins_name = dotstrp

    # branch-conditional redirects to sv.bc
    if asmop.startswith('bc') and self.is_svp64_mode:
        ins_name = 'sv.%s' % ins_name

    log(" post-processed name", dotstrp, ins_name, asmop)

    # illegal instructions call TRAP at 0x700
    if illegal:
        print("illegal", ins_name, asmop)
        self.call_trap(0x700, PIb.ILLEG)
        print("name %s != %s - calling ILLEGAL trap, PC: %x" %
              (ins_name, asmop, self.pc.CIA.value))
        return

    # this is for setvl "Vertical" mode: if set true,
    # srcstep/dststep is explicitly advanced.  mode says which SVSTATE to
    # test for Rc=1 end condition.  3 bits of all 3 loops are put into CR0
    self.allow_next_step_inc = False
    self.svstate_next_mode = 0

    # nop has to be supported, we could let the actual op calculate
    # but PowerDecoder has a pattern for nop
    if ins_name == 'nop':
        self.update_pc_next()
        return

    # look up instruction in ISA.instrs, prepare namespace
    info = self.instrs[ins_name]
    yield from self.prep_namespace(ins_name, info.form, info.op_fields)

    # preserve order of register names
    input_names = create_args(list(info.read_regs) +
                              list(info.uninit_regs))
    log("input names", input_names)

    # get SVP64 entry for the current instruction
    sv_rm = self.svp64rm.instrs.get(ins_name)
    if sv_rm is not None:
        dest_cr, src_cr, src_byname, dest_byname = decode_extra(sv_rm)
    else:
        dest_cr, src_cr, src_byname, dest_byname = False, False, {}, {}
    log("sv rm", sv_rm, dest_cr, src_cr, src_byname, dest_byname)

    # see if srcstep/dststep need skipping over masked-out predicate bits
    if (self.is_svp64_mode or ins_name in ['setvl', 'svremap', 'svstate']):
        yield from self.svstate_pre_inc()
    if self.is_svp64_mode:
        pre = yield from self.update_new_svstate_steps()
        if pre:
            # end of the element loop already reached: reset the loop
            # and move on to the next instruction without executing
            self.svp64_reset_loop()
            self.update_nia()
            self.update_pc_next()
            return
        # these locals only exist in SVP64 mode: every later use is
        # guarded by an is_svp64_mode test
        srcstep, dststep, ssubstep, dsubstep = self.get_src_dststeps()
        pred_dst_zero = self.pred_dst_zero
        pred_src_zero = self.pred_src_zero
        vl = self.svstate.vl
        subvl = yield self.dec2.rm_dec.rm_in.subvl

    # VL=0 in SVP64 mode means "do nothing: skip instruction"
    if self.is_svp64_mode and vl == 0:
        self.pc.update(self.namespace, self.is_svp64_mode)
        log("SVP64: VL=0, end of call", self.namespace['CIA'],
            self.namespace['NIA'], kind=LogKind.InstrInOuts)
        return

    # for when SVREMAP is active, using pre-arranged schedule.
    # note: modifying PowerDecoder2 needs to "settle"
    remap_en = self.svstate.SVme
    persist = self.svstate.RMpst
    active = (persist or self.last_op_svshape) and remap_en != 0
    if self.is_svp64_mode:
        yield self.dec2.remap_active.eq(remap_en if active else 0)
        yield Settle()
    # "remaps" is only bound (and only used) when persist or
    # last_op_svshape is set
    if persist or self.last_op_svshape:
        remaps = self.get_remap_indices()
    if self.is_svp64_mode and (persist or self.last_op_svshape):
        yield from self.remap_set_steps(remaps)
    # after that, settle down (combinatorial) to let Vector reg numbers
    # work themselves out
    yield Settle()
    if self.is_svp64_mode:
        remap_active = yield self.dec2.remap_active
    else:
        remap_active = False
    log("remap active", bin(remap_active))

    # main input registers (RT, RA ...)
    inputs = []
    for name in input_names:
        log("name", name)
        regval = (yield from self.get_input(name))
        log("regval", regval)
        inputs.append(regval)

    # arrrrgh, awful hack, to get _RT into namespace
    if ins_name in ['setvl', 'svstep']:
        regname = "_RT"
        RT = yield self.dec2.dec.RT
        self.namespace[regname] = SelectableInt(RT, 5)
        if RT == 0:
            self.namespace["RT"] = SelectableInt(0, 5)
            regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, "RT")
            # NOTE(review): "name" here is whatever the input-register
            # loop above left behind (and is unbound if input_names was
            # empty) -- presumably regname or "RT" was meant.  confirm.
            log('hack input reg %s %s' % (name, str(regnum)), is_vec)

    # in SVP64 mode for LD/ST work out immediate
    # XXX TODO: replace_ds for DS-Form rather than D-Form.
    # use info.form to detect
    if self.is_svp64_mode:
        yield from self.check_replace_d(info, remap_active)

    # "special" registers: taken from the SPR file if the name is in
    # special_sprs, otherwise from the namespace
    for special in info.special_regs:
        if special in special_sprs:
            inputs.append(self.spr[special])
        else:
            inputs.append(self.namespace[special])

    # clear trap (trap) NIA
    self.trap_nia = None

    # check if this was an sv.bc* and create an indicator that
    # this is the last check to be made as a loop.  combined with
    # the ALL/ANY mode we can early-exit
    if self.is_svp64_mode and ins_name.startswith("sv.bc"):
        no_in_vec = yield self.dec2.no_in_vec  # BI is scalar
        # XXX TODO - pack/unpack here
        end_loop = no_in_vec or srcstep == vl-1 or dststep == vl-1
        self.namespace['end_loop'] = SelectableInt(end_loop, 1)

    # execute actual instruction here (finally)
    log("inputs", inputs)
    results = info.func(self, *inputs)
    log("results", results)

    # "inject" decorator takes namespace from function locals: we need to
    # overwrite NIA being overwritten (sigh)
    if self.trap_nia is not None:
        self.namespace['NIA'] = self.trap_nia

    log("after func", self.namespace['CIA'], self.namespace['NIA'])

    # check if op was a LD/ST so that debugging can check the
    # address
    if int_op in [MicrOp.OP_STORE.value,
                  ]:
        self.last_st_addr = self.mem.last_st_addr
    if int_op in [MicrOp.OP_LOAD.value,
                  ]:
        self.last_ld_addr = self.mem.last_ld_addr
    log("op", int_op, MicrOp.OP_STORE.value, MicrOp.OP_LOAD.value,
        self.last_st_addr, self.last_ld_addr)

    # detect if CA/CA32 already in outputs (sra*, basically)
    # bit 0 of already_done = CA, bit 1 = CA32
    already_done = 0
    if info.write_regs:
        output_names = create_args(info.write_regs)
        for name in output_names:
            if name == 'CA':
                already_done |= 1
            if name == 'CA32':
                already_done |= 2

    log("carry already done?", bin(already_done))
    if hasattr(self.dec2.e.do, "output_carry"):
        carry_en = yield self.dec2.e.do.output_carry
    else:
        carry_en = False
    # NOTE(review): handle_carry_ and handle_overflow (below) each invert
    # inputs[0] / append the immediate to this same "inputs" list in
    # place, so when both run the second one sees the list as already
    # modified by the first.  confirm this is intended.
    if carry_en:
        yield from self.handle_carry_(inputs, results, already_done)

    # check if one of the regs was named "overflow"
    overflow = None
    if info.write_regs:
        for name, output in zip(output_names, results):
            if name == 'overflow':
                overflow = output

    if not self.is_svp64_mode:  # yeah just no. not in parallel processing
        # detect if overflow was in return result
        if hasattr(self.dec2.e.do, "oe"):
            ov_en = yield self.dec2.e.do.oe.oe
            ov_ok = yield self.dec2.e.do.oe.ok
        else:
            ov_en = False
            ov_ok = False
        log("internal overflow", ins_name, overflow, "en?", ov_en, ov_ok)
        if ov_en & ov_ok:
            yield from self.handle_overflow(inputs, results, overflow)

    # only do SVP64 dest predicated Rc=1 if dest-pred is not enabled
    rc_en = False
    if not self.is_svp64_mode or not pred_dst_zero:
        if hasattr(self.dec2.e.do, "rc"):
            rc_en = yield self.dec2.e.do.rc.rc
    if rc_en and ins_name not in ['svstep']:
        if ins_name.startswith("f"):
            rc_reg = "CR1"  # not calculated correctly yet (not FP compares)
        else:
            rc_reg = "CR0"
        regnum, is_vec = yield from get_pdecode_cr_out(self.dec2, rc_reg)
        cmps = results
        # hang on... for `setvl` actually you want to test SVSTATE.VL
        is_setvl = ins_name == 'setvl'
        if is_setvl:
            vl = results[0].vl
            cmps = (SelectableInt(vl, 64), overflow,)
        else:
            overflow = None  # do not override overflow except in setvl
        self.handle_comparison(cmps, regnum, overflow, no_so=is_setvl)

    # any modified return results?
    if info.write_regs:
        for name, output in zip(output_names, results):
            yield from self.check_write(info, name, output, carry_en)

    # step the SVSTATE loop if needed; the result says whether the PC
    # is to be moved on
    nia_update = (yield from self.check_step_increment(results, rc_en,
                                                       asmop, ins_name))
    if nia_update:
        self.update_pc_next()
1576
def check_replace_d(self, info, remap_active):
    """for SVP64 LD/ST, replace the D (or DS) immediate in the namespace
    with one adjusted for the current element step (generator).

    info         -- instruction info; info.form == 'DS' selects DS-Form
    remap_active -- if true, a LOAD takes its offset multiplier from the
                    decoder's in1_step instead of from srcstep/ssubstep

    unit-stride mode adds (multiplier * data length) to the immediate,
    element-stride mode multiplies the immediate by the multiplier.
    in any other mode the namespace is left untouched.
    """
    ldstmode = yield self.dec2.rm_dec.ldstmode
    vl = self.svstate.vl  # read as before; not otherwise used here
    subvl = yield self.dec2.rm_dec.rm_in.subvl
    srcstep, dststep = self.new_srcstep, self.new_dststep
    ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep
    is_ds_form = info.form == 'DS'
    if is_ds_form:
        # DS-Form, multiply by 4 then knock 2 bits off after
        imm = yield self.dec2.dec.fields.FormDS.DS[0:14] * 4
    else:
        imm = yield self.dec2.dec.fields.FormD.D[0:16]
    imm = exts(imm, 16)  # sign-extend to integer

    # get the right step.  LD is from srcstep, ST is dststep
    op = yield self.dec2.e.do.insn_type
    offsmul = 0
    if op == MicrOp.OP_LOAD.value:
        if remap_active:
            offsmul = yield self.dec2.in1_step
            log("D-field REMAP src", imm, offsmul)
        else:
            offsmul = (srcstep * (subvl+1)) + ssubstep
            log("D-field src", imm, offsmul)
    elif op == MicrOp.OP_STORE.value:
        # XXX NOTE! no bit-reversed STORE! this should not ever be used
        offsmul = (dststep * (subvl+1)) + dsubstep
        log("D-field dst", imm, offsmul)

    if ldstmode == SVP64LDSTmode.UNITSTRIDE.value:
        # Unit-Strided LD/ST adds offset*width to immediate
        ldst_len = yield self.dec2.e.do.data_len
        imm = SelectableInt(imm + offsmul * ldst_len, 32)
    elif ldstmode == SVP64LDSTmode.ELSTRIDE.value:
        # Element-strided multiplies the immediate by element step
        imm = SelectableInt(imm * offsmul, 32)
    else:
        # no update / replacement of the constant in the pseudocode
        return

    ldst_ra_vec = yield self.dec2.rm_dec.ldst_ra_vec
    ldst_imz_in = yield self.dec2.rm_dec.ldst_imz_in
    log("LDSTmode", SVP64LDSTmode(ldstmode),
        offsmul, imm, ldst_ra_vec, ldst_imz_in)

    # new replacement D... errr.. DS
    if is_ds_form:
        # TODO: assert 2 LSBs are zero?
        log("DS-Form, TODO, assert 2 LSBs zero?", bin(imm.value))
        imm.value >>= 2
        self.namespace['DS'] = imm
    else:
        self.namespace['D'] = imm
1627
def get_input(self, name):
    """read one named input register (generator) and return its value.

    the register number is looked up in the decoder's inputs first, then
    (if not found) its first and second outputs, and is stored in the
    namespace under "_" + name.  in SVP64 mode with pred_src_zero set,
    nothing is read and plain 0 is returned.  otherwise the value comes
    from the FPR file if the name is in fregs, else from the GPR file.
    """
    # using PowerDecoder2, first, find the decoder index.
    # (mapping name RA RB RC RS to in1, in2, in3)
    regnum, is_vec = yield from get_pdecode_idx_in(self.dec2, name)
    if regnum is None:
        # doing this is not part of svp64, it's because output
        # registers, to be modified, need to be in the namespace.
        regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, name)
    if regnum is None:
        regnum, is_vec = yield from get_pdecode_idx_out2(self.dec2, name)

    # in case getting the register number is needed, _RA, _RB
    self.namespace["_" + name] = regnum

    if self.is_svp64_mode and self.pred_src_zero:
        log('zero input reg %s %s' % (name, str(regnum)), is_vec)
        return 0

    log('reading reg %s %s' % (name, str(regnum)), is_vec)
    if name in fregs:
        reg_val = SelectableInt(self.fpr(regnum))
        log("read reg %d: 0x%x" % (regnum, reg_val.value))
    elif name is not None:
        reg_val = SelectableInt(self.gpr(regnum))
        log("read reg %d: 0x%x" % (regnum, reg_val.value))
    return reg_val
1654
def remap_set_steps(self, remaps):
    """remap_set_steps sets up the in1/2/3 and out1/2 steps.
    they work in concert with PowerDecoder2 at the moment,
    there is no HDL implementation of REMAP.  therefore this
    function, because ISACaller still uses PowerDecoder2,
    will *explicitly* write the dec2.XX_step values.  this has
    to get sorted out.

    remaps -- list of (shape, iterator) pairs, indexed by shape number.
              only the shape of each pair is looked at here; the index
              written comes from self.remap_idxs.

    this is a generator: the dec2 step signals are set with "yield".
    """
    # just some convenient debug info
    for shape_num in range(4):
        sname = 'SVSHAPE%d' % shape_num
        shape = self.spr[sname]
        log(sname, bin(shape.value))
        log(" lims", shape.lims)
        log(" mode", shape.mode)
        log(" skip", shape.skip)

    # set up the list of steps to remap: each of the 3-in 2-out regs is
    # paired with the SVSTATE field saying which shape applies to it
    svstate = self.svstate
    mi0 = svstate.mi0
    mi1 = svstate.mi1
    mi2 = svstate.mi2
    mo0 = svstate.mo0
    mo1 = svstate.mo1
    dec2 = self.dec2
    steps = [('RA', dec2.in1_step, mi0),
             ('RB', dec2.in2_step, mi1),
             ('RC', dec2.in3_step, mi2),
             ('RT', dec2.o_step, mo0),
             ('EA', dec2.o2_step, mo1),
             ]
    remap_idxs = self.remap_idxs
    applied = []
    # now cross-index the required SHAPE for each of 3-in 2-out regs
    for i, (rname, dstep, shape_idx) in enumerate(steps):
        (shape, remap) = remaps[shape_idx]
        remap_idx = remap_idxs[shape_idx]
        # zero is "disabled"
        if shape.value != 0x0:
            # now set the actual requested step to the current index
            yield dstep.eq(remap_idx)

            # debug printout info
            applied.append((shape.mode, i, rname, shape_idx, remap_idx))
    for entry in applied:
        log("shape remap", entry)
1701
def check_write(self, info, name, output, carry_en):
    """write one named result of an instruction back to simulator state.

    this is a generator: decoder fields are read with "yield".

    info     -- the instruction's info record (special_regs is consulted)
    name     -- name of the output:
                * 'overflow' is ignored (dealt with by the caller)
                * 'CA' / 'CA32' go to XER, but only if carry_en
                * names in info.special_regs go to the SPR file if the
                  name is in special_sprs, otherwise to the namespace
                * anything else is a register write: FPR file if the
                  name is in fregs, otherwise GPR file
    output   -- the value.  a plain int is first wrapped as a 256-bit
                SelectableInt.  register writes wider than 64 bits are
                cut down to 64 bits, and are replaced by zero in SVP64
                mode when pred_dst_zero is set
    carry_en -- whether CA / CA32 may be written
    """
    if name == 'overflow':  # ignore, done already (by the caller)
        return
    if isinstance(output, int):
        output = SelectableInt(output, 256)

    if name in ['CA', 'CA32']:
        if carry_en:
            log("writing %s to XER" % name, output)
            log("write XER %s 0x%x" % (name, output.value))
            self.spr['XER'][XER_bits[name]] = output.value
        else:
            log("NOT writing %s to XER" % name, output)
        return

    if name in info.special_regs:
        log('writing special %s' % name, output, special_sprs)
        log("write reg %s 0x%x" % (name, output.value))
        if name in special_sprs:
            self.spr[name] = output
        else:
            self.namespace[name].eq(output)
        if name == 'MSR':
            log('msr written', hex(self.msr.value))
        return

    # ordinary register: find its number via the decoder's first output,
    # then its second output
    regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, name)
    if regnum is None:
        regnum, is_vec = yield from get_pdecode_idx_out2(
            self.dec2, name)
    if regnum is None:
        # temporary hack for not having 2nd output
        regnum = yield getattr(self.decoder, name)
        is_vec = False

    is_fp = name in fregs
    if self.is_svp64_mode and self.pred_dst_zero:
        log('zeroing reg %d %s' % (regnum, str(output)),
            is_vec)
        output = SelectableInt(0, 256)
    else:
        reg_prefix = 'f' if is_fp else 'r'
        # was "%0xx", which printed the hex digits followed by a stray
        # "x"; use the same "0x%x" form as the other register logs
        log("write reg %s%d 0x%x" % (reg_prefix, regnum, output.value))
    if output.bits > 64:
        output = SelectableInt(output.value, 64)
    if is_fp:
        self.fpr[regnum] = output
    else:
        self.gpr[regnum] = output
1748
def check_step_increment(self, results, rc_en, asmop, ins_name):
    """after an instruction has executed, step the SVSTATE loop if that
    is needed (generator).

    results  -- the instruction's results (only replaced locally, when
                CR0 is set at the end of an explicitly-stepped loop)
    rc_en    -- whether Rc=1 handling (setting CR0) is wanted
    asmop    -- name reconstructed from the decoder; used to record
                whether this was an svremap / svindex / svshape2
    ins_name -- instruction name, passed on to svstate_post_inc

    returns a value that the caller (call) uses to decide whether to
    call update_pc_next().

    three cases:
    * an explicit step was requested (allow_next_step_inc is set, which
      SVSTATE_NEXT does): steps are advanced here
    * otherwise in SVP64 mode: svstate_post_inc does the work
    * otherwise: only last_op_svshape is recorded
    """
    # check if it is the SVSTATE.src/dest step that needs incrementing
    # this is our Sub-Program-Counter loop from 0 to VL-1
    pre = False
    post = False
    nia_update = True
    if self.allow_next_step_inc:
        log("SVSTATE_NEXT: inc requested, mode",
            self.svstate_next_mode, self.allow_next_step_inc)
        yield from self.svstate_pre_inc()
        pre = yield from self.update_new_svstate_steps()
        if pre:
            # reset at end of loop including exit Vertical Mode
            log("SVSTATE_NEXT: end of loop, reset")
            self.svp64_reset_loop()
            self.svstate.vfirst = 0
            self.update_nia()
            if not rc_en:
                return True
            # Rc=1: CR0 is set from a comparison of zero
            results = [SelectableInt(0, 64)]
            self.handle_comparison(results)  # CR0
            return True
        # allow_next_step_inc == 2 means "read only": the steps are not
        # advanced below, svstate_post_inc is called instead
        if self.allow_next_step_inc == 2:
            log("SVSTATE_NEXT: read")
            nia_update = (yield from self.svstate_post_inc(ins_name))
        else:
            log("SVSTATE_NEXT: post-inc")
        # use actual src/dst-step here to check end, do NOT
        # use bit-reversed version
        srcstep, dststep = self.new_srcstep, self.new_dststep
        ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep
        # called for its effect of refreshing self.remap_idxs and
        # self.remap_loopends (the return value is not used here)
        remaps = self.get_remap_indices()
        remap_idxs = self.remap_idxs
        vl = self.svstate.vl
        subvl = yield self.dec2.rm_dec.rm_in.subvl
        end_src = srcstep == vl-1
        end_dst = dststep == vl-1
        if self.allow_next_step_inc != 2:
            yield from self.advance_svstate_steps(end_src, end_dst)
        #self.namespace['SVSTATE'] = self.svstate.spr
        # set CR0 (if Rc=1) based on end
        if rc_en:
            endtest = 1 if (end_src or end_dst) else 0
            #results = [SelectableInt(endtest, 64)]
            # self.handle_comparison(results) # CR0

            # see if svstep was requested, if so, which SVSTATE
            endings = 0b111
            if self.svstate_next_mode > 0:
                shape_idx = self.svstate_next_mode.value-1
                endings = self.remap_loopends[shape_idx]
            # CR0 = inverted loop-end bits in the upper 3 bits,
            # end-of-loop test in the lowest bit
            cr_field = SelectableInt((~endings) << 1 | endtest, 4)
            log("svstep Rc=1, CR0", cr_field)
            self.crl[0].eq(cr_field)  # CR0
        if end_src or end_dst:
            # reset at end of loop including exit Vertical Mode
            log("SVSTATE_NEXT: after increments, reset")
            self.svp64_reset_loop()
            self.svstate.vfirst = 0
        return nia_update

    if self.is_svp64_mode:
        return (yield from self.svstate_post_inc(ins_name))

    # XXX only in non-SVP64 mode!
    # record state of whether the current operation was an svshape,
    # OR svindex!
    # to be able to know if it should apply in the next instruction.
    # also (if going to use this instruction) should disable ability
    # to interrupt in between. sigh.
    self.last_op_svshape = asmop in ['svremap', 'svindex', 'svshape2']

    return True
1822
def SVSTATE_NEXT(self, mode, submode):
    """explicitly moves srcstep/dststep on to next element, for
    "Vertical-First" mode.  this function is called from
    setvl pseudo-code, as a pseudo-op "svstep"

    WARNING: this function uses information that was created EARLIER
    due to it being in the middle of a yield, but this function is
    *NOT* called from yield (it's called from compiled pseudocode).

    records the request in self.allow_next_step_inc (submode + 1) and
    self.svstate_next_mode, and returns a 7-bit SelectableInt:
    * mode 1-4: the stored remap index of shape (mode - 1)
    * mode 5:   SVSTATE.srcstep  (svstate_next_mode is reset to 0)
    * mode 6:   SVSTATE.dststep  (svstate_next_mode is reset to 0)
    * anything else: zero
    """
    self.allow_next_step_inc = submode.value + 1
    log("SVSTATE_NEXT mode", mode, submode, self.allow_next_step_inc)
    self.svstate_next_mode = mode

    requested = self.svstate_next_mode
    if requested > 0 and requested < 5:
        which_shape = requested.value-1
        return SelectableInt(self.remap_idxs[which_shape], 7)
    if requested == 5:
        self.svstate_next_mode = 0
        step = self.svstate.srcstep
    elif requested == 6:
        self.svstate_next_mode = 0
        step = self.svstate.dststep
    else:
        step = 0
    return SelectableInt(step, 7)
1845
def get_src_dststeps(self):
    """returns the 4-tuple (srcstep, dststep, ssubstep, dsubstep), taken
    from the new_* attributes
    """
    steps = (self.new_srcstep, self.new_dststep)
    substeps = (self.new_ssubstep, self.new_dsubstep)
    return steps + substeps
1851
def update_new_svstate_steps(self):
    """Copy the pending new_* steps into SVSTATE and push it to dec2.

    Generator: yields the assignment of the SVSTATE value to
    ``dec2.state.svstate``, then a ``Settle()``, then reads back
    ``rm_in.subvl`` from the decoder.

    Returns the end-of-loop test: truthy when the source side has
    ssubstep == subvl and srcstep == vl, or the destination side has
    dsubstep == subvl and dststep == vl.
    """
    # note, do not get the bit-reversed srcstep here!
    new_src, new_dst = self.new_srcstep, self.new_dststep
    new_ssub, new_dsub = self.new_ssubstep, self.new_dsubstep

    # update SVSTATE with new srcstep
    self.svstate.srcstep = new_src
    self.svstate.dststep = new_dst
    self.svstate.ssubstep = new_ssub
    self.svstate.dsubstep = new_dsub
    self.namespace['SVSTATE'] = self.svstate
    yield self.dec2.state.svstate.eq(self.svstate.value)
    yield Settle()  # let decoder update

    # read everything back out of SVSTATE (not the new_* values)
    srcstep = self.svstate.srcstep
    dststep = self.svstate.dststep
    ssubstep = self.svstate.ssubstep
    dsubstep = self.svstate.dsubstep
    pack = self.svstate.pack
    unpack = self.svstate.unpack
    vl = self.svstate.vl
    subvl = yield self.dec2.rm_dec.rm_in.subvl

    for label, value in (
        (" srcstep", srcstep),
        (" dststep", dststep),
        (" pack", pack),
        (" unpack", unpack),
        (" ssubstep", ssubstep),
        (" dsubstep", dsubstep),
        (" vl", vl),
        (" subvl", subvl),
    ):
        log(label, value)

    # check if end reached (we let srcstep overrun, above)
    # nothing needs doing (TODO zeroing): just do next instruction
    src_done = ssubstep == subvl and srcstep == vl
    return src_done or (dsubstep == subvl and dststep == vl)
1886
def svstate_post_inc(self, insn_name, vf=0):
    """Post-instruction SVSTATE step handling (the Sub-PC loop).

    Generator (reads decoder signals via ``yield``); drive it with
    ``yield from``.

    Outcomes, in the order they are tested:

    * ``vf`` is false and SVSTATE.vfirst == 1: NIA is updated and True
      is returned (no stepping is done here).
    * SVP64 ``sv.bc*`` whose ``end_loop`` namespace entry is set: the
      loop is reset, ``update_pc_next`` is called, False is returned.
    * the operation is not a vector one, or the loop end was reached:
      the loop is reset, NIA is updated, True is returned.
    * otherwise: the steps are advanced, NIA is forced back to CIA so
      that the same instruction repeats, and False is returned.
    """
    # check if SV "Vertical First" mode is enabled
    vfirst = self.svstate.vfirst
    log(" SV Vertical First", vf, vfirst)
    if not vf and vfirst == 1:
        self.update_nia()
        return True

    # check if it is the SVSTATE.src/dest step that needs incrementing
    # this is our Sub-Program-Counter loop from 0 to VL-1
    # XXX twin predication TODO
    vl = self.svstate.vl
    subvl = yield self.dec2.rm_dec.rm_in.subvl
    mvl = self.svstate.maxvl
    srcstep = self.svstate.srcstep
    dststep = self.svstate.dststep
    ssubstep = self.svstate.ssubstep
    dsubstep = self.svstate.dsubstep
    pack = self.svstate.pack
    unpack = self.svstate.unpack
    rm_mode = yield self.dec2.rm_dec.mode
    reverse_gear = yield self.dec2.rm_dec.reverse_gear
    sv_ptype = yield self.dec2.dec.op.SV_Ptype
    out_vec = not (yield self.dec2.no_out_vec)
    in_vec = not (yield self.dec2.no_in_vec)
    is_p2 = sv_ptype == SVPtype.P2.value

    for label, value in (
        (" svstate.vl", vl),
        (" svstate.mvl", mvl),
        (" rm.subvl", subvl),
        (" svstate.srcstep", srcstep),
        (" svstate.dststep", dststep),
        (" svstate.ssubstep", ssubstep),
        (" svstate.dsubstep", dsubstep),
        (" svstate.pack", pack),
        (" svstate.unpack", unpack),
        (" mode", rm_mode),
        (" reverse", reverse_gear),
        (" out_vec", out_vec),
        (" in_vec", in_vec),
    ):
        log(label, value)
    log(" sv_ptype", sv_ptype, is_p2)

    # check if this was an sv.bc* and if so did it succeed
    if self.is_svp64_mode and insn_name.startswith("sv.bc"):
        end_loop = self.namespace['end_loop']
        log("branch %s end_loop" % insn_name, end_loop)
        if end_loop.value:
            self.svp64_reset_loop()
            self.update_pc_next()
            return False

    # check if srcstep needs incrementing by one, stop PC advancing
    # but for 2-pred both src/dest have to be checked.
    # XXX this might not be true! it may just be LD/ST
    svp64_is_vector = (out_vec or in_vec) if is_p2 else out_vec

    # loops end at the first "hit" (source or dest)
    end_src = srcstep == vl-1
    end_dst = dststep == vl-1
    ssub_end = ssubstep == subvl
    dsub_end = dsubstep == subvl
    loopend = (end_src and ssub_end) or (end_dst and dsub_end)
    log("loopend", loopend, end_src, end_dst, ssub_end, dsub_end)

    if not svp64_is_vector or loopend:
        # reset loop to zero and update NIA
        self.svp64_reset_loop()
        self.update_nia()
        return True

    # still looping, advance and update NIA
    yield from self.advance_svstate_steps(end_src, end_dst)
    self.namespace['SVSTATE'] = self.svstate

    # not an SVP64 branch, so fix PC (NIA==CIA) for next loop
    # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64)
    # this way we keep repeating the same instruction (with new steps)
    self.pc.NIA.value = self.pc.CIA.value
    self.namespace['NIA'] = self.pc.NIA
    log("end of sub-pc call", self.namespace['CIA'], self.namespace['NIA'])
    # DO NOT allow PC update whilst Sub-PC loop running
    return False
1966
def update_pc_next(self):
    """Update the program counter, then log CIA, NIA and SVSTATE.

    ``self.pc.update`` is handed the namespace and the SVP64-mode flag.
    SVSTATE is only logged here: nothing is copied back from the
    namespace into ``self.svstate``.
    """
    namespace = self.namespace
    # UPDATE program counter
    self.pc.update(namespace, self.is_svp64_mode)
    cia, nia = namespace['CIA'], namespace['NIA']
    log("end of call", cia, nia, namespace['SVSTATE'])
1974
def svp64_reset_loop(self):
    """Zero all four SVSTATE step fields and republish SVSTATE.

    srcstep, dststep, ssubstep and dsubstep are set to 0, after which
    ``namespace['SVSTATE']`` is pointed at ``self.svstate``.
    """
    for step_name in ("srcstep", "dststep", "ssubstep", "dsubstep"):
        setattr(self.svstate, step_name, 0)
    log(" svstate.srcstep loop end (PC to update)")
    self.namespace['SVSTATE'] = self.svstate
1982
def update_nia(self):
    """Recompute NIA on the PC object and mirror it into the namespace.

    ``self.pc.update_nia`` is given the SVP64-mode flag; the resulting
    ``pc.NIA`` is then stored as ``namespace['NIA']``.
    """
    pc = self.pc
    pc.update_nia(self.is_svp64_mode)
    self.namespace['NIA'] = pc.NIA
1986
1987
def inject():
    """Decorator factory.

    this decorator will "inject" variables into the function's namespace,
    from the *dictionary* in self.namespace. it therefore becomes possible
    to make it look like a whole stack of variables which would otherwise
    need "self." inserted in front of them (*and* for those variables to be
    added to the instance) "appear" in the function.

    "self.namespace['SI']" for example becomes accessible as just "SI" but
    *only* inside the function, when decorated.

    The wrapped function must be called with the instance as its first
    positional argument. After the call, ``namespace`` on that instance
    is rebound to the function's globals dictionary. 'CIA', 'NIA' and
    'SVSTATE' are looked up for logging afterwards, so a KeyError is
    raised if they are missing.
    """
    def variable_injector(func):
        @wraps(func)
        def decorator(*args, **kwargs):
            # globals dict that the wrapped function resolves names from
            func_globals = func.__globals__

            context = args[0].namespace  # variables to be injected
            log("globals before", context.keys())
            func_globals.update(context)
            result = func(*args, **kwargs)
            log("globals after", func_globals['CIA'], func_globals['NIA'])
            log("args[0]", args[0].namespace['CIA'],
                args[0].namespace['NIA'],
                args[0].namespace['SVSTATE'])
            if 'end_loop' in func_globals:
                log("args[0] end_loop", func_globals['end_loop'])
            # the globals are deliberately NOT restored: whatever the
            # function left behind becomes the instance's new namespace
            args[0].namespace = func_globals

            return result

        return decorator

    return variable_injector