add <-iea operator
[soc.git] / src/soc/decoder/pseudo/parser.py
# Based on GardenSnake - a parser generator demonstration program
# GardenSnake was released into the Public Domain by Andrew Dalke.

# Portions of this work are derived from Python's Grammar definition
# and may be covered under the Python copyright and license
#
# Andrew Dalke / Dalke Scientific Software, LLC
# 30 August 2006 / Cape Town, South Africa

# Modifications for inclusion in PLY distribution
from pprint import pprint
from ply import lex, yacc
import astor

from soc.decoder.power_decoder import create_pdecode
from soc.decoder.pseudo.lexer import IndentLexer
from soc.decoder.orderedset import OrderedSet

# I use the Python AST
#from compiler import ast
import ast

# Helper function


def Assign(left, right, iea_mode):
    names = []
    print("Assign", left, right)
    if isinstance(left, ast.Name):
        # Single assignment on left
        # XXX when doing IntClass, which will have an "eq" function,
        # this is how to access it
        # eq = ast.Attribute(left, "eq") # get eq fn
        # return ast.Call(eq, [right], []) # now call left.eq(right)
        return ast.Assign([ast.Name(left.id, ast.Store())], right)
    elif isinstance(left, ast.Tuple):
        # List of things - make sure they are Name nodes
        names = []
        for child in left.elts:  # ast.Tuple keeps its children in .elts
            if not isinstance(child, ast.Name):
                raise SyntaxError("that assignment not supported")
            names.append(child.id)
        ass_list = [ast.Name(name, ast.Store()) for name in names]
        return ast.Assign([ast.Tuple(ass_list, ast.Store())], right)
    elif isinstance(left, ast.Subscript):
        return ast.Assign([left], right)
        # XXX HMMM probably not needed...
        ls = left.slice
        if isinstance(ls, ast.Slice):
            lower, upper, step = ls.lower, ls.upper, ls.step
            print("slice assign", lower, upper, step)
            if step is None:
                ls = (lower, upper, None)
            else:
                ls = (lower, upper, step)
            ls = ast.Tuple(ls)
        return ast.Call(ast.Name("selectassign"),
                        [left.value, ls, right], [])
    else:
        print("Assign fail")
        raise SyntaxError("Can't do that yet")
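
# Illustrative examples (comments only, not executed) of what Assign
# produces -- the target names here are hypothetical:
#
#   Assign(Name('RT'), rhs, False)      -> ast.Assign([Name('RT', Store())], rhs)
#   Assign(Subscript(...), rhs, False)  -> ast.Assign([Subscript(...)], rhs)
#
# i.e. both plain names and subscripted targets become ordinary ast.Assign
# nodes; the selectassign code after the early return in the Subscript
# branch is currently unreachable.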


# I implemented INDENT / DEDENT generation as a post-processing filter

# The original lex token stream contains WS and NEWLINE characters.
# WS will only occur before any other tokens on a line.

# I have three filters. One tags tokens by adding two attributes.
# "must_indent" is True if the token must be indented from the
# previous code. The other is "at_line_start" which is True for WS
# and the first non-WS/non-NEWLINE on a line. It flags the check
# to see if the new line has changed indentation level.
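
# Rough, hypothetical sketch of the tagging idea described above (the
# real filters live in soc.decoder.pseudo.lexer.IndentLexer and may
# differ in detail):
#
#   at_line_start = True
#   for tok in raw_tokens:
#       tok.at_line_start = at_line_start
#       tok.must_indent = (previous significant token was a COLON NEWLINE)
#       at_line_start = (tok.type == 'NEWLINE')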


# Not using Python's approach because Ply supports precedence

# comparison: expr (comp_op expr)*
# arith_expr: term (('+'|'-') term)*
# term: factor (('*'|'/'|'%'|'//') factor)*
# factor: ('+'|'-'|'~') factor | power
# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
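
# Because everything below is folded into the single "comparison" rule,
# operator grouping comes from the Ply precedence table in PowerParser.
# For example (illustrative):
#
#   a + b * c    groups as  a + (b * c)   (MULT binds tighter than PLUS)
#   a | b & c    groups as  a | (b & c)   (BITAND binds tighter than BITOR)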

def make_le_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [ast.LtE()], [right])


def make_ge_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [ast.GtE()], [right])


def make_lt_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [ast.Lt()], [right])


def make_gt_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [ast.Gt()], [right])


def make_eq_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [ast.Eq()], [right])


binary_ops = {
    "^": ast.BitXor(),
    "&": ast.BitAnd(),
    "|": ast.BitOr(),
    "+": ast.Add(),
    "-": ast.Sub(),
    "*": ast.Mult(),
    "/": ast.Div(),
    "%": ast.Mod(),
    "<=": make_le_compare,
    ">=": make_ge_compare,
    "<": make_lt_compare,
    ">": make_gt_compare,
    "=": make_eq_compare,
}
unary_ops = {
    "+": ast.UAdd(),
    "-": ast.USub(),
    "¬": ast.Invert(),
}
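
# Note: the values in binary_ops are of two kinds.  The arithmetic and
# bitwise entries are ast operator nodes used directly in
# ast.BinOp(left, op, right), while the comparison entries ("<=", ">=",
# "<", ">", "=") are helper callables that build an ast.Compare node;
# p_comparison below dispatches on the operator string to pick which.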


def check_concat(node):  # checks if the comparison is already a concat
    print("check concat", node)
    if not isinstance(node, ast.Call):
        return [node]
    print("func", node.func.id)
    if node.func.id != 'concat':
        return [node]
    if node.keywords:  # a repeated list-constant, don't optimise
        return [node]
    return node.args
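
# Illustrative example: because check_concat flattens existing concat
# calls, a chained "a || b || c" in the pseudocode ends up as a single
# concat(a, b, c) call rather than concat(concat(a, b), c).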


# identify the "[const] * const" SelectableInt replication pattern
def identify_sint_mul_pattern(p):
    if not isinstance(p[3], ast.Constant):
        return False
    if not isinstance(p[1], ast.List):
        return False
    l = p[1].elts
    if len(l) != 1:
        return False
    elt = l[0]
    return isinstance(elt, ast.Constant)
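
# Illustrative example: a pseudocode expression such as "[0] * 16"
# matches this pattern, and p_comparison turns it into
# concat(0, repeat=16) instead of an ast.BinOp multiply.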

def apply_trailer(atom, trailer):
    if trailer[0] == "TLIST":
        # assume depth of one
        atom = apply_trailer(atom, trailer[1])
        trailer = trailer[2]
    if trailer[0] == "CALL":
        #p[0] = ast.Expr(ast.Call(p[1], p[2][1], []))
        return ast.Call(atom, trailer[1], [])
        # if p[1].id == 'print':
        #     p[0] = ast.Printnl(ast.Tuple(p[2][1]), None, None)
        # else:
        #     p[0] = ast.CallFunc(p[1], p[2][1], None, None)
    else:
        print("subscript atom", trailer[1])
        #raise AssertionError("not implemented %s" % p[2][0])
        subs = trailer[1]
        if len(subs) == 1:
            idx = subs[0]
        else:
            idx = ast.Slice(subs[0], subs[1], None)
        return ast.Subscript(atom, idx, ast.Load())
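
# Illustrative (hypothetical) examples of the trailer tuples built by
# p_trailer_arglist / p_trailer_subscript / p_trailerlist further down:
#
#   GPR(RT)       -> ("CALL", [Name('RT')])
#   RA[0:3]       -> ("SUBS", [Constant(0), Constant(4)])   # end is +1
#   GPR(RA)[0:1]  -> ("TLIST", ("CALL", ...), ("SUBS", ...))
#
# apply_trailer converts these into ast.Call / ast.Subscript nodes.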

########## Parser (tokens -> AST) ######

# also part of Ply
#import yacc

# https://www.mathcs.emory.edu/~valerie/courses/fall10/155/resources/op_precedence.html
# python operator precedence
# Highest precedence at top, lowest at bottom.
# Operators in the same box evaluate left to right.
#
# Operator          Description
# ()                Parentheses (grouping)
# f(args...)        Function call
# x[index:index]    Slicing
# x[index]          Subscription
# x.attribute       Attribute reference
# **                Exponentiation
# ~x                Bitwise not
# +x, -x            Positive, negative
# *, /, %           mul, div, remainder
# +, -              Addition, subtraction
# <<, >>            Bitwise shifts
# &                 Bitwise AND
# ^                 Bitwise XOR
# |                 Bitwise OR
# in, not in, is, is not, <, <=, >, >=, <>, !=, ==
#                   Comparisons, membership, identity tests
# not x             Boolean NOT
# and               Boolean AND
# or                Boolean OR
# lambda            Lambda expression

class PowerParser:

    precedence = (
        ("left", "EQ", "GT", "LT", "LE", "GE", "LTU", "GTU"),
        ("left", "BITOR"),
        ("left", "BITXOR"),
        ("left", "BITAND"),
        ("left", "PLUS", "MINUS"),
        ("left", "MULT", "DIV", "MOD"),
        ("left", "INVERT"),
    )

    def __init__(self):
        self.gprs = {}
        for rname in ['RA', 'RB', 'RC', 'RT', 'RS']:
            self.gprs[rname] = None
        self.read_regs = OrderedSet()
        self.uninit_regs = OrderedSet()
        self.write_regs = OrderedSet()

    # The grammar comments come from Python's Grammar/Grammar file

    # NB: compound_stmt in single_input is followed by extra NEWLINE!
    # file_input: (NEWLINE | stmt)* ENDMARKER

    def p_file_input_end(self, p):
        """file_input_end : file_input ENDMARKER"""
        print("end", p[1])
        p[0] = p[1]

    def p_file_input(self, p):
        """file_input : file_input NEWLINE
                      | file_input stmt
                      | NEWLINE
                      | stmt"""
        if isinstance(p[len(p)-1], str):
            if len(p) == 3:
                p[0] = p[1]
            else:
                p[0] = []  # p == 2 --> only a blank line
        else:
            if len(p) == 3:
                p[0] = p[1] + p[2]
            else:
                p[0] = p[1]

    # funcdef: [decorators] 'def' NAME parameters ':' suite
    # ignoring decorators

    def p_funcdef(self, p):
        "funcdef : DEF NAME parameters COLON suite"
        p[0] = ast.FunctionDef(p[2], p[3], p[5], ())

    # parameters: '(' [varargslist] ')'
    def p_parameters(self, p):
        """parameters : LPAR RPAR
                      | LPAR varargslist RPAR"""
        if len(p) == 3:
            args = []
        else:
            args = p[2]
        p[0] = ast.arguments(args=args, vararg=None, kwarg=None, defaults=[])

    # varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] |
    #              '**' NAME) |
    # highly simplified

    def p_varargslist(self, p):
        """varargslist : varargslist COMMA NAME
                       | NAME"""
        if len(p) == 4:
            p[0] = p[1] + [p[3]]  # p[3] is a plain NAME string: wrap in a list
        else:
            p[0] = [p[1]]

    # stmt: simple_stmt | compound_stmt
    def p_stmt_simple(self, p):
        """stmt : simple_stmt"""
        # simple_stmt is a list
        p[0] = p[1]

    def p_stmt_compound(self, p):
        """stmt : compound_stmt"""
        p[0] = [p[1]]

    # simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
    def p_simple_stmt(self, p):
        """simple_stmt : small_stmts NEWLINE
                       | small_stmts SEMICOLON NEWLINE"""
        p[0] = p[1]

    def p_small_stmts(self, p):
        """small_stmts : small_stmts SEMICOLON small_stmt
                       | small_stmt"""
        if len(p) == 4:
            p[0] = p[1] + [p[3]]
        else:
            p[0] = [p[1]]

    # small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
    #             import_stmt | global_stmt | exec_stmt | assert_stmt
    def p_small_stmt(self, p):
        """small_stmt : flow_stmt
                      | break_stmt
                      | expr_stmt"""
        if isinstance(p[1], ast.Call):
            p[0] = ast.Expr(p[1])
        else:
            p[0] = p[1]

    # expr_stmt: testlist (augassign (yield_expr|testlist) |
    #                      ('=' (yield_expr|testlist))*)
    # augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
    #             '<<=' | '>>=' | '**=' | '//=')
    def p_expr_stmt(self, p):
        """expr_stmt : testlist ASSIGNEA testlist
                     | testlist ASSIGN testlist
                     | testlist """
        print("expr_stmt", p)
        if len(p) == 2:
            # a list of expressions
            #p[0] = ast.Discard(p[1])
            p[0] = p[1]
        else:
            iea_mode = p[2] == '<-iea'
            name = None
            if isinstance(p[1], ast.Name):
                name = p[1].id
            elif isinstance(p[1], ast.Subscript):
                name = p[1].value.id
                if name in self.gprs:
                    # add to list of uninitialised
                    self.uninit_regs.add(name)
            elif isinstance(p[1], ast.Call) and p[1].func.id == 'GPR':
                print(astor.dump_tree(p[1]))
                # replace GPR(x) with GPR[x]
                idx = p[1].args[0]
                p[1] = ast.Subscript(p[1].func, idx)
            elif isinstance(p[1], ast.Call) and p[1].func.id == 'MEM':
                print("mem assign")
                print(astor.dump_tree(p[1]))
                p[1].func.id = "memassign"  # change function name to set
                p[1].args.append(p[3])
                p[0] = p[1]
                print("mem rewrite")
                print(astor.dump_tree(p[0]))
                return
            else:
                print("help, help")
                print(astor.dump_tree(p[1]))
            print("expr assign", name, p[1])
            if name and name in self.gprs:
                self.write_regs.add(name)  # add to list of regs to write
            p[0] = Assign(p[1], p[3], iea_mode)
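
    # Note on the "<-iea" operator (ASSIGNEA): an assignment written as,
    # for example (hypothetical pseudocode), "EA <-iea RA + 4" sets
    # iea_mode=True above, whereas a plain "<-" (ASSIGN) leaves it False.
    # iea_mode is passed through to Assign(), which does not yet act on it.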

    def p_flow_stmt(self, p):
        "flow_stmt : return_stmt"
        p[0] = p[1]

    # return_stmt: 'return' [testlist]
    def p_return_stmt(self, p):
        "return_stmt : RETURN testlist"
        p[0] = ast.Return(p[2])

    def p_compound_stmt(self, p):
        """compound_stmt : if_stmt
                         | while_stmt
                         | for_stmt
                         | funcdef
        """
        p[0] = p[1]

    def p_break_stmt(self, p):
        """break_stmt : BREAK
        """
        p[0] = ast.Break()

    def p_for_stmt(self, p):
        """for_stmt : FOR atom EQ test TO test COLON suite
                    | DO atom EQ test TO test COLON suite
        """
        # auto-add-one (sigh) due to python range
        start = p[4]
        end = ast.BinOp(p[6], ast.Add(), ast.Constant(1))
        it = ast.Call(ast.Name("range"), [start, end], [])
        p[0] = ast.For(p[2], it, p[8], [])
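
    # Illustrative example: "for i = 0 to 7" (or "do i = 0 to 7") becomes
    # a Python "for i in range(0, 7 + 1)" loop, because the pseudocode's
    # "to" bound is inclusive while Python's range() end is exclusive.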

    def p_while_stmt(self, p):
        """while_stmt : DO WHILE test COLON suite ELSE COLON suite
                      | DO WHILE test COLON suite
        """
        if len(p) == 6:
            p[0] = ast.While(p[3], p[5], [])
        else:
            p[0] = ast.While(p[3], p[5], p[8])

    def p_if_stmt(self, p):
        """if_stmt : IF test COLON suite ELSE COLON if_stmt
                   | IF test COLON suite ELSE COLON suite
                   | IF test COLON suite
        """
        if len(p) == 8 and isinstance(p[7], ast.If):
            p[0] = ast.If(p[2], p[4], [p[7]])
        elif len(p) == 5:
            p[0] = ast.If(p[2], p[4], [])
        else:
            p[0] = ast.If(p[2], p[4], p[7])

    def p_suite(self, p):
        """suite : simple_stmt
                 | NEWLINE INDENT stmts DEDENT"""
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = p[3]

    def p_stmts(self, p):
        """stmts : stmts stmt
                 | stmt"""
        if len(p) == 3:
            p[0] = p[1] + p[2]
        else:
            p[0] = p[1]

    def p_comparison(self, p):
        """comparison : comparison PLUS comparison
                      | comparison MINUS comparison
                      | comparison MULT comparison
                      | comparison DIV comparison
                      | comparison MOD comparison
                      | comparison EQ comparison
                      | comparison LE comparison
                      | comparison GE comparison
                      | comparison LTU comparison
                      | comparison GTU comparison
                      | comparison LT comparison
                      | comparison GT comparison
                      | comparison BITOR comparison
                      | comparison BITXOR comparison
                      | comparison BITAND comparison
                      | PLUS comparison
                      | comparison MINUS
                      | INVERT comparison
                      | comparison APPEND comparison
                      | power"""
        if len(p) == 4:
            print(list(p))
            if p[2] == '<u':
                p[0] = ast.Call(ast.Name("ltu"), (p[1], p[3]), [])
            elif p[2] == '>u':
                p[0] = ast.Call(ast.Name("gtu"), (p[1], p[3]), [])
            elif p[2] == '||':
                l = check_concat(p[1]) + check_concat(p[3])
                p[0] = ast.Call(ast.Name("concat"), l, [])
            elif p[2] in ['<', '>', '=', '<=', '>=']:
                p[0] = binary_ops[p[2]]((p[1], p[3]))
            elif identify_sint_mul_pattern(p):
                keywords = [ast.keyword(arg='repeat', value=p[3])]
                l = p[1].elts
                p[0] = ast.Call(ast.Name("concat"), l, keywords)
            else:
                p[0] = ast.BinOp(p[1], binary_ops[p[2]], p[3])
        elif len(p) == 3:
            if isinstance(p[2], str) and p[2] == '-':
                p[0] = ast.UnaryOp(unary_ops[p[2]], p[1])
            else:
                p[0] = ast.UnaryOp(unary_ops[p[1]], p[2])
        else:
            p[0] = p[1]
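
    # Illustrative examples of what p_comparison builds (shorthand AST):
    #
    #   a <u b       -> Call(Name('ltu'), (a, b), [])
    #   a || b || c  -> Call(Name('concat'), [a, b, c], [])
    #   [0] * 16     -> Call(Name('concat'), [0], repeat=16)
    #   a + b        -> BinOp(a, Add(), b)
    #   a <= b       -> Compare(a, [LtE()], [b])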

    # power: atom trailer* ['**' factor]
    # trailers enables function calls (and subscripts).
    # so this is 'trailerlist'
    def p_power(self, p):
        """power : atom
                 | atom trailerlist"""
        if len(p) == 2:
            p[0] = p[1]
        else:
            print("power dump atom")
            print(astor.dump_tree(p[1]))
            print("power dump trailerlist")
            print(astor.dump_tree(p[2]))
            p[0] = apply_trailer(p[1], p[2])

    def p_atom_name(self, p):
        """atom : NAME"""
        p[0] = ast.Name(id=p[1], ctx=ast.Load())

    def p_atom_number(self, p):
        """atom : BINARY
                | NUMBER
                | STRING"""
        p[0] = ast.Constant(p[1])

    # '[' [listmaker] ']' |

    def p_atom_listmaker(self, p):
        """atom : LBRACK listmaker RBRACK"""
        p[0] = p[2]

    def p_listmaker(self, p):
        """listmaker : test COMMA listmaker
                     | test
        """
        if len(p) == 2:
            p[0] = ast.List([p[1]])
        else:
            p[0] = ast.List([p[1]] + p[3].elts)  # ast.List keeps items in .elts

    def p_atom_tuple(self, p):
        """atom : LPAR testlist RPAR"""
        print("tuple", p[2])
        print("astor dump")
        print(astor.dump_tree(p[2]))

        if isinstance(p[2], ast.Name):
            print("tuple name", p[2].id)
            if p[2].id in self.gprs:
                self.read_regs.add(p[2].id)  # add to list of regs to read
                #p[0] = ast.Subscript(ast.Name("GPR"), ast.Str(p[2].id))
                # return
            p[0] = p[2]
        elif isinstance(p[2], ast.BinOp):
            if isinstance(p[2].left, ast.Name) and \
                    isinstance(p[2].right, ast.Constant) and \
                    p[2].right.value == 0 and \
                    p[2].left.id in self.gprs:
                rid = p[2].left.id
                self.read_regs.add(rid)  # add to list of regs to read
                # create special call to GPR.getz
                gprz = ast.Name("GPR")
                gprz = ast.Attribute(gprz, "getz")  # get testzero function
                # *sigh* see class GPR. we need index itself not reg value
                ridx = ast.Name("_%s" % rid)
                p[0] = ast.Call(gprz, [ridx], [])
                print("tree", astor.dump_tree(p[0]))
            else:
                p[0] = p[2]
        else:
            p[0] = p[2]
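
    # Illustrative example: the Power pseudocode idiom "(RA|0)" (roughly,
    # "zero if the register field is 0, else the register contents") hits
    # the BinOp branch above and is rewritten into a call to GPR.getz(_RA),
    # using the register *index* rather than its value, with RA recorded
    # in read_regs.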

    def p_trailerlist(self, p):
        """trailerlist : trailer trailerlist
                       | trailer
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = ("TLIST", p[1], p[2])

    # trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
    def p_trailer(self, p):
        """trailer : trailer_arglist
                   | trailer_subscript
        """
        p[0] = p[1]

    def p_trailer_arglist(self, p):
        "trailer_arglist : LPAR arglist RPAR"
        p[0] = ("CALL", p[2])

    def p_trailer_subscript(self, p):
        "trailer_subscript : LBRACK subscript RBRACK"
        p[0] = ("SUBS", p[2])

    # subscript: '.' '.' '.' | test | [test] ':' [test]

    def p_subscript(self, p):
        """subscript : test COLON test
                     | test
        """
        if len(p) == 4:
            # add one to end
            if isinstance(p[3], ast.Constant):
                end = ast.Constant(p[3].value+1)
            else:
                end = ast.BinOp(p[3], ast.Add(), ast.Constant(1))
            p[0] = [p[1], end]
        else:
            p[0] = [p[1]]
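
    # Illustrative example: pseudocode bit-ranges are inclusive, so
    # "RA[0:3]" selects bits 0 through 3 and the end index is bumped by
    # one here, giving the Python half-open range [0:4].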

    # testlist: test (',' test)* [',']
    # Contains shift/reduce error

    def p_testlist(self, p):
        """testlist : testlist_multi COMMA
                    | testlist_multi """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # May need to promote singleton to tuple
            if isinstance(p[1], list):
                p[0] = p[1]
            else:
                p[0] = [p[1]]
        # Convert into a tuple?
        if isinstance(p[0], list):
            p[0] = ast.Tuple(p[0])

    def p_testlist_multi(self, p):
        """testlist_multi : testlist_multi COMMA test
                          | test"""
        if len(p) == 2:
            # singleton
            p[0] = p[1]
        else:
            if isinstance(p[1], list):
                p[0] = p[1] + [p[3]]
            else:
                # singleton -> tuple
                p[0] = [p[1], p[3]]

    # test: or_test ['if' or_test 'else' test] | lambdef
    # as I don't support 'and', 'or', and 'not' this works down to 'comparison'

    def p_test(self, p):
        "test : comparison"
        p[0] = p[1]

    # arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
    #                           | '**' test)
    # XXX INCOMPLETE: this doesn't allow the trailing comma

    def p_arglist(self, p):
        """arglist : arglist COMMA argument
                   | argument"""
        if len(p) == 4:
            p[0] = p[1] + [p[3]]
        else:
            p[0] = [p[1]]

    # argument: test [gen_for] | test '=' test  # Really [keyword '='] test
    def p_argument(self, p):
        "argument : test"
        p[0] = p[1]

    def p_error(self, p):
        # print "Error!", repr(p)
        raise SyntaxError(p)


class GardenSnakeParser(PowerParser):
    def __init__(self, lexer=None, debug=False):
        PowerParser.__init__(self)
        self.debug = debug
        if lexer is None:
            lexer = IndentLexer(debug=0)
        self.lexer = lexer
        self.tokens = lexer.tokens
        self.parser = yacc.yacc(module=self, start="file_input_end",
                                debug=debug, write_tables=False)

        self.sd = create_pdecode()

    def parse(self, code):
        # self.lexer.input(code)
        result = self.parser.parse(code, lexer=self.lexer, debug=self.debug)
        return ast.Module(result)


###### Code generation ######

#from compiler import misc, syntax, pycodegen

class GardenSnakeCompiler(object):
    def __init__(self, debug=False):
        self.parser = GardenSnakeParser(debug=debug)

    def compile(self, code, mode="exec", filename="<string>"):
        tree = self.parser.parse(code)
        print("snake")
        pprint(tree)
        return tree
        #misc.set_filename(filename, tree)
        return compile(tree, mode="exec", filename="<string>")
        # syntax.check(tree)
        gen = pycodegen.ModuleCodeGenerator(tree)
        code = gen.getCode()
        return code
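
# Example usage (illustrative; the pseudocode input is hypothetical and
# may need the exact formatting the lexer expects):
#
#   gsc = GardenSnakeCompiler()
#   tree = gsc.compile("RT <- RA + RB\n")
#
# compile() currently returns the (pretty-printed) ast.Module rather
# than a compiled code object.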