# start to convert to POWER decoder, add slice/subscript
# [soc.git] / src / soc / decoder / power_pseudo.py

# Based on GardenSnake - a parser generator demonstration program
# GardenSnake was released into the Public Domain by Andrew Dalke.

# Portions of this work are derived from Python's Grammar definition
# and may be covered under the Python copyright and license
#
# Andrew Dalke / Dalke Scientific Software, LLC
# 30 August 2006 / Cape Town, South Africa

# Modifications for inclusion in PLY distribution
import sys
from pprint import pprint
from copy import copy
from ply import lex, yacc

##### Lexer ######
#import lex
import decimal

tokens = (
    'DEF',
    'IF',
    'ELSE',
    'FOR',
    'TO',
    'THEN',
    'WHILE',
    'NAME',
    'NUMBER',  # Python decimals
    'STRING',  # single quoted strings only; syntax of raw strings
    'LPAR',
    'RPAR',
    'LBRACK',
    'RBRACK',
    'COLON',
    'EQ',
    'ASSIGN',
    'LT',
    'GT',
    'PLUS',
    'MINUS',
    'MULT',
    'DIV',
    'RETURN',
    'WS',
    'NEWLINE',
    'COMMA',
    'SEMICOLON',
    'INDENT',
    'DEDENT',
    'ENDMARKER',
)

#t_NUMBER = r'\d+'
# taken from decimal.py but without the leading sign
def t_NUMBER(t):
    r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
    t.value = decimal.Decimal(t.value)
    return t

def t_STRING(t):
    r"'([^\\']+|\\'|\\\\)*'"  # I think this is right ...
    t.value = t.value[1:-1].decode("string-escape")  # .swapcase() # for fun
    return t
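
# Illustrative examples (added for clarity, not executed): the STRING rule
# above matches single-quoted literals with backslash escapes, and the
# Python 2 "string-escape" decode then resolves the escapes, e.g.
#   'hello'    -> hello
#   'don\'t'   -> don't
#   'a\\b'     -> a\b
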
65
66 t_COLON = r':'
67 t_EQ = r'=='
68 t_ASSIGN = r'<-'
69 t_LT = r'<'
70 t_GT = r'>'
71 t_PLUS = r'\+'
72 t_MINUS = r'-'
73 t_MULT = r'\*'
74 t_DIV = r'/'
75 t_COMMA = r','
76 t_SEMICOLON = r';'
77
78 # Ply nicely documented how to do this.
79
80 RESERVED = {
81 "def": "DEF",
82 "if": "IF",
83 "else": "ELSE",
84 "for": "FOR",
85 "to": "TO",
86 "while": "WHILE",
87 "return": "RETURN",
88 }
89
90 def t_NAME(t):
91 r'[a-zA-Z_][a-zA-Z0-9_]*'
92 t.type = RESERVED.get(t.value, "NAME")
93 return t
94
95 # Putting this before t_WS let it consume lines with only comments in
96 # them so the latter code never sees the WS part. Not consuming the
97 # newline. Needed for "if 1: #comment"
98 def t_comment(t):
99 r"[ ]*\043[^\n]*" # \043 is '#'
100 pass
101
102
103 # Whitespace
104 def t_WS(t):
105 r'[ ]+'
106 if t.lexer.at_line_start and t.lexer.paren_count == 0 and \
107 t.lexer.brack_count == 0:
108 return t
109
110 # Don't generate newline tokens when inside of parenthesis, eg
111 # a = (1,
112 # 2, 3)
113 def t_newline(t):
114 r'\n+'
115 t.lexer.lineno += len(t.value)
116 t.type = "NEWLINE"
117 if t.lexer.paren_count == 0 and t.lexer.brack_count == 0:
118 return t
119
120 def t_LBRACK(t):
121 r'\['
122 t.lexer.brack_count += 1
123 return t
124
125 def t_RBRACK(t):
126 r'\]'
127 # check for underflow? should be the job of the parser
128 t.lexer.brack_count -= 1
129 return t
130
131 def t_LPAR(t):
132 r'\('
133 t.lexer.paren_count += 1
134 return t
135
136 def t_RPAR(t):
137 r'\)'
138 # check for underflow? should be the job of the parser
139 t.lexer.paren_count -= 1
140 return t
141
142 #t_ignore = " "
143
144 def t_error(t):
145 raise SyntaxError("Unknown symbol %r" % (t.value[0],))
146 print ("Skipping", repr(t.value[0]))
147 t.lexer.skip(1)
148
149 ## I implemented INDENT / DEDENT generation as a post-processing filter
150
151 # The original lex token stream contains WS and NEWLINE characters.
152 # WS will only occur before any other tokens on a line.
153
154 # I have three filters. One tags tokens by adding two attributes.
155 # "must_indent" is True if the token must be indented from the
156 # previous code. The other is "at_line_start" which is True for WS
157 # and the first non-WS/non-NEWLINE on a line. It flags the check so
158 # see if the new line has changed indication level.
159
160 # Python's syntax has three INDENT states
161 # 0) no colon hence no need to indent
162 # 1) "if 1: go()" - simple statements have a COLON but no need for an indent
163 # 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
164 NO_INDENT = 0
165 MAY_INDENT = 1
166 MUST_INDENT = 2
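
# Worked example of the three states (illustrative only, in this grammar's
# own syntax):
#     x <- 1           no COLON on the line, so indent stays NO_INDENT;
#     if x == 1: f()   a COLON is seen, so MAY_INDENT, but a real token
#                      follows on the same line, so no INDENT is required;
#     if x == 1:       COLON then NEWLINE promotes MAY_INDENT to
#         f()          MUST_INDENT: the next real token must be indented.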

# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
    oldignore = lexer.lexignore
    lexer.at_line_start = at_line_start = True
    indent = NO_INDENT
    saw_colon = False
    for token in tokens:
        #print ("token", token)
        token.at_line_start = at_line_start

        if token.type == "COLON":
            at_line_start = False
            indent = MAY_INDENT
            token.must_indent = False

        elif token.type == "NEWLINE":
            at_line_start = True
            if indent == MAY_INDENT:
                indent = MUST_INDENT
            token.must_indent = False

        elif token.type == "WS":
            assert token.at_line_start
            at_line_start = True
            token.must_indent = False

        else:
            # A real token; only indent after COLON NEWLINE
            if indent == MUST_INDENT:
                token.must_indent = True
            else:
                token.must_indent = False
            at_line_start = False
            indent = NO_INDENT

        # really bad hack that changes the lexer's "ignore" state:
        # once "must indent" is seen (i.e. real tokens have started),
        # whitespace is ignored.
        if token.must_indent:
            lexer.lexignore = ('ignore', ' ')
        else:
            lexer.lexignore = oldignore

        token.indent = indent
        yield token
        lexer.at_line_start = at_line_start

def _new_token(type, lineno):
    tok = lex.LexToken()
    tok.type = type
    tok.value = None
    tok.lineno = lineno
    tok.lexpos = -1
    return tok

# Synthesize a DEDENT tag
def DEDENT(lineno):
    return _new_token("DEDENT", lineno)

# Synthesize an INDENT tag
def INDENT(lineno):
    return _new_token("INDENT", lineno)


# Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
    # A stack of indentation levels; will never pop item 0
    levels = [0]
    token = None
    depth = 0
    prev_was_ws = False
    for token in tokens:
        if 1:
            print "Process", depth, token.indent, token,
            if token.at_line_start:
                print "at_line_start",
            if token.must_indent:
                print "must_indent",
            print

        # WS only occurs at the start of the line.
        # There may be WS followed by NEWLINE so
        # only track the depth here.  Don't indent/dedent
        # until there's something real.
        if token.type == "WS":
            assert depth == 0
            depth = len(token.value)
            prev_was_ws = True
            # WS tokens are never passed to the parser
            continue

        if token.type == "NEWLINE":
            depth = 0
            if prev_was_ws or token.at_line_start:
                # ignore blank lines
                continue
            # pass the other cases on through
            yield token
            continue

        # then it must be a real token (not WS, not NEWLINE),
        # which can affect the indentation level

        prev_was_ws = False
        if token.must_indent:
            # The current depth must be larger than the previous level
            if not (depth > levels[-1]):
                raise IndentationError("expected an indented block")

            levels.append(depth)
            yield INDENT(token.lineno)

        elif token.at_line_start:
            # Must be on the same level or one of the previous levels
            if depth == levels[-1]:
                # At the same level
                pass
            elif depth > levels[-1]:
                raise IndentationError("indentation increase but not in new block")
            else:
                # Back up; but only if it matches a previous level
                try:
                    i = levels.index(depth)
                except ValueError:
                    raise IndentationError("inconsistent indentation")
                for _ in range(i+1, len(levels)):
                    yield DEDENT(token.lineno)
                    levels.pop()

        yield token

    ### Finished processing ###

    # Must dedent any remaining levels
    if len(levels) > 1:
        assert token is not None
        for _ in range(1, len(levels)):
            yield DEDENT(token.lineno)

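# Illustrative trace (comments only, not executed): for the two-line input
#     if x == 1:
#         y <- 2
# the filtered stream is roughly
#     IF NAME EQ NUMBER COLON NEWLINE INDENT NAME ASSIGN NUMBER NEWLINE DEDENT
# i.e. the WS token carrying the four leading spaces is consumed here and
# replaced by an INDENT/DEDENT pair around the block; the top-level filter
# below then appends an ENDMARKER.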

# The top-level filter adds an ENDMARKER, if requested.
# Python's grammar uses it.
def filter(lexer, add_endmarker=True):
    token = None
    tokens = iter(lexer.token, None)
    tokens = track_tokens_filter(lexer, tokens)
    for token in indentation_filter(tokens):
        yield token

    if add_endmarker:
        lineno = 1
        if token is not None:
            lineno = token.lineno
        yield _new_token("ENDMARKER", lineno)

# Combine Ply and my filters into a new lexer

class IndentLexer(object):
    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
        self.lexer = lex.lex(debug=debug, optimize=optimize,
                             lextab=lextab, reflags=reflags)
        self.token_stream = None

    def input(self, s, add_endmarker=True):
        self.lexer.paren_count = 0
        self.lexer.brack_count = 0
        self.lexer.input(s)
        self.token_stream = filter(self.lexer, add_endmarker)

    def token(self):
        try:
            return self.token_stream.next()
        except StopIteration:
            return None

########## Parser (tokens -> AST) ######

# also part of Ply
#import yacc

# I use the Python AST
from compiler import ast

# Helper function
def Assign(left, right):
    if isinstance(left, ast.Name):
        # Single assignment on left
        return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
    elif isinstance(left, ast.Tuple):
        # List of things - make sure they are Name nodes
        names = []
        for child in left.getChildren():
            if not isinstance(child, ast.Name):
                raise SyntaxError("that assignment not supported")
            names.append(child.name)
        ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
        return ast.Assign([ast.AssTuple(ass_list)], right)
    else:
        raise SyntaxError("Can't do that yet")

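# Illustrative examples (comments only, not executed), using the Python 2
# compiler.ast node shapes assumed by the helper above:
#   Assign(Name('RA'), Const(5))
#       -> ast.Assign([ast.AssName('RA', 'OP_ASSIGN')], Const(5))
#   Assign(Tuple([Name('a'), Name('b')]), right)
#       -> ast.Assign([ast.AssTuple([AssName('a', 'OP_ASSIGN'),
#                                    AssName('b', 'OP_ASSIGN')])], right)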

# The grammar comments come from Python's Grammar/Grammar file

## NB: compound_stmt in single_input is followed by extra NEWLINE!
# file_input: (NEWLINE | stmt)* ENDMARKER
def p_file_input_end(p):
    """file_input_end : file_input ENDMARKER"""
    print "end", p[1]
    p[0] = ast.Stmt(p[1])

def p_file_input(p):
    """file_input : file_input NEWLINE
                  | file_input stmt
                  | NEWLINE
                  | stmt"""
    if isinstance(p[len(p)-1], basestring):
        if len(p) == 3:
            p[0] = p[1]
        else:
            p[0] = []  # len(p) == 2 --> only a blank line
    else:
        if len(p) == 3:
            p[0] = p[1] + p[2]
        else:
            p[0] = p[1]


# funcdef: [decorators] 'def' NAME parameters ':' suite
# ignoring decorators
def p_funcdef(p):
    "funcdef : DEF NAME parameters COLON suite"
    p[0] = ast.Function(None, p[2], list(p[3]), (), 0, None, p[5])

# parameters: '(' [varargslist] ')'
def p_parameters(p):
    """parameters : LPAR RPAR
                  | LPAR varargslist RPAR"""
    if len(p) == 3:
        p[0] = []
    else:
        p[0] = p[2]


# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
# highly simplified
def p_varargslist(p):
    """varargslist : varargslist COMMA NAME
                   | NAME"""
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]

# stmt: simple_stmt | compound_stmt
def p_stmt_simple(p):
    """stmt : simple_stmt"""
    # simple_stmt is a list
    p[0] = p[1]

def p_stmt_compound(p):
    """stmt : compound_stmt"""
    p[0] = [p[1]]

# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
def p_simple_stmt(p):
    """simple_stmt : small_stmts NEWLINE
                   | small_stmts SEMICOLON NEWLINE"""
    p[0] = p[1]

def p_small_stmts(p):
    """small_stmts : small_stmts SEMICOLON small_stmt
                   | small_stmt"""
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]

# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
#             import_stmt | global_stmt | exec_stmt | assert_stmt
def p_small_stmt(p):
    """small_stmt : flow_stmt
                  | expr_stmt"""
    p[0] = p[1]

# expr_stmt: testlist (augassign (yield_expr|testlist) |
#                      ('=' (yield_expr|testlist))*)
# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
#             '<<=' | '>>=' | '**=' | '//=')
def p_expr_stmt(p):
    """expr_stmt : testlist ASSIGN testlist
                 | testlist """
    if len(p) == 2:
        # a list of expressions
        #p[0] = ast.Discard(p[1])
        p[0] = p[1]
    else:
        p[0] = Assign(p[1], p[3])

def p_flow_stmt(p):
    "flow_stmt : return_stmt"
    p[0] = p[1]

# return_stmt: 'return' [testlist]
def p_return_stmt(p):
    "return_stmt : RETURN testlist"
    p[0] = ast.Return(p[2])


def p_compound_stmt(p):
    """compound_stmt : if_stmt
                     | while_stmt
                     | funcdef
    """
    p[0] = p[1]

def p_while_stmt(p):
    """while_stmt : WHILE test COLON suite ELSE COLON suite
                  | WHILE test COLON suite
    """
    if len(p) == 5:
        p[0] = ast.While(p[2], p[4], None)
    else:
        p[0] = ast.While(p[2], p[4], p[7])

def p_if_stmt(p):
    """if_stmt : IF test COLON suite ELSE COLON suite
               | IF test COLON suite
    """
    if len(p) == 5:
        p[0] = ast.If([(p[2], p[4])], None)
    else:
        p[0] = ast.If([(p[2], p[4])], p[7])

def p_suite(p):
    """suite : simple_stmt
             | NEWLINE INDENT stmts DEDENT"""
    if len(p) == 2:
        p[0] = ast.Stmt(p[1])
    else:
        p[0] = ast.Stmt(p[3])


def p_stmts(p):
    """stmts : stmts stmt
             | stmt"""
    if len(p) == 3:
        p[0] = p[1] + p[2]
    else:
        p[0] = p[1]

## Not using Python's approach because Ply supports precedence

# comparison: expr (comp_op expr)*
# arith_expr: term (('+'|'-') term)*
# term: factor (('*'|'/'|'%'|'//') factor)*
# factor: ('+'|'-'|'~') factor | power
# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'

def make_lt_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [('<', right), ])

def make_gt_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [('>', right), ])

def make_eq_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [('==', right), ])


binary_ops = {
    "+": ast.Add,
    "-": ast.Sub,
    "*": ast.Mul,
    "/": ast.Div,
    "<": make_lt_compare,
    ">": make_gt_compare,
    "==": make_eq_compare,
}
unary_ops = {
    "+": ast.UnaryAdd,
    "-": ast.UnarySub,
}
precedence = (
    ("left", "EQ", "GT", "LT"),
    ("left", "PLUS", "MINUS"),
    ("left", "MULT", "DIV"),
)

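# Illustrative note (not executed): PLY precedence tables run from lowest
# to highest binding, so with the table above "a + b * c < d" parses as
# "(a + (b * c)) < d" - MULT/DIV bind tighter than PLUS/MINUS, which bind
# tighter than the EQ/GT/LT comparisons; all three groups associate left.
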
def p_comparison(p):
    """comparison : comparison PLUS comparison
                  | comparison MINUS comparison
                  | comparison MULT comparison
                  | comparison DIV comparison
                  | comparison LT comparison
                  | comparison EQ comparison
                  | comparison GT comparison
                  | PLUS comparison
                  | MINUS comparison
                  | power"""
    if len(p) == 4:
        p[0] = binary_ops[p[2]]((p[1], p[3]))
    elif len(p) == 3:
        p[0] = unary_ops[p[1]](p[2])
    else:
        p[0] = p[1]

# power: atom trailer* ['**' factor]
# trailers enables function calls (and subscripts).
# I only allow one level of calls
# so this is 'trailer'
def p_power(p):
    """power : atom
             | atom trailer"""
    if len(p) == 2:
        p[0] = p[1]
    else:
        if p[2][0] == "CALL":
            if p[1].name == 'print':
                p[0] = ast.Printnl(ast.Tuple(p[2][1]), None, None)
            else:
                p[0] = ast.CallFunc(p[1], p[2][1], None, None)
        else:
            print p[2][1]
            #raise AssertionError("not implemented %s" % p[2][0])
            subs = p[2][1]
            if len(subs) == 1:
                p[0] = ast.Subscript(p[1], 'OP_APPLY', subs[0])
            else:
                p[0] = ast.Slice(p[1], 'OP_APPLY', subs[0], subs[1])

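# Illustrative examples (comments only, not executed): the SUBS trailer
# added for the POWER pseudocode maps
#   (RB)[index]      -> ast.Subscript(Name('RB'), 'OP_APPLY', Name('index'))
#   (RS)[8*i:8*i+7]  -> ast.Slice(Name('RS'), 'OP_APPLY', lower, upper)
# where lower/upper are the parsed 8*i and 8*i+7 expressions.
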
def p_atom_name(p):
    """atom : NAME"""
    p[0] = ast.Name(p[1])

def p_atom_number(p):
    """atom : NUMBER
            | STRING"""
    p[0] = ast.Const(p[1])

def p_atom_tuple(p):
    """atom : LPAR testlist RPAR"""
    p[0] = p[2]

# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
def p_trailer(p):
    """trailer : trailer_arglist
               | trailer_subscript
    """
    p[0] = p[1]

def p_trailer_arglist(p):
    "trailer_arglist : LPAR arglist RPAR"
    p[0] = ("CALL", p[2])

def p_trailer_subscript(p):
    "trailer_subscript : LBRACK subscript RBRACK"
    p[0] = ("SUBS", p[2])

# subscript: '.' '.' '.' | test | [test] ':' [test]

def p_subscript(p):
    """subscript : test COLON test
                 | test
    """
    if len(p) == 4:
        p[0] = [p[1], p[3]]
    else:
        p[0] = [p[1]]


# testlist: test (',' test)* [',']
# Contains shift/reduce error
def p_testlist(p):
    """testlist : testlist_multi COMMA
                | testlist_multi """
    if len(p) == 2:
        p[0] = p[1]
    else:
        # May need to promote singleton to tuple
        if isinstance(p[1], list):
            p[0] = p[1]
        else:
            p[0] = [p[1]]
    # Convert into a tuple?
    if isinstance(p[0], list):
        p[0] = ast.Tuple(p[0])

def p_testlist_multi(p):
    """testlist_multi : testlist_multi COMMA test
                      | test"""
    if len(p) == 2:
        # singleton
        p[0] = p[1]
    else:
        if isinstance(p[1], list):
            p[0] = p[1] + [p[3]]
        else:
            # singleton -> tuple
            p[0] = [p[1], p[3]]


# test: or_test ['if' or_test 'else' test] | lambdef
# as I don't support 'and', 'or', and 'not' this works down to 'comparison'
def p_test(p):
    "test : comparison"
    p[0] = p[1]


# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
# XXX INCOMPLETE: this doesn't allow the trailing comma
def p_arglist(p):
    """arglist : arglist COMMA argument
               | argument"""
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]

# argument: test [gen_for] | test '=' test  # Really [keyword '='] test
def p_argument(p):
    "argument : test"
    p[0] = p[1]

def p_error(p):
    #print "Error!", repr(p)
    raise SyntaxError(p)


class GardenSnakeParser(object):
    def __init__(self, lexer=None):
        if lexer is None:
            lexer = IndentLexer(debug=1)
        self.lexer = lexer
        self.parser = yacc.yacc(start="file_input_end",
                                debug=False, write_tables=False)

    def parse(self, code):
        self.lexer.input(code)
        result = self.parser.parse(lexer=self.lexer, debug=False)
        return ast.Module(None, result)


###### Code generation ######

from compiler import misc, syntax, pycodegen

class GardenSnakeCompiler(object):
    def __init__(self):
        self.parser = GardenSnakeParser()

    def compile(self, code, mode="exec", filename="<string>"):
        tree = self.parser.parse(code)
        print "snake"
        pprint(tree)
        misc.set_filename(filename, tree)
        syntax.check(tree)
        gen = pycodegen.ModuleCodeGenerator(tree)
        code = gen.getCode()
        return code

####### Test code #######

bpermd = r"""
for i = 0 to 7
    index <- (RS)[8*i:8*i+7]
    if index < 64 then
        permi <- (RB)[index]
    else
        permi <- 0
RA <- [0]*56|| perm[0:7]
"""

bpermd = r"""
index <- (RS)[8*i:8*i+7]
#RA <- [0]*56 # || perm[0:7]
"""

code = bpermd

lexer = IndentLexer(debug=1)
# Give the lexer some input
print "code"
print code
lexer.input(code)

# Tokenize
while True:
    tok = lexer.token()
    if not tok:
        break      # No more input
    print(tok)

#sys.exit(0)

# Set up the GardenSnake run-time environment
def print_(*args):
    print ("args", args)
    print ("-->", " ".join(map(str, args)))

#d = copy(globals())
d = {}
d["print"] = print_
#d["a"] = 10
l = {}


compile = GardenSnakeCompiler().compile

compiled_code = compile(code, mode="single", filename="string")

from compiler import parse
tree = parse(code, "exec")
pprint(tree)

print (compiled_code)

exec (compiled_code, d)
print ("Done")

#print d
#print l