add listmaker
[soc.git] / src / soc / decoder / power_pseudo.py
# Based on GardenSnake - a parser generator demonstration program
# GardenSnake was released into the Public Domain by Andrew Dalke.

# Portions of this work are derived from Python's Grammar definition
# and may be covered under the Python copyright and license
#
# Andrew Dalke / Dalke Scientific Software, LLC
# 30 August 2006 / Cape Town, South Africa

# Modifications for inclusion in PLY distribution
import sys
from pprint import pprint
from copy import copy
from ply import lex, yacc

##### Lexer ######
#import lex
import decimal

tokens = (
    'DEF',
    'IF',
    'ELSE',
    'FOR',
    'TO',
    'THEN',
    'WHILE',
    'NAME',
    'NUMBER',  # Python decimals
    'STRING',  # single quoted strings only; syntax of raw strings
    'LPAR',
    'RPAR',
    'LBRACK',
    'RBRACK',
    'COLON',
    'EQ',
    'ASSIGN',
    'LT',
    'GT',
    'PLUS',
    'MINUS',
    'MULT',
    'DIV',
    'RETURN',
    'WS',
    'NEWLINE',
    'COMMA',
    'SEMICOLON',
    'INDENT',
    'DEDENT',
    'ENDMARKER',
)
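
# For illustration (a sketch, not executed): given the rules below, a line
# of pseudo-code such as
#
#     RA <- RS + 1
#
# should lex roughly as
#
#     NAME('RA') ASSIGN NAME('RS') PLUS NUMBER(1) NEWLINE
#
# with any leading spaces reported as a WS token; INDENT and DEDENT are not
# produced by the lexer itself but synthesized by the filters further down.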

#t_NUMBER = r'\d+'
# taken from decimal.py but without the leading sign
def t_NUMBER(t):
    r"""(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?"""
    # note: int() would reject the float forms the regex allows; the
    # pseudo-code handled here only uses integers
    t.value = int(t.value)
    return t

def t_STRING(t):
    r"'([^\\']+|\\'|\\\\)*'"  # I think this is right ...
    t.value = t.value[1:-1].decode("string-escape")  # .swapcase() # for fun
    return t

t_COLON = r':'
t_EQ = r'=='
t_ASSIGN = r'<-'
t_LT = r'<'
t_GT = r'>'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
t_COMMA = r','
t_SEMICOLON = r';'

# Ply nicely documented how to do this.

RESERVED = {
    "def": "DEF",
    "if": "IF",
    "else": "ELSE",
    "for": "FOR",
    "to": "TO",
    "while": "WHILE",
    "return": "RETURN",
}

def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    t.type = RESERVED.get(t.value, "NAME")
    return t

# Putting this before t_WS lets it consume lines with only comments in
# them so the latter code never sees the WS part. Not consuming the
# newline. Needed for "if 1: #comment"
def t_comment(t):
    r"[ ]*\043[^\n]*"  # \043 is '#'
    pass


# Whitespace
def t_WS(t):
    r'[ ]+'
    if t.lexer.at_line_start and t.lexer.paren_count == 0 and \
            t.lexer.brack_count == 0:
        return t

# Don't generate newline tokens when inside of parentheses, e.g.
#    a = (1,
#         2, 3)
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
    t.type = "NEWLINE"
    if t.lexer.paren_count == 0 and t.lexer.brack_count == 0:
        return t

def t_LBRACK(t):
    r'\['
    t.lexer.brack_count += 1
    return t

def t_RBRACK(t):
    r'\]'
    # check for underflow? should be the job of the parser
    t.lexer.brack_count -= 1
    return t

def t_LPAR(t):
    r'\('
    t.lexer.paren_count += 1
    return t

def t_RPAR(t):
    r'\)'
    # check for underflow? should be the job of the parser
    t.lexer.paren_count -= 1
    return t

#t_ignore = " "

def t_error(t):
    # could skip the offending character (t.lexer.skip(1)) and carry on,
    # but it is simpler to bail out immediately
    raise SyntaxError("Unknown symbol %r" % (t.value[0],))

## I implemented INDENT / DEDENT generation as a post-processing filter

# The original lex token stream contains WS and NEWLINE tokens.
# WS will only occur before any other tokens on a line.

# I have three filters. One tags tokens by adding two attributes.
# "must_indent" is True if the token must be indented from the
# previous code. The other is "at_line_start" which is True for WS
# and the first non-WS/non-NEWLINE token on a line. It flags the check
# to see if the new line has changed indentation level.

# Python's syntax has three INDENT states
# 0) no colon hence no need to indent
# 1) "if 1: go()" - simple statements have a COLON but no need for an indent
# 2) "if 1:\n  go()" - complex statements have a COLON NEWLINE and must indent
NO_INDENT = 0
MAY_INDENT = 1
MUST_INDENT = 2

# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
    oldignore = lexer.lexignore
    lexer.at_line_start = at_line_start = True
    indent = NO_INDENT
    saw_colon = False
    for token in tokens:
        #print ("token", token)
        token.at_line_start = at_line_start

        if token.type == "COLON":
            at_line_start = False
            indent = MAY_INDENT
            token.must_indent = False

        elif token.type == "NEWLINE":
            at_line_start = True
            if indent == MAY_INDENT:
                indent = MUST_INDENT
            token.must_indent = False

        elif token.type == "WS":
            assert token.at_line_start
            at_line_start = True
            token.must_indent = False

        else:
            # A real token; only indent after COLON NEWLINE
            if indent == MUST_INDENT:
                token.must_indent = True
            else:
                token.must_indent = False
            at_line_start = False
            indent = NO_INDENT

        # really bad hack that changes the lexer's ignore state.
        # when "must indent" is seen (basically "real tokens" seen)
        # then ignore whitespace.
        if token.must_indent:
            lexer.lexignore = ('ignore', ' ')
        else:
            lexer.lexignore = oldignore

        token.indent = indent
        yield token
        lexer.at_line_start = at_line_start

def _new_token(type, lineno):
    tok = lex.LexToken()
    tok.type = type
    tok.value = None
    tok.lineno = lineno
    tok.lexpos = -1
    return tok

# Synthesize a DEDENT tag
def DEDENT(lineno):
    return _new_token("DEDENT", lineno)

# Synthesize an INDENT tag
def INDENT(lineno):
    return _new_token("INDENT", lineno)


# Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
    # A stack of indentation levels; will never pop item 0
    levels = [0]
    token = None
    depth = 0
    prev_was_ws = False
    for token in tokens:
        if 1:
            print "Process", depth, token.indent, token,
            if token.at_line_start:
                print "at_line_start",
            if token.must_indent:
                print "must_indent",
            print

        # WS only occurs at the start of the line
        # There may be WS followed by NEWLINE so
        # only track the depth here. Don't indent/dedent
        # until there's something real.
        if token.type == "WS":
            assert depth == 0
            depth = len(token.value)
            prev_was_ws = True
            # WS tokens are never passed to the parser
            continue

        if token.type == "NEWLINE":
            depth = 0
            if prev_was_ws or token.at_line_start:
                # ignore blank lines
                continue
            # pass the other cases on through
            yield token
            continue

        # then it must be a real token (not WS, not NEWLINE)
        # which can affect the indentation level

        prev_was_ws = False
        if token.must_indent:
            # The current depth must be larger than the previous level
            if not (depth > levels[-1]):
                raise IndentationError("expected an indented block")

            levels.append(depth)
            yield INDENT(token.lineno)

        elif token.at_line_start:
            # Must be on the same level or one of the previous levels
            if depth == levels[-1]:
                # At the same level
                pass
            elif depth > levels[-1]:
                raise IndentationError("indentation increase but not in new block")
            else:
                # Back up; but only if it matches a previous level
                try:
                    i = levels.index(depth)
                except ValueError:
                    raise IndentationError("inconsistent indentation")
                for _ in range(i+1, len(levels)):
                    yield DEDENT(token.lineno)
                    levels.pop()

        yield token

    ### Finished processing ###

    # Must dedent any remaining levels
    if len(levels) > 1:
        assert token is not None
        for _ in range(1, len(levels)):
            yield DEDENT(token.lineno)

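# Rough illustration (a sketch, not executed) of what the two filters do
# together for this input:
#
#     if 1:
#         x
#     y
#
# tokens in:  IF NUMBER COLON NEWLINE WS NAME NEWLINE NAME NEWLINE
# tokens out: IF NUMBER COLON NEWLINE INDENT NAME NEWLINE DEDENT NAME NEWLINE
#
# WS never reaches the parser; INDENT/DEDENT are synthesized from the
# depth changes tracked on the "levels" stack.
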

# The top-level filter adds an ENDMARKER, if requested.
# Python's grammar uses it.
def filter(lexer, add_endmarker=True):
    token = None
    tokens = iter(lexer.token, None)
    tokens = track_tokens_filter(lexer, tokens)
    for token in indentation_filter(tokens):
        yield token

    if add_endmarker:
        lineno = 1
        if token is not None:
            lineno = token.lineno
        yield _new_token("ENDMARKER", lineno)

# Combine Ply and my filters into a new lexer

class IndentLexer(object):
    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
        self.lexer = lex.lex(debug=debug, optimize=optimize,
                             lextab=lextab, reflags=reflags)
        self.token_stream = None

    def input(self, s, add_endmarker=True):
        self.lexer.paren_count = 0
        self.lexer.brack_count = 0
        self.lexer.input(s)
        self.token_stream = filter(self.lexer, add_endmarker)

    def token(self):
        try:
            return self.token_stream.next()
        except StopIteration:
            return None

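# Typical usage of IndentLexer (a sketch; the test code at the bottom of
# this file does exactly this):
#
#     lexer = IndentLexer(debug=0)
#     lexer.input("RA <- 5\n")
#     while True:
#         tok = lexer.token()
#         if tok is None:
#             break
#         print tok
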
########## Parser (tokens -> AST) ######

# also part of Ply
#import yacc

# I use the Python AST
from compiler import ast

# Helper function
def Assign(left, right):
    if isinstance(left, ast.Name):
        # Single assignment on left
        return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
    elif isinstance(left, ast.Tuple):
        # List of things - make sure they are Name nodes
        names = []
        for child in left.getChildren():
            if not isinstance(child, ast.Name):
                raise SyntaxError("that assignment not supported")
            names.append(child.name)
        ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
        return ast.Assign([ast.AssTuple(ass_list)], right)
    else:
        raise SyntaxError("Can't do that yet")

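# For example (a sketch, not executed):
#
#     Assign(ast.Name('RA'), ast.Const(5))
#         --> ast.Assign([ast.AssName('RA', 'OP_ASSIGN')], ast.Const(5))
#
# and a tuple target such as (RA, RB) becomes a single ast.AssTuple of
# AssName nodes on the left-hand side.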

# The grammar comments come from Python's Grammar/Grammar file

## NB: compound_stmt in single_input is followed by extra NEWLINE!
# file_input: (NEWLINE | stmt)* ENDMARKER
def p_file_input_end(p):
    """file_input_end : file_input ENDMARKER"""
    print "end", p[1]
    p[0] = ast.Stmt(p[1])

def p_file_input(p):
    """file_input : file_input NEWLINE
                  | file_input stmt
                  | NEWLINE
                  | stmt"""
    if isinstance(p[len(p)-1], basestring):
        if len(p) == 3:
            p[0] = p[1]
        else:
            p[0] = []  # len(p) == 2 --> only a blank line
    else:
        if len(p) == 3:
            p[0] = p[1] + p[2]
        else:
            p[0] = p[1]


# funcdef: [decorators] 'def' NAME parameters ':' suite
# ignoring decorators
def p_funcdef(p):
    "funcdef : DEF NAME parameters COLON suite"
    p[0] = ast.Function(None, p[2], list(p[3]), (), 0, None, p[5])

# parameters: '(' [varargslist] ')'
def p_parameters(p):
    """parameters : LPAR RPAR
                  | LPAR varargslist RPAR"""
    if len(p) == 3:
        p[0] = []
    else:
        p[0] = p[2]


# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
# highly simplified
def p_varargslist(p):
    """varargslist : varargslist COMMA NAME
                   | NAME"""
    if len(p) == 4:
        p[0] = p[1] + [p[3]]  # p[3] is a bare NAME string, so wrap it
    else:
        p[0] = [p[1]]

# stmt: simple_stmt | compound_stmt
def p_stmt_simple(p):
    """stmt : simple_stmt"""
    # simple_stmt is a list
    p[0] = p[1]

def p_stmt_compound(p):
    """stmt : compound_stmt"""
    p[0] = [p[1]]

# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
def p_simple_stmt(p):
    """simple_stmt : small_stmts NEWLINE
                   | small_stmts SEMICOLON NEWLINE"""
    p[0] = p[1]

def p_small_stmts(p):
    """small_stmts : small_stmts SEMICOLON small_stmt
                   | small_stmt"""
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]

# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
#             import_stmt | global_stmt | exec_stmt | assert_stmt
def p_small_stmt(p):
    """small_stmt : flow_stmt
                  | expr_stmt"""
    p[0] = p[1]

# expr_stmt: testlist (augassign (yield_expr|testlist) |
#                      ('=' (yield_expr|testlist))*)
# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
#             '<<=' | '>>=' | '**=' | '//=')
def p_expr_stmt(p):
    """expr_stmt : testlist ASSIGN testlist
                 | testlist """
    if len(p) == 2:
        # a list of expressions
        #p[0] = ast.Discard(p[1])
        p[0] = p[1]
    else:
        p[0] = Assign(p[1], p[3])

def p_flow_stmt(p):
    "flow_stmt : return_stmt"
    p[0] = p[1]

# return_stmt: 'return' [testlist]
def p_return_stmt(p):
    "return_stmt : RETURN testlist"
    p[0] = ast.Return(p[2])


def p_compound_stmt(p):
    """compound_stmt : if_stmt
                     | while_stmt
                     | funcdef
    """
    p[0] = p[1]

def p_while_stmt(p):
    """while_stmt : WHILE test COLON suite ELSE COLON suite
                  | WHILE test COLON suite
    """
    if len(p) == 5:
        p[0] = ast.While(p[2], p[4], None)
    else:
        p[0] = ast.While(p[2], p[4], p[7])

def p_if_stmt(p):
    """if_stmt : IF test COLON suite ELSE COLON suite
               | IF test COLON suite
    """
    if len(p) == 5:
        p[0] = ast.If([(p[2], p[4])], None)
    else:
        p[0] = ast.If([(p[2], p[4])], p[7])

def p_suite(p):
    """suite : simple_stmt
             | NEWLINE INDENT stmts DEDENT"""
    if len(p) == 2:
        p[0] = ast.Stmt(p[1])
    else:
        p[0] = ast.Stmt(p[3])


def p_stmts(p):
    """stmts : stmts stmt
             | stmt"""
    if len(p) == 3:
        p[0] = p[1] + p[2]
    else:
        p[0] = p[1]

## Not using Python's approach because Ply supports precedence

# comparison: expr (comp_op expr)*
# arith_expr: term (('+'|'-') term)*
# term: factor (('*'|'/'|'%'|'//') factor)*
# factor: ('+'|'-'|'~') factor | power
# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'

def make_lt_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [('<', right),])

def make_gt_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [('>', right),])

def make_eq_compare(arg):
    (left, right) = arg
    return ast.Compare(left, [('==', right),])


binary_ops = {
    "+": ast.Add,
    "-": ast.Sub,
    "*": ast.Mul,
    "/": ast.Div,
    "<": make_lt_compare,
    ">": make_gt_compare,
    "==": make_eq_compare,
}
unary_ops = {
    "+": ast.UnaryAdd,
    "-": ast.UnarySub,
}
precedence = (
    ("left", "EQ", "GT", "LT"),
    ("left", "PLUS", "MINUS"),
    ("left", "MULT", "DIV"),
)

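# With this table PLY resolves, for example (a sketch):
#
#     a + b * c    as    Add((a, Mul((b, c))))
#     a < b + c    as    Compare(a, [('<', Add((b, c)))])
#
# i.e. MULT/DIV bind tighter than PLUS/MINUS, which in turn bind tighter
# than the comparison operators.
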
def p_comparison(p):
    """comparison : comparison PLUS comparison
                  | comparison MINUS comparison
                  | comparison MULT comparison
                  | comparison DIV comparison
                  | comparison LT comparison
                  | comparison EQ comparison
                  | comparison GT comparison
                  | PLUS comparison
                  | MINUS comparison
                  | power"""
    if len(p) == 4:
        p[0] = binary_ops[p[2]]((p[1], p[3]))
    elif len(p) == 3:
        p[0] = unary_ops[p[1]](p[2])
    else:
        p[0] = p[1]

# power: atom trailer* ['**' factor]
# trailers enable function calls (and subscripts).
# I only allow one level of calls
# so this is 'trailer'
def p_power(p):
    """power : atom
             | atom trailer"""
    if len(p) == 2:
        p[0] = p[1]
    else:
        if p[2][0] == "CALL":
            if p[1].name == 'print':
                p[0] = ast.Printnl(ast.Tuple(p[2][1]), None, None)
            else:
                p[0] = ast.CallFunc(p[1], p[2][1], None, None)
        else:
            print p[2][1]
            #raise AssertionError("not implemented %s" % p[2][0])
            subs = p[2][1]
            if len(subs) == 1:
                p[0] = ast.Subscript(p[1], 'OP_APPLY', subs[0])
            else:
                p[0] = ast.Slice(p[1], 'OP_APPLY', subs[0], subs[1])

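# For example (a sketch of the resulting nodes):
#
#     (RB)[index]         --> ast.Subscript(RB, 'OP_APPLY', index)
#     (RS)[8*i:8*i+7]     --> ast.Slice(RS, 'OP_APPLY', 8*i, 8*i+7)
#     print (RA)          --> ast.Printnl(...), via the special case above
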
def p_atom_name(p):
    """atom : NAME"""
    p[0] = ast.Name(p[1])

def p_atom_number(p):
    """atom : NUMBER
            | STRING"""
    p[0] = ast.Const(p[1])

# '[' [listmaker] ']' |

def p_atom_listmaker(p):
    """atom : LBRACK listmaker RBRACK"""
    p[0] = p[2]

def p_listmaker(p):
    """listmaker : test COMMA listmaker
                 | test
    """
    if len(p) == 2:
        p[0] = ast.List([p[1]])
    else:
        p[0] = ast.List([p[1]] + p[3].nodes)

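# e.g. (a sketch) "[0]" yields ast.List([Const(0)]) and "[1, 2, 3]" builds
# up recursively to ast.List([Const(1), Const(2), Const(3)]).
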
def p_atom_tuple(p):
    """atom : LPAR testlist RPAR"""
    p[0] = p[2]

# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
def p_trailer(p):
    """trailer : trailer_arglist
               | trailer_subscript
    """
    p[0] = p[1]

def p_trailer_arglist(p):
    "trailer_arglist : LPAR arglist RPAR"
    p[0] = ("CALL", p[2])

def p_trailer_subscript(p):
    "trailer_subscript : LBRACK subscript RBRACK"
    p[0] = ("SUBS", p[2])

# subscript: '.' '.' '.' | test | [test] ':' [test]

def p_subscript(p):
    """subscript : test COLON test
                 | test
    """
    if len(p) == 4:
        p[0] = [p[1], p[3]]
    else:
        p[0] = [p[1]]


# testlist: test (',' test)* [',']
# Contains shift/reduce error
def p_testlist(p):
    """testlist : testlist_multi COMMA
                | testlist_multi """
    if len(p) == 2:
        p[0] = p[1]
    else:
        # May need to promote singleton to tuple
        if isinstance(p[1], list):
            p[0] = p[1]
        else:
            p[0] = [p[1]]
    # Convert into a tuple?
    if isinstance(p[0], list):
        p[0] = ast.Tuple(p[0])

def p_testlist_multi(p):
    """testlist_multi : testlist_multi COMMA test
                      | test"""
    if len(p) == 2:
        # singleton
        p[0] = p[1]
    else:
        if isinstance(p[1], list):
            p[0] = p[1] + [p[3]]
        else:
            # singleton -> list (promoted to a tuple later, in p_testlist)
            p[0] = [p[1], p[3]]


# test: or_test ['if' or_test 'else' test] | lambdef
# as I don't support 'and', 'or', and 'not' this works down to 'comparison'
def p_test(p):
    "test : comparison"
    p[0] = p[1]


# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
# XXX INCOMPLETE: this doesn't allow the trailing comma
def p_arglist(p):
    """arglist : arglist COMMA argument
               | argument"""
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]

# argument: test [gen_for] | test '=' test  # Really [keyword '='] test
def p_argument(p):
    "argument : test"
    p[0] = p[1]

def p_error(p):
    #print "Error!", repr(p)
    raise SyntaxError(p)


class GardenSnakeParser(object):
    def __init__(self, lexer=None):
        if lexer is None:
            lexer = IndentLexer(debug=1)
        self.lexer = lexer
        self.parser = yacc.yacc(start="file_input_end",
                                debug=False, write_tables=False)

    def parse(self, code):
        self.lexer.input(code)
        result = self.parser.parse(lexer=self.lexer, debug=False)
        return ast.Module(None, result)

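# Typical usage (a sketch):
#
#     parser = GardenSnakeParser()
#     module = parser.parse("RA <- 5\n")   # returns an ast.Module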

###### Code generation ######

from compiler import misc, syntax, pycodegen

class GardenSnakeCompiler(object):
    def __init__(self):
        self.parser = GardenSnakeParser()

    def compile(self, code, mode="exec", filename="<string>"):
        tree = self.parser.parse(code)
        print "snake"
        pprint(tree)
        misc.set_filename(filename, tree)
        syntax.check(tree)
        gen = pycodegen.ModuleCodeGenerator(tree)
        code = gen.getCode()
        return code

####### Test code #######

bpermd = r"""
for i = 0 to 7
    index <- (RS)[8*i:8*i+7]
    if index < 64 then
        permi <- (RB)[index]
    else
        permi <- 0
RA <- [0]*56|| perm[0:7]
"""

# deliberately overrides the full pseudo-code above with a cut-down test
bpermd = r"""
#index <- (RS)[8*i:8*i+7]
RA <- [0]*56 # || perm[0:7]
print (RA)
"""

code = bpermd

lexer = IndentLexer(debug=1)
# Give the lexer some input
print "code"
print code
lexer.input(code)

# Tokenize
while True:
    tok = lexer.token()
    if not tok:
        break      # No more input
    print(tok)

#sys.exit(0)

# Set up the GardenSnake run-time environment
def print_(*args):
    print ("args", args)
    print ("-->", " ".join(map(str, args)))

#d = copy(globals())
d = {}
d["print"] = print_
#d["a"] = 10
l = {}


compile = GardenSnakeCompiler().compile

compiled_code = compile(code, mode="single", filename="string")

from compiler import parse
tree = parse(code, "exec")
pprint(tree)

print (compiled_code)

exec (compiled_code, d)
print ("Done")

#print d
#print l