oppc/pseudocode: always use keywords for emit calls
[openpower-isa.git] / src / openpower / oppc / pc_parser.py
1 import itertools
2
3 from ply import yacc
4
5 import openpower.oppc.pc_ast as pc_ast
6
7
class SyntaxError2(Exception):
    """
    Carrier for a syntax error that must survive a trip through ply.

    ply catches SyntaxError, sets a flag and throws the exception away.
    This class is deliberately not a SyntaxError subclass, so it passes
    through; the exception type that was really meant is kept in ``cls``
    and can be produced later with raise_syntax_error().
    """

    def __init__(self, *args, cls=SyntaxError):
        self.cls = cls
        super().__init__(*args)

    def _materialize(self):
        # Build the exception that was actually intended from our own args.
        return self.cls(*self.args)

    def __repr__(self):
        return repr(self._materialize())

    def __str__(self):
        return str(self._materialize())

    def raise_syntax_error(self):
        # Chain to self so the original raise site stays in the traceback.
        raise self._materialize() from self
26
27
def raise_syntax_error(msg,
                       filename, lineno, lexpos, data,
                       cls=SyntaxError):
    """
    Raise a SyntaxError2 describing a problem at offset ``lexpos`` of ``data``.

    msg: error description; converted with str().
    filename, lineno: passed through into the error details.
    lexpos: character offset of the offending position inside ``data``.
    data: the complete input text; the line containing ``lexpos`` is
        extracted from it and attached to the error.
    cls: exception class recorded on the SyntaxError2 instance.

    Always raises SyntaxError2; never returns.
    """
    line_start = data.rfind('\n', 0, lexpos) + 1
    line_end = data.find('\n', line_start)
    if line_end == -1:
        # Last line without a trailing newline: str.find() reports -1,
        # and slicing up to -1 would chop off the line's final character.
        line_end = len(data)
    # Columns are reported 1-based.
    col = (lexpos - line_start) + 1

    raise SyntaxError2(str(msg),
                       (filename, lineno, col, data[line_start:line_end]),
                       cls=cls)
38
39
# Operator spelling -> pc_ast node class, for operators taking two operands.
# Note that a single "=" is the equality comparison here (it maps to
# pc_ast.Eq), not an assignment.
# NOTE(review): nothing in the visible part of this file reads these two
# tables; presumably another module (the lexer?) imports them -- confirm
# before reordering or removing entries.
binary_ops = {
    "^": pc_ast.BitXor,
    "&": pc_ast.BitAnd,
    "|": pc_ast.BitOr,
    "+": pc_ast.Add,
    "-": pc_ast.Sub,
    "<<": pc_ast.LShift,
    ">>": pc_ast.RShift,
    "*": pc_ast.Mul,
    "/": pc_ast.Div,
    "%": pc_ast.Mod,
    "<=": pc_ast.Le,
    ">=": pc_ast.Ge,
    "<": pc_ast.Lt,
    ">": pc_ast.Gt,
    "=": pc_ast.Eq,
    "!=": pc_ast.NotEq,
}
# Operator spelling -> pc_ast node class, for operators taking one operand.
unary_ops = {
    "+": pc_ast.Plus,
    "-": pc_ast.Minus,
    "¬": pc_ast.Not,
}
63
64
class Parser:
    """
    ply.yacc grammar for the pseudocode language, producing pc_ast nodes.

    Every ``p_*`` method is a grammar action.  ply reads the production
    rules from each action's docstring, so those docstrings are part of
    the grammar itself and must not be reworded; explanatory notes for
    the actions therefore live in ``#`` comments.
    """

    # Register name -> pc_ast class used to wrap a Symbol carrying that name.
    # Each member yielded by iterating a register class maps to the class.
    REGS = {}
    REGS.update(map(lambda reg: (reg, pc_ast.GPR), pc_ast.GPR))
    REGS.update(map(lambda reg: (reg, pc_ast.FPR), pc_ast.FPR))
    REGS.update(map(lambda reg: (reg, pc_ast.CR3), pc_ast.CR3))
    REGS.update(map(lambda reg: (reg, pc_ast.CR5), pc_ast.CR5))
    REGS.update(map(lambda reg: (reg, pc_ast.XER), pc_ast.XER))

    def __init__(self, lexer, debug=False, optimize=False, write_tables=True):
        """
        Build the yacc parser for this grammar.

        lexer: object exposing ``tokens``; it is also the lexer handed to
            ply on every parse() call.
        debug, optimize, write_tables: forwarded unchanged to yacc.yacc().
        """
        # WS and THEN never appear in the grammar, so they are left out of
        # the token list ply validates the productions against.
        skipped = ("WS", "THEN")
        self.tokens = tuple(token for token in lexer.tokens
                            if token not in skipped)

        self.__lexer = lexer
        self.__parser = yacc.yacc(
            module=self,
            start="file_input_end",
            debug=debug,
            optimize=optimize,
            write_tables=write_tables,
            tabmodule="yacctab")

        super().__init__()

    @classmethod
    def _wrap_reg(cls, node):
        """
        Return ``node`` wrapped in its register class when it is a
        pc_ast.Symbol whose name is listed in REGS; otherwise return
        ``node`` unchanged.
        """
        if isinstance(node, pc_ast.Symbol):
            regcls = cls.REGS.get(str(node))
            if regcls is not None:
                return regcls(node)
        return node

    # ply convention: entries go from lowest to highest precedence.
    precedence = (
        ("left", "EQ", "NE", "GT", "LT", "LE", "GE", "LTU", "GTU"),
        ("left", "BITOR"),
        ("left", "BITXOR"),
        ("left", "BITAND"),
        ("left", "LSHIFT", "RSHIFT"),
        ("left", "PLUS", "MINUS"),
        ("left", "MULT", "DIV", "MOD"),
        ("left", "INVERT"),
    )

    def p_file_input_end(self, p):
        """
        file_input_end : file_input ENDMARKER
        """
        p[0] = p[1]

    def p_file_input(self, p):
        """
        file_input : file_input NEWLINE
                   | file_input stmt
                   | NEWLINE
                   | stmt
        """
        # The last symbol is addressed as p[len(p)-1] on purpose: ply gives
        # negative indices a different meaning, so p[-1] is not the last item.
        if isinstance(p[len(p)-1], pc_ast.Linebreak):
            # A blank line contributes nothing to the accumulated scope.
            if len(p) == 3:
                p[0] = p[1]
            else:
                p[0] = pc_ast.Scope()
        else:
            if len(p) == 3:
                stmt = p[2]
                if not isinstance(stmt, pc_ast.Scope):
                    stmt = pc_ast.Scope([stmt])
                p[0] = pc_ast.Scope(p[1] + stmt)
            else:
                p[0] = p[1]

    # funcdef: [decorators] 'def' NAME parameters ':' suite
    # ignoring decorators
    def p_funcdef(self, p):
        """
        funcdef : DEF NAME parameters COLON suite
        """
        raise NotImplementedError()

    # parameters: '(' [varargslist] ')'
    def p_parameters(self, p):
        """
        parameters : LPAR RPAR
                   | LPAR varargslist RPAR
        """
        raise NotImplementedError()

    # varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] |
    # '**' NAME) |
    # highly simplified

    def p_varargslist(self, p):
        """
        varargslist : varargslist COMMA NAME
                    | NAME
        """
        raise NotImplementedError()

    # stmt: simple_stmt | compound_stmt
    def p_stmt_simple(self, p):
        """
        stmt : simple_stmt
        """
        # simple_stmt is a list
        p[0] = p[1]

    def p_stmt_compound(self, p):
        """
        stmt : compound_stmt
        """
        # Wrapped so that every stmt, simple or compound, is a Scope.
        p[0] = pc_ast.Scope([p[1]])

    # simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
    def p_simple_stmt(self, p):
        """
        simple_stmt : small_stmts NEWLINE
                    | small_stmts SEMICOLON NEWLINE
        """
        p[0] = p[1]

    def p_small_stmts(self, p):
        """
        small_stmts : small_stmts SEMICOLON small_stmt
                    | small_stmt
        """
        if len(p) == 4:
            p[0] = pc_ast.Scope(p[1] + (p[3],))
        else:
            p[0] = pc_ast.Scope([p[1]])

    # small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
    # import_stmt | global_stmt | exec_stmt | assert_stmt
    def p_small_stmt(self, p):
        """
        small_stmt : flow_stmt
                   | break_stmt
                   | expr_stmt
        """
        p[0] = p[1]

    # expr_stmt: testlist (augassign (yield_expr|testlist) |
    # ('=' (yield_expr|testlist))*)
    # augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
    # '<<=' | '>>=' | '**=' | '//=')
    def p_expr_stmt(self, p):
        """
        expr_stmt : testlist ASSIGNEA testlist
                  | testlist ASSIGN testlist
                  | testlist
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            (lvalue, rvalue) = (p[1], p[3])
            # The operator token's node type selects the assignment flavour.
            if isinstance(p[2], pc_ast.AssignOp):
                cls = pc_ast.AssignExpr
            else:
                cls = pc_ast.AssignIEAExpr
            p[0] = cls(lvalue=self._wrap_reg(lvalue), rvalue=rvalue)

    def p_flow_stmt(self, p):
        "flow_stmt : return_stmt"
        p[0] = p[1]

    # return_stmt: 'return' [testlist]
    def p_return_stmt(self, p):
        "return_stmt : RETURN testlist"
        p[0] = pc_ast.Return(p[2])

    def p_compound_stmt(self, p):
        """
        compound_stmt : if_stmt
                      | while_stmt
                      | switch_stmt
                      | for_stmt
                      | funcdef
        """
        p[0] = p[1]

    def p_break_stmt(self, p):
        """
        break_stmt : BREAK
        """
        p[0] = p[1]

    def p_for_stmt(self, p):
        """
        for_stmt : FOR atom EQ comparison TO comparison COLON suite
                 | DO atom EQ comparison TO comparison COLON suite
        """
        p[0] = pc_ast.ForExpr(subject=p[2], start=p[4], end=p[6], body=p[8])

    def p_while_stmt(self, p):
        """
        while_stmt : DO WHILE test COLON suite ELSE COLON suite
                   | DO WHILE test COLON suite
        """
        # A loop without an ELSE part gets an empty Scope as its orelse.
        if len(p) == 9:
            p[0] = pc_ast.WhileExpr(test=p[3], body=p[5], orelse=p[8])
        else:
            p[0] = pc_ast.WhileExpr(test=p[3], body=p[5], orelse=pc_ast.Scope())

    def p_switch_smt(self, p):
        """
        switch_stmt : SWITCH LPAR atom RPAR COLON NEWLINE INDENT cases DEDENT
        """
        p[0] = pc_ast.SwitchExpr(subject=p[3], cases=p[8])

    def p_cases(self, p):
        """
        cases : switch_list switch_default
              | switch_default
        """
        # The default case always ends up last.
        if len(p) == 3:
            p[0] = pc_ast.Cases(p[1] + (p[2],))
        else:
            p[0] = pc_ast.Cases([p[1]])

    def p_switch_list(self, p):
        """
        switch_list : switch_case switch_list
                    | switch_case
        """
        # Right-recursive rule: prepend this case to the already-built tail.
        if len(p) == 3:
            p[0] = pc_ast.Sequence((p[1],) + p[2])
        else:
            p[0] = pc_ast.Sequence([p[1]])

    def p_switch_case(self, p):
        """
        switch_case : CASE LPAR labels RPAR COLON suite
        """
        p[0] = pc_ast.Case(labels=p[3], body=p[6])

    def p_switch_default(self, p):
        """
        switch_default : DEFAULT COLON suite
        """
        p[0] = pc_ast.Case(body=p[3],
                           labels=pc_ast.Labels([pc_ast.DefaultLabel()]))

    def p_labels(self, p):
        """
        labels : atom COMMA labels
               | atom
        """
        # Only integer literals are accepted as case labels.
        if not isinstance(p[1], pc_ast.IntLiteral):
            # lineno/lexpos are methods on a ply production and take the
            # index of the symbol being asked about.
            # NOTE(review): for a nonterminal such as atom, ply fills these
            # in only when position tracking is enabled -- confirm.
            raise_syntax_error(str(p[1]),
                               self.filename, p.lineno(1), p.lexpos(1),
                               self.input_text)
        label = pc_ast.Label(str(p[1]))
        if len(p) == 4:
            p[0] = pc_ast.Labels((label,) + p[3])
        else:
            p[0] = pc_ast.Labels([label])

    def p_if_stmt(self, p):
        """
        if_stmt : IF test COLON suite ELSE COLON if_stmt
                | IF test COLON suite ELSE COLON suite
                | IF test COLON suite
        """
        (test, body) = (p[2], p[4])
        if len(p) == 8:
            orelse = p[7]
        else:
            orelse = pc_ast.Scope()
        # Both branches are normalised to Scope; an "else if" chain arrives
        # here as a bare IfExpr and gets wrapped.
        if not isinstance(body, pc_ast.Scope):
            body = pc_ast.Scope([body])
        if not isinstance(orelse, pc_ast.Scope):
            orelse = pc_ast.Scope([orelse])
        p[0] = pc_ast.IfExpr(test=test,
                             body=body, orelse=orelse)

    def p_suite(self, p):
        """
        suite : simple_stmt
              | NEWLINE INDENT stmts DEDENT
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = p[3]

    def p_stmts(self, p):
        """
        stmts : stmts stmt
              | stmt
        """
        if len(p) == 3:
            p[0] = pc_ast.Scope(p[1] + p[2])
        else:
            p[0] = p[1]

    def p_comparison(self, p):
        """
        comparison : comparison PLUS comparison
                   | comparison MINUS comparison
                   | comparison MULT comparison
                   | comparison LSHIFT comparison
                   | comparison RSHIFT comparison
                   | comparison DIV comparison
                   | comparison MOD comparison
                   | comparison EQ comparison
                   | comparison NE comparison
                   | comparison LE comparison
                   | comparison GE comparison
                   | comparison LTU comparison
                   | comparison GTU comparison
                   | comparison LT comparison
                   | comparison GT comparison
                   | comparison BITOR comparison
                   | comparison BITXOR comparison
                   | comparison BITAND comparison
                   | PLUS comparison
                   | MINUS comparison
                   | INVERT comparison
                   | comparison APPEND comparison
                   | power
        """
        if len(p) == 4:
            def reg0(left, op, right):
                # "<name>|0", with <name> listed in pc_ast.GPRZero, becomes
                # a GPRZero node instead of a bitwise OR.
                if (isinstance(left, pc_ast.Symbol) and
                        isinstance(op, pc_ast.BitOr) and
                        (isinstance(right, pc_ast.DecLiteral) and
                         (str(right) == "0")) and
                        (str(left) in frozenset(pc_ast.GPRZero))):
                    return pc_ast.GPRZero(str(left))
                return None

            def repeat(left, op, right):
                # A one-element Sequence multiplied by something becomes a
                # RepeatExpr of that element.
                if (isinstance(left, pc_ast.Sequence) and
                        (len(left) == 1) and
                        isinstance(op, pc_ast.Mul)):
                    return pc_ast.RepeatExpr(subject=left[0], times=right)
                return None

            (left, op, right) = p[1:]
            # The first hook recognising the pattern wins; a plain
            # BinaryExpr is built only when none of them does.
            for hook in (reg0, repeat):
                p[0] = hook(left, op, right)
                if p[0] is not None:
                    break
            else:
                p[0] = pc_ast.BinaryExpr(left=left, op=op, right=right)

        elif len(p) == 3:
            (op, value) = p[1:]
            p[0] = pc_ast.UnaryExpr(op=op, value=value)
        else:
            p[0] = p[1]

    # power: atom trailer* ['**' factor]
    # trailers enables function calls (and subscripts).
    # so this is 'trailerlist'
    def p_power(self, p):
        """
        power : atom
              | atom trailerlist
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            attribute_or_subscript = (
                pc_ast.Attribute,
                pc_ast.SubscriptExpr,
                pc_ast.RangeSubscriptExpr,
            )
            if isinstance(p[2], attribute_or_subscript):
                # Walk down the subject chain to its innermost
                # attribute/subscript node; the atom gets attached there.
                node = p[2]
                while isinstance(node.subject, attribute_or_subscript):
                    node = node.subject
                if isinstance(node.subject, pc_ast.Call.Arguments):
                    # The chain bottoms out in an argument list, so the
                    # atom is the name of the function being called.
                    name = pc_ast.Call.Name(str(p[1]))
                    node.subject = pc_ast.Call(name=name, args=node.subject)
                else:
                    node.subject = self._wrap_reg(p[1])
                p[0] = p[2]
            elif isinstance(p[2], pc_ast.Call.Arguments):
                name = pc_ast.Call.Name(str(p[1]))
                p[0] = pc_ast.Call(name=name, args=p[2])
            else:
                raise NotImplementedError()

    def p_atom_name(self, p):
        """
        atom : NAME
        """
        # Note: GPR and FPR are handled separately.
        specials = {frozenset(cls):cls for cls in (
            pc_ast.CR3,
            pc_ast.CR5,
            pc_ast.XER,
            pc_ast.Reserve,
            pc_ast.Overflow,
            pc_ast.Special,
            pc_ast.XLEN,
        )}
        value = str(p[1])
        # The first class whose member set contains the name wraps it;
        # any other name is passed through untouched.
        for (variants, cls) in specials.items():
            if value in variants:
                p[0] = cls(value)
                break
        else:
            p[0] = p[1]

    def p_atom_number(self, p):
        """
        atom : BINARY
             | NUMBER
             | HEX
             | STRING
        """
        p[0] = p[1]

    # '[' [listmaker] ']' |
    def p_atom_listmaker(self, p):
        """
        atom : LBRACK listmaker RBRACK
        """
        p[0] = p[2]

    def p_listmaker(self, p):
        """
        listmaker : test COMMA listmaker
                  | test
        """
        if len(p) == 2:
            p[0] = pc_ast.Sequence([p[1]])
        else:
            # Right-recursive rule: p[1] is the leading element and p[3]
            # the Sequence already built for the rest of the list.
            p[0] = pc_ast.Sequence((p[1],) + p[3])

    def p_atom_tuple(self, p):
        """
        atom : LPAR testlist RPAR
        """
        p[0] = self._wrap_reg(p[2])

    def p_trailerlist(self, p):
        """
        trailerlist : trailer trailerlist
                    | trailer
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            attribute_or_subscript = (
                pc_ast.Attribute,
                pc_ast.SubscriptExpr,
                pc_ast.RangeSubscriptExpr,
            )
            if isinstance(p[2], attribute_or_subscript):
                # Attach the leading trailer at the bottom of the subject
                # chain of the trailers that follow it.
                node = p[2]
                while isinstance(node.subject, attribute_or_subscript):
                    node = node.subject
                node.subject = p[1]
                p[0] = p[2]
            else:
                p[0] = pc_ast.Sequence(p[1] + (p[2],))

    # trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
    def p_trailer(self, p):
        """
        trailer : trailer_arglist
                | trailer_subscript
                | trailer_attr
        """
        p[0] = p[1]

    def p_trailer_arglist(self, p):
        """
        trailer_arglist : LPAR arglist RPAR
                        | LPAR RPAR
        """
        # "()" yields an empty argument list.
        if len(p) == 3:
            p[0] = pc_ast.Call.Arguments()
        else:
            p[0] = p[2]

    def p_trailer_subscript(self, p):
        """
        trailer_subscript : LBRACK subscript RBRACK
        """
        p[0] = p[2]

    def p_trailer_attr(self, p):
        """
        trailer_attr : PERIOD NAME
        """
        p[0] = pc_ast.Attribute(name=p[2])

    # subscript: '.' '.' '.' | test | [test] ':' [test]
    def p_subscript(self, p):
        """subscript : test COLON test
                     | test
        """
        if len(p) == 4:
            p[0] = pc_ast.RangeSubscriptExpr(start=p[1], end=p[3])
        else:
            p[0] = pc_ast.SubscriptExpr(index=p[1])

    # testlist: test (',' test)* [',']
    # Contains shift/reduce error
    def p_testlist(self, p):
        """
        testlist : testlist_multi COMMA
                 | testlist_multi
        """
        # A trailing comma forces a Sequence even for a single element.
        if len(p) == 2:
            p[0] = p[1]
        elif isinstance(p[1], pc_ast.Sequence):
            p[0] = p[1]
        else:
            p[0] = pc_ast.Sequence([p[1]])

    def p_testlist_multi(self, p):
        """
        testlist_multi : testlist_multi COMMA test
                       | test
        """
        if len(p) == 2:
            p[0] = p[1]
        elif isinstance(p[1], pc_ast.Sequence):
            p[0] = pc_ast.Sequence(p[1] + (p[3],))
        else:
            p[0] = pc_ast.Sequence([p[1], p[3]])

    # test: or_test ['if' or_test 'else' test] | lambdef
    # as I don't support 'and', 'or', and 'not' this works down to 'comparison'
    def p_test(self, p):
        """
        test : comparison
             | comparison QMARK test COLON test
        """
        # The QMARK/COLON conditional is represented as an IfExpr whose
        # branches are single-element Scopes.
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = pc_ast.IfExpr(test=p[1],
                                 body=pc_ast.Scope([p[3]]),
                                 orelse=pc_ast.Scope([p[5]]))

    # arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
    # | '**' test)
    # XXX INCOMPLETE: this doesn't allow the trailing comma
    def p_arglist(self, p):
        """
        arglist : arglist COMMA argument
                | argument
        """
        if len(p) == 4:
            p[0] = pc_ast.Call.Arguments(p[1] + (p[3],))
        else:
            p[0] = pc_ast.Call.Arguments([p[1]])

    # argument: test [gen_for] | test '=' test # Really [keyword '='] test
    def p_argument(self, p):
        """
        argument : test
        """
        p[0] = p[1]

    def p_error(self, p):
        # ply's error hook: turn the offending token into a SyntaxError2.
        if p is None:
            # ply passes None when the input ends unexpectedly; there is no
            # token to inspect, so report the very end of the text.
            data = self.input_text
            raise_syntax_error("unexpected end of input",
                               self.filename,
                               data.count("\n") + 1, len(data),
                               data)
        raise_syntax_error(str(p.value),
                           self.filename, p.lineno, p.lexpos,
                           self.input_text)

    def parse(self, code, filename=None, debug=False):
        """
        Parse ``code`` and return whatever the start rule produced.

        code: pseudocode text; also kept on the instance so that error
            reports can quote the offending line.
        filename: only used when reporting errors.
        debug: forwarded to ply's parse().

        Grammar errors surface as SyntaxError2 (see raise_syntax_error).
        """
        self.filename = filename
        self.input_text = code
        return self.__parser.parse(lexer=self.__lexer, debug=debug, input=code)