33d2d8dbcd2655e8da4faa061e30aa206ae5992c
[openpower-isa.git] / src / openpower / oppc / pc_parser.py
1 import itertools
2
3 from ply import yacc
4
5 import openpower.oppc.pc_ast as pc_ast
6
7
class SyntaxError2(Exception):
    """
    Carrier exception for syntax errors raised from inside ply actions.

    ply catches SyntaxError, sets a flag and discards it; wrapping the
    error in a plain Exception subclass lets it propagate out of the
    parser. The real exception class is kept in ``cls`` and can be
    re-created on demand from the stored arguments.
    """

    def __init__(self, *args, cls=SyntaxError):
        # Remember which exception type this object stands in for.
        self.cls = cls
        super().__init__(*args)

    def _wrapped(self):
        """Build a fresh instance of the wrapped exception class."""
        return self.cls(*self.args)

    def __repr__(self):
        wrapped = self._wrapped()
        return repr(wrapped)

    def __str__(self):
        wrapped = self._wrapped()
        return str(wrapped)

    def raise_syntax_error(self):
        """Raise the wrapped exception, chained to this carrier."""
        wrapped = self._wrapped()
        raise wrapped from self
26
27
def raise_syntax_error(msg,
                       filename, lineno, lexpos, data,
                       cls=SyntaxError):
    """
    Raise a SyntaxError2 describing an error at offset ``lexpos`` of ``data``.

    msg: error message (converted with str()).
    filename, lineno: reported as-is in the error details.
    lexpos: character offset into ``data`` of the offending position.
    data: the complete source text being parsed.
    cls: exception class the SyntaxError2 stands in for.

    The error details tuple is (filename, lineno, column, line text), with
    a 1-based column computed from ``lexpos``.
    """
    # Start of the line containing lexpos: one past the previous newline
    # (rfind yields -1 on the first line, which becomes 0).
    line_start = data.rfind('\n', 0, lexpos) + 1
    line_end = data.find('\n', line_start)
    if line_end == -1:
        # The offending line is the last one and has no trailing newline;
        # slicing up to -1 would silently drop its final character.
        line_end = len(data)
    col = (lexpos - line_start) + 1

    raise SyntaxError2(str(msg),
                       (filename, lineno, col, data[line_start:line_end]),
                       cls=cls)
38
39
# Maps the textual spelling of each binary operator to the pc_ast class
# associated with it. Note that a single "=" is the equality operator here.
binary_ops = {
    "^": pc_ast.BitXor,
    "&": pc_ast.BitAnd,
    "|": pc_ast.BitOr,
    "+": pc_ast.Add,
    "-": pc_ast.Sub,
    "<<": pc_ast.LShift,
    ">>": pc_ast.RShift,
    "*": pc_ast.Mul,
    "/": pc_ast.Div,
    "%": pc_ast.Mod,
    "<=": pc_ast.Le,
    ">=": pc_ast.Ge,
    "<": pc_ast.Lt,
    ">": pc_ast.Gt,
    "=": pc_ast.Eq,
    "!=": pc_ast.NotEq,
}
# Maps the textual spelling of each unary (prefix) operator to the pc_ast
# class associated with it.
unary_ops = {
    "+": pc_ast.Plus,
    "-": pc_ast.Minus,
    "¬": pc_ast.Not,
}
63
64
class Parser:
    """
    ply.yacc-based parser whose grammar actions build a pc_ast tree.

    Every p_* method is a grammar action: its docstring is the grammar rule
    that ply reads at table-construction time, so those docstrings are part
    of the program and must not be reworded. Explanations therefore live in
    ``#`` comments.
    """

    # Lookup from register name to the pc_ast class wrapping that register.
    # Later classes win if the same name appears in more than one of them.
    REGS = {
        reg: kind
        for kind in (
            pc_ast.GPR,
            pc_ast.FPR,
            pc_ast.CR3,
            pc_ast.CR5,
            pc_ast.XER,
        )
        for reg in kind
    }

    def __init__(self, lexer, debug=False, optimize=False, write_tables=True):
        """
        Build the parser tables for the grammar defined by this class.

        lexer: object exposing a ``tokens`` sequence; it is also the token
            source handed to ply when parse() is called.
        debug, optimize, write_tables: forwarded unchanged to yacc.yacc().
        """
        # WS and THEN are not used by any grammar rule below, so they are
        # left out of the token list that yacc sees.
        self.tokens = tuple(token for token in lexer.tokens
                            if token not in ("WS", "THEN"))

        self.__lexer = lexer
        self.__parser = yacc.yacc(
            module=self,
            start="file_input_end",
            debug=debug,
            optimize=optimize,
            write_tables=write_tables,
            tabmodule="yacctab")

        super().__init__()

    # Operator precedence for the ambiguous `comparison` rule, listed from
    # lowest to highest binding strength (ply convention).
    precedence = (
        ("left", "EQ", "NE", "GT", "LT", "LE", "GE", "LTU", "GTU"),
        ("left", "BITOR"),
        ("left", "BITXOR"),
        ("left", "BITAND"),
        ("left", "LSHIFT", "RSHIFT"),
        ("left", "PLUS", "MINUS"),
        ("left", "MULT", "DIV", "MOD"),
        ("left", "INVERT"),
    )

    def p_file_input_end(self, p):
        """
        file_input_end : file_input ENDMARKER
        """
        p[0] = p[1]

    def p_file_input(self, p):
        """
        file_input : file_input NEWLINE
                   | file_input stmt
                   | NEWLINE
                   | stmt
        """
        # The last symbol of the production tells which alternative matched:
        # a Linebreak means one of the NEWLINE forms, anything else a stmt.
        # (Negative indices have a special meaning on ply productions, hence
        # the explicit len(p)-1.)
        if isinstance(p[len(p)-1], pc_ast.Linebreak):
            if len(p) == 3:
                # Blank line after existing input: nothing to add.
                p[0] = p[1]
            else:
                # Input consisting of a blank line only: empty scope.
                p[0] = pc_ast.Scope()
        else:
            if len(p) == 3:
                stmt = p[2]
                if not isinstance(stmt, pc_ast.Scope):
                    stmt = pc_ast.Scope([stmt])
                p[0] = pc_ast.Scope(p[1] + stmt)
            else:
                p[0] = p[1]

    # funcdef: [decorators] 'def' NAME parameters ':' suite
    # ignoring decorators
    def p_funcdef(self, p):
        """
        funcdef : DEF NAME parameters COLON suite
        """
        raise NotImplementedError()

    # parameters: '(' [varargslist] ')'
    def p_parameters(self, p):
        """
        parameters : LPAR RPAR
                   | LPAR varargslist RPAR
        """
        raise NotImplementedError()

    # varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] |
    # '**' NAME) |
    # highly simplified

    def p_varargslist(self, p):
        """
        varargslist : varargslist COMMA NAME
                    | NAME
        """
        raise NotImplementedError()

    # stmt: simple_stmt | compound_stmt
    def p_stmt_simple(self, p):
        """
        stmt : simple_stmt
        """
        # simple_stmt is already a Scope (see p_small_stmts)
        p[0] = p[1]

    def p_stmt_compound(self, p):
        """
        stmt : compound_stmt
        """
        # Wrap the single compound statement so every stmt is a Scope.
        p[0] = pc_ast.Scope([p[1]])

    # simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
    def p_simple_stmt(self, p):
        """
        simple_stmt : small_stmts NEWLINE
                    | small_stmts SEMICOLON NEWLINE
        """
        p[0] = p[1]

    def p_small_stmts(self, p):
        """
        small_stmts : small_stmts SEMICOLON small_stmt
                    | small_stmt
        """
        if len(p) == 4:
            p[0] = pc_ast.Scope(p[1] + (p[3],))
        else:
            p[0] = pc_ast.Scope([p[1]])

    # small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
    # import_stmt | global_stmt | exec_stmt | assert_stmt
    def p_small_stmt(self, p):
        """
        small_stmt : flow_stmt
                   | break_stmt
                   | expr_stmt
        """
        p[0] = p[1]

    # expr_stmt: testlist (augassign (yield_expr|testlist) |
    # ('=' (yield_expr|testlist))*)
    # augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
    # '<<=' | '>>=' | '**=' | '//=')
    def p_expr_stmt(self, p):
        """
        expr_stmt : testlist ASSIGNEA testlist
                  | testlist ASSIGN testlist
                  | testlist
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            (lvalue, rvalue) = (p[1], p[3])
            # The operator token's value decides which assignment node is
            # built: AssignOp -> AssignExpr, otherwise the ASSIGNEA form.
            if isinstance(p[2], pc_ast.AssignOp):
                cls = pc_ast.AssignExpr
            else:
                cls = pc_ast.AssignIEAExpr
            # A bare symbol naming a known register is wrapped in the
            # matching register class.
            if (isinstance(lvalue, pc_ast.Symbol) and
                    (str(lvalue) in self.__class__.REGS)):
                lvalue = self.__class__.REGS[str(lvalue)](lvalue)
            p[0] = cls(lvalue=lvalue, rvalue=rvalue)

    def p_flow_stmt(self, p):
        "flow_stmt : return_stmt"
        p[0] = p[1]

    # return_stmt: 'return' [testlist]
    def p_return_stmt(self, p):
        "return_stmt : RETURN testlist"
        p[0] = pc_ast.Return(p[2])

    def p_compound_stmt(self, p):
        """
        compound_stmt : if_stmt
                      | while_stmt
                      | switch_stmt
                      | for_stmt
                      | funcdef
        """
        p[0] = p[1]

    def p_break_stmt(self, p):
        """
        break_stmt : BREAK
        """
        p[0] = p[1]

    def p_for_stmt(self, p):
        """
        for_stmt : FOR atom EQ comparison TO comparison COLON suite
                 | DO atom EQ comparison TO comparison COLON suite
        """
        p[0] = pc_ast.ForExpr(subject=p[2], start=p[4], end=p[6], body=p[8])

    def p_while_stmt(self, p):
        """
        while_stmt : DO WHILE test COLON suite ELSE COLON suite
                   | DO WHILE test COLON suite
        """
        if len(p) == 9:
            p[0] = pc_ast.WhileExpr(test=p[3], body=p[5], orelse=p[8])
        else:
            # No else branch: use an empty scope.
            p[0] = pc_ast.WhileExpr(test=p[3], body=p[5],
                                    orelse=pc_ast.Scope())

    def p_switch_smt(self, p):
        """
        switch_stmt : SWITCH LPAR atom RPAR COLON NEWLINE INDENT cases DEDENT
        """
        p[0] = pc_ast.SwitchExpr(subject=p[3], cases=p[8])

    def p_cases(self, p):
        """
        cases : switch_list switch_default
              | switch_default
        """
        if len(p) == 3:
            p[0] = pc_ast.Cases(p[1] + (p[2],))
        else:
            p[0] = pc_ast.Cases([p[1]])

    def p_switch_list(self, p):
        """
        switch_list : switch_case switch_list
                    | switch_case
        """
        if len(p) == 3:
            # Right-recursive rule: prepend this case to the rest.
            p[0] = pc_ast.Sequence((p[1],) + p[2])
        else:
            p[0] = pc_ast.Sequence([p[1]])

    def p_switch_case(self, p):
        """
        switch_case : CASE LPAR labels RPAR COLON suite
        """
        p[0] = pc_ast.Case(labels=p[3], body=p[6])

    def p_switch_default(self, p):
        """
        switch_default : DEFAULT COLON suite
        """
        p[0] = pc_ast.Case(body=p[3],
                           labels=pc_ast.Labels([pc_ast.DefaultLabel()]))

    def p_labels(self, p):
        """
        labels : atom COMMA labels
               | atom
        """
        # Only integer literals are accepted as case labels.
        if not isinstance(p[1], pc_ast.IntLiteral):
            # lineno/lexpos are methods on a ply production and take the
            # symbol index; passing them uncalled made the error reporting
            # itself fail. NOTE(review): for a non-terminal such as `atom`
            # ply only records positions when parsing with tracking enabled,
            # otherwise both are 0 -- confirm whether that is acceptable.
            raise_syntax_error(str(p[1]),
                               self.filename, p.lineno(1), p.lexpos(1),
                               self.input_text)
        label = pc_ast.Label(str(p[1]))
        if len(p) == 4:
            # Right-recursive rule: prepend this label to the rest.
            p[0] = pc_ast.Labels((label,) + p[3])
        else:
            p[0] = pc_ast.Labels([label])

    def p_if_stmt(self, p):
        """
        if_stmt : IF test COLON suite ELSE COLON if_stmt
                | IF test COLON suite ELSE COLON suite
                | IF test COLON suite
        """
        (test, body) = (p[2], p[4])
        if len(p) == 8:
            orelse = p[7]
        else:
            orelse = pc_ast.Scope()
        # Both branches are always stored as scopes; a chained if_stmt in
        # the else position is a single node and gets wrapped here.
        if not isinstance(body, pc_ast.Scope):
            body = pc_ast.Scope([body])
        if not isinstance(orelse, pc_ast.Scope):
            orelse = pc_ast.Scope([orelse])
        p[0] = pc_ast.IfExpr(test=test,
                             body=body, orelse=orelse)

    def p_suite(self, p):
        """
        suite : simple_stmt
              | NEWLINE INDENT stmts DEDENT
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = p[3]

    def p_stmts(self, p):
        """
        stmts : stmts stmt
              | stmt
        """
        if len(p) == 3:
            p[0] = pc_ast.Scope(p[1] + p[2])
        else:
            p[0] = p[1]

    def p_comparison(self, p):
        """
        comparison : comparison PLUS comparison
                   | comparison MINUS comparison
                   | comparison MULT comparison
                   | comparison LSHIFT comparison
                   | comparison RSHIFT comparison
                   | comparison DIV comparison
                   | comparison MOD comparison
                   | comparison EQ comparison
                   | comparison NE comparison
                   | comparison LE comparison
                   | comparison GE comparison
                   | comparison LTU comparison
                   | comparison GTU comparison
                   | comparison LT comparison
                   | comparison GT comparison
                   | comparison BITOR comparison
                   | comparison BITXOR comparison
                   | comparison BITAND comparison
                   | PLUS comparison
                   | MINUS comparison
                   | INVERT comparison
                   | comparison APPEND comparison
                   | power
        """
        if len(p) == 4:
            def reg0(left, op, right):
                # `<symbol> | 0` where the symbol is listed in GPRZero is
                # turned into a GPRZero node instead of a bitwise OR.
                if (isinstance(left, pc_ast.Symbol) and
                        isinstance(op, pc_ast.BitOr) and
                        (isinstance(right, pc_ast.DecLiteral) and
                         (str(right) == "0")) and
                        (str(left) in frozenset(pc_ast.GPRZero))):
                    return pc_ast.GPRZero(str(left))
                return None

            def repeat(left, op, right):
                # `[x] * n`: a one-element sequence multiplied by something
                # becomes a RepeatExpr rather than a multiplication.
                if (isinstance(left, pc_ast.Sequence) and
                        (len(left) == 1) and
                        isinstance(op, pc_ast.Mul)):
                    return pc_ast.RepeatExpr(subject=left[0], times=right)
                return None

            (left, op, right) = p[1:]
            # Try the special forms first; the for/else falls back to a
            # plain BinaryExpr when none of the hooks matched.
            for hook in (reg0, repeat):
                p[0] = hook(left, op, right)
                if p[0] is not None:
                    break
            else:
                p[0] = pc_ast.BinaryExpr(left=left, op=op, right=right)

        elif len(p) == 3:
            (op, value) = p[1:]
            p[0] = pc_ast.UnaryExpr(op=op, value=value)
        else:
            p[0] = p[1]

    # power: atom trailer* ['**' factor]
    # trailers enables function calls (and subscripts).
    # so this is 'trailerlist'
    def p_power(self, p):
        """
        power : atom
              | atom trailerlist
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            attribute_or_subscript = (
                pc_ast.Attribute,
                pc_ast.Subscript,
                pc_ast.RangeSubscript,
            )
            if isinstance(p[2], attribute_or_subscript):
                # Walk down the chain of trailers to the innermost one,
                # whose subject slot is where the atom belongs.
                node = p[2]
                while isinstance(node.subject, attribute_or_subscript):
                    node = node.subject
                if isinstance(node.subject, pc_ast.Call.Arguments):
                    # Innermost trailer is an argument list: the atom is
                    # the name of the function being called.
                    name = pc_ast.Call.Name(str(p[1]))
                    node.subject = pc_ast.Call(name=name, args=node.subject)
                else:
                    node.subject = p[1]
                p[0] = p[2]
            elif isinstance(p[2], pc_ast.Call.Arguments):
                name = pc_ast.Call.Name(str(p[1]))
                p[0] = pc_ast.Call(name=name, args=p[2])
            else:
                raise NotImplementedError()

    def p_atom_name(self, p):
        """
        atom : NAME
        """
        p[0] = p[1]

    def p_atom_number(self, p):
        """
        atom : BINARY
             | NUMBER
             | HEX
             | STRING
        """
        p[0] = p[1]

    # '[' [listmaker] ']' |
    def p_atom_listmaker(self, p):
        """
        atom : LBRACK listmaker RBRACK
        """
        p[0] = p[2]

    def p_listmaker(self, p):
        """
        listmaker : test COMMA listmaker
                  | test
        """
        if len(p) == 2:
            p[0] = pc_ast.Sequence([p[1]])
        else:
            # Right-recursive rule: p[1] is the leading test and p[3] the
            # already-built Sequence for the remainder (p[2] is the comma,
            # p[0] is the result slot and not yet set).
            p[0] = pc_ast.Sequence((p[1],) + p[3])

    def p_atom_tuple(self, p):
        """
        atom : LPAR testlist RPAR
        """
        value = p[2]
        # A parenthesised bare register name is wrapped in its register
        # class, same as on the left-hand side of an assignment.
        if (isinstance(value, pc_ast.Symbol) and
                (str(value) in self.__class__.REGS)):
            value = self.__class__.REGS[str(value)](value)
        p[0] = value

    def p_trailerlist(self, p):
        """
        trailerlist : trailer trailerlist
                    | trailer
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            attribute_or_subscript = (
                pc_ast.Attribute,
                pc_ast.Subscript,
                pc_ast.RangeSubscript,
            )
            if isinstance(p[2], attribute_or_subscript):
                # The first trailer applies first, so it becomes the
                # subject of the innermost node of the following chain.
                node = p[2]
                while isinstance(node.subject, attribute_or_subscript):
                    node = node.subject
                node.subject = p[1]
                p[0] = p[2]
            else:
                p[0] = pc_ast.Sequence(p[1] + (p[2],))

    # trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
    def p_trailer(self, p):
        """
        trailer : trailer_arglist
                | trailer_subscript
                | trailer_attr
        """
        p[0] = p[1]

    def p_trailer_arglist(self, p):
        """
        trailer_arglist : LPAR arglist RPAR
                        | LPAR RPAR
        """
        if len(p) == 3:
            # Empty parentheses: call without arguments.
            p[0] = pc_ast.Call.Arguments()
        else:
            p[0] = p[2]

    def p_trailer_subscript(self, p):
        """
        trailer_subscript : LBRACK subscript RBRACK
        """
        p[0] = p[2]

    def p_trailer_attr(self, p):
        """
        trailer_attr : PERIOD NAME
        """
        p[0] = pc_ast.Attribute(name=p[2])

    # subscript: '.' '.' '.' | test | [test] ':' [test]
    def p_subscript(self, p):
        """subscript : test COLON test
                     | test
        """
        if len(p) == 4:
            p[0] = pc_ast.RangeSubscript(start=p[1], end=p[3])
        else:
            p[0] = pc_ast.Subscript(index=p[1])

    # testlist: test (',' test)* [',']
    # Contains shift/reduce error
    def p_testlist(self, p):
        """
        testlist : testlist_multi COMMA
                 | testlist_multi
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # A trailing comma forces a sequence even for a single element.
            if isinstance(p[1], pc_ast.Sequence):
                p[0] = p[1]
            else:
                p[0] = pc_ast.Sequence([p[1]])

    def p_testlist_multi(self, p):
        """
        testlist_multi : testlist_multi COMMA test
                       | test
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            if isinstance(p[1], pc_ast.Sequence):
                p[0] = pc_ast.Sequence(p[1] + (p[3],))
            else:
                p[0] = pc_ast.Sequence([p[1], p[3]])

    # test: or_test ['if' or_test 'else' test] | lambdef
    # as I don't support 'and', 'or', and 'not' this works down to 'comparison'
    def p_test(self, p):
        """
        test : comparison
             | comparison QMARK test COLON test
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # `cond ? a : b` is represented as an IfExpr whose branches
            # are single-element scopes.
            p[0] = pc_ast.IfExpr(test=p[1],
                                 body=pc_ast.Scope([p[3]]),
                                 orelse=pc_ast.Scope([p[5]]))

    # arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
    # | '**' test)
    # XXX INCOMPLETE: this doesn't allow the trailing comma
    def p_arglist(self, p):
        """
        arglist : arglist COMMA argument
                | argument
        """
        if len(p) == 4:
            p[0] = pc_ast.Call.Arguments(p[1] + (p[3],))
        else:
            p[0] = pc_ast.Call.Arguments([p[1]])

    # argument: test [gen_for] | test '=' test # Really [keyword '='] test
    def p_argument(self, p):
        """
        argument : test
        """
        p[0] = p[1]

    def p_error(self, p):
        # ply passes None instead of a token when the input ends
        # unexpectedly; report that at the very end of the text rather than
        # failing on `p.value`.
        if p is None:
            text = self.input_text
            raise_syntax_error("unexpected end of input",
                               self.filename,
                               text.count("\n") + 1, len(text),
                               text)
        raise_syntax_error(str(p.value),
                           self.filename, p.lineno, p.lexpos,
                           self.input_text)

    def parse(self, code, filename=None, debug=False):
        """
        Parse ``code`` and return the value produced by the start rule.

        code: source text; also kept so errors can quote the source line.
        filename: name reported in syntax errors.
        debug: forwarded to the underlying ply parser.
        """
        self.filename = filename
        self.input_text = code
        return self.__parser.parse(lexer=self.__lexer, debug=debug, input=code)
614 self.input_text = code
615 return self.__parser.parse(lexer=self.__lexer, debug=debug, input=code)