--- /dev/null
+parser.out
+yacctab.py
--- /dev/null
+import glob
+import io
+import re
+
+from openpower.decoder.power_enums import (
+ find_wiki_dir,
+)
+
+import openpower.oppc.pc_lexer as pc_lexer
+import openpower.oppc.pc_parser as pc_parser
+import openpower.oppc.pc_util as pc_util
+
+
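+# Strip one level of the 4-space indentation used by the pseudo-code
+# blocks in the .mdwn files, and drop trailing whitespace.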
+def dedent(line):
+ if line.startswith("    "):
+ return line[4:].rstrip()
+ return line.rstrip()
+
+
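+# Round-trip helper: parse the pseudo-code, regenerate text from the
+# resulting tree, and return both forms for comparison.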
+def parse(parser, origin):
+ origin = tuple(origin)
+ tree = parser.parse(code="\n".join(origin))
+ stream = io.StringIO()
+ for (level, line) in pc_util.pseudocode(tree):
+ print(f"{' ' * 4 * level}{line}", file=stream)
+ stream.seek(0)
+ target = tuple(stream)
+ return (origin, target)
+
+
+lexer = pc_lexer.IndentLexer(debug=False)
+parser = pc_parser.Parser(lexer=lexer)
+pattern = re.compile(r"Pseudo-code:(.*?)(?:Special Registers Altered|Description):", re.DOTALL)
+for path in []: # glob.glob(f"{find_wiki_dir()}/../isa/*.mdwn"):
+ with open(path, "r", encoding="UTF-8") as stream:
+ data = stream.read()
+ for origin in pattern.findall(data):
+ try:
+ (stage0, stage1) = parse(parser, map(dedent, origin.split("\n")))
+ (stage2, stage3) = parse(parser, map(dedent, stage1))
+ stage1 = tuple(map(dedent, stage1))
+ stage3 = tuple(map(dedent, stage3))
+ assert stage1 == stage2 == stage3
+ except AssertionError as exc:
+ print(stage0)
+ print(stage1)
+ print(stage3)
+ raise exc
+ except Exception as exc:
+ print(path)
+ print(origin)
+ raise exc
+
+code = """
+src <- [0]*64
+src[64-XLEN:63] <- (RS)
+result <- [0]*64
+do i = 0 to 1
+ n <- i * 32
+ result[n+0:n+7] <- 0
+ result[n+8:n+19] <- DPD_TO_BCD(src[n+12:n+21])
+ result[n+20:n+31] <- DPD_TO_BCD(src[n+22:n+31])
+RA <- result[64-XLEN:63]
+"""
+tree = parser.parse(code=code)
+print(tree)
+for (level, line) in pc_util.pseudocode(tree):
+ print(f"{' ' * 4 * level}{line}")
--- /dev/null
+import copy
+import dataclasses
+
+
+class NodeMeta(type):
+ pass
+
+
+class Node(metaclass=NodeMeta):
+ def __repr__(self):
+ return f"{hex(id(self))}@{self.__class__.__name__}()"
+
+ def __eq__(self, other):
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return (id(self) == id(other))
+
+ def clone(self):
+ return copy.deepcopy(self)
+
+
+class TokenMeta(NodeMeta):
+ pass
+
+
+class Token(Node, str, metaclass=TokenMeta):
+ def __new__(cls, value):
+ if isinstance(value, cls):
+ value = str(value)
+ if not isinstance(value, str):
+ raise ValueError(value)
+
+ return super().__new__(cls, value)
+
+ def __str__(self):
+ return super(Node, self).__str__()
+
+ def __hash__(self):
+ return super(Node, self).__hash__()
+
+ def __repr__(self):
+ return f"{hex(id(self))}@{self.__class__.__name__}({str(self)})"
+
+
+class SequenceMeta(NodeMeta):
+ __typeid__ = Node
+
+ def __new__(metacls, clsname, bases, ns, *, typeid=Node):
+ ns.setdefault("__typeid__", typeid)
+
+ return super().__new__(metacls, clsname, bases, ns)
+
+
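+# Tuple-backed node: items are validated against __typeid__, which is
+# set via the metaclass keyword, e.g. "class Labels(Sequence, typeid=Label)".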
+class Sequence(Node, tuple, metaclass=SequenceMeta):
+ def __new__(cls, iterable=tuple()):
+ def validate(item):
+ if not isinstance(item, cls.__typeid__):
+ raise ValueError(cls, item)
+ return item
+
+ return super().__new__(cls, map(validate, iterable))
+
+ def __hash__(self):
+ return super(Node, self).__hash__()
+
+ def __repr__(self):
+ return f"{hex(id(self))}@{self.__class__.__name__}({repr(list(self))})"
+
+
+class Arguments(Sequence):
+ pass
+
+
+class Scope(Sequence):
+ pass
+
+
+class Module(Sequence):
+ pass
+
+
+class DataclassMeta(NodeMeta):
+ def __new__(metacls, clsname, bases, ns):
+ cls = super().__new__(metacls, clsname, bases, ns)
+ wrap = dataclasses.dataclass(init=True, eq=False, unsafe_hash=True, frozen=False)
+ datacls = wrap(cls)
+ origin = datacls.__repr__
+ datacls.__repr__ = lambda self: f"{hex(id(self))}@{origin(self)}"
+
+ return datacls
+
+
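+# Node with named fields; DataclassMeta turns each subclass into a
+# dataclass, and __post_init__ type-checks every field.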
+class Dataclass(Node, metaclass=DataclassMeta):
+ def __post_init__(self):
+ for field in dataclasses.fields(self):
+ key = field.name
+ value = getattr(self, key)
+ if not isinstance(value, field.type):
+ raise ValueError(f"{self.__class__.__name__}.{key}: {value!r}")
+
+ def clone(self, **kwargs):
+ return copy.deepcopy(dataclasses.replace(self, **kwargs))
+
+
+class LiteralMeta(TokenMeta):
+ def __new__(metacls, clsname, bases, ns, *, choices=()):
+ ns.setdefault("__choices__", choices)
+
+ return super().__new__(metacls, clsname, bases, ns)
+
+ def __iter__(cls):
+ yield from cls.__choices__
+
+
+class Literal(Token, metaclass=LiteralMeta):
+ __choices__ = ()
+
+ def __new__(cls, value):
+ choices = cls.__choices__
+ if isinstance(value, Token):
+ value = str(value)
+ if choices and value not in choices:
+ raise ValueError(value)
+
+ return super().__new__(cls, value)
+
+
+class GPR(Literal, choices=("RA", "RA0", "RB", "RB0", "RC", "RC0", "RS", "RSp", "RT", "RTp")):
+ pass
+
+
+class FPR(Literal, choices=("FPR", "FRA", "FRAp", "FRB", "FRBp", "FRC", "FRS", "FRSp", "FRT", "FRTp")):
+ pass
+
+
+class CR3(Literal, choices=("BF", "BFA")):
+ pass
+
+
+class CR5(Literal, choices=("BA", "BB", "BC", "BI", "BT")):
+ pass
+
+
+class XER(Literal, choices=("OV", "OV32", "CA", "CA32", "SO")):
+ pass
+
+
+class IntLiteral(Literal):
+ pass
+
+
+class BinLiteral(IntLiteral):
+ r"""0b[01_]+"""
+ pass
+
+
+class HexLiteral(IntLiteral):
+ r"""0x[0-9A-Fa-f_]+"""
+ pass
+
+
+class DecLiteral(IntLiteral):
+ r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
+ pass
+
+
+class Symbol(Token):
+ r"""[A-Za-z_]+[A-Za-z0-9_]*"""
+ pass
+
+
+class Attribute(Dataclass):
+ name: Symbol
+ subject: Node = Node()
+
+
+class StringLiteral(Literal):
+ __STRING_ESCAPE = r"""(\\[0-9a-zA-Z._~!=&\^\-\\?'"])"""
+ __STRING_CHAR = (r"""([^"\\\n]|""" + __STRING_ESCAPE + ")")
+ __STRING = ("[\"']" + __STRING_CHAR + "*" + "[\"']")
+
+ __doc__ = __STRING
+
+ def __repr__(self):
+ return f"{hex(id(self))}@{self.__class__.__name__}({self.__str__()!r})"
+
+
+class Whitespace(Token):
+ r"""[^\S\r\n]+"""
+
+
+class Linebreak(Token):
+ r"""[\r\n]+"""
+
+
+class Comment(Token):
+ r"""[ ]*(?:\043|<!--)[^\n]*"""
+ pass
+
+
+class Indent(Node):
+ pass
+
+
+class Dedent(Node):
+ pass
+
+
+class LBracket(Token):
+ r"""\["""
+ pass
+
+
+class RBracket(Token):
+ r"""\]"""
+ pass
+
+
+class LParenthesis(Token):
+ r"""\("""
+ pass
+
+
+class RParenthesis(Token):
+ r"""\)"""
+ pass
+
+
+class Call(Dataclass):
+ name: Symbol
+ args: Arguments
+
+
+class AssignOp(Token):
+ r"""<-"""
+ pass
+
+
+class AssignIEAOp(Token):
+ r"""<-iea"""
+ pass
+
+
+class Assign(Dataclass):
+ lvalue: Node
+ rvalue: Node
+
+
+class AssignIEA(Assign):
+ lvalue: Node
+ rvalue: Node
+
+
+class Not(Token):
+ r"""(?:¬|~)"""
+ pass
+
+
+class Add(Token):
+ r"""\+"""
+ pass
+
+
+class Sub(Token):
+ r"""\-"""
+ pass
+
+
+class Plus(Node):
+ pass
+
+
+class Minus(Node):
+ pass
+
+
+class Mul(Token):
+ r"""(?:×|\*)"""
+ pass
+
+
+class MulS(Token):
+ r"""(?:×si|×s|\*si|\*s)"""
+ pass
+
+
+class MulU(Token):
+ r"""(?:×ui|×u|\*ui|\*u)"""
+ pass
+
+
+class Div(Token):
+ r"""/"""
+ pass
+
+
+class DivT(Token):
+ r"""÷"""
+ pass
+
+
+class Mod(Token):
+ r"""%"""
+ pass
+
+
+class Sqrt(Token):
+ r"""√"""
+ pass
+
+
+class Eq(Token):
+ r"""="""
+ pass
+
+
+class NotEq(Token):
+ r"""(?:≠|!=)"""
+ pass
+
+
+class Lt(Token):
+ r"""<"""
+ pass
+
+
+class Le(Token):
+ r"""<="""
+ pass
+
+
+class Ge(Token):
+ r""">="""
+ pass
+
+
+class Gt(Token):
+ r""">"""
+ pass
+
+
+class LtU(Token):
+ r"""<u"""
+ pass
+
+
+class GtU(Token):
+ r""">u"""
+ pass
+
+
+class BitAnd(Token):
+ r"""&"""
+ pass
+
+
+class BitOr(Token):
+ r"""\|"""
+ pass
+
+
+class BitXor(Token):
+ r"""(?:⊕|≡|\^)"""
+ pass
+
+
+class UnaryExpr(Dataclass):
+ op: Node
+ value: Node
+
+
+class BinaryExpr(Dataclass):
+ left: Node
+ op: Node
+ right: Node
+
+
+class Keyword(Token):
+ pass
+
+
+class FunctionKeyword(Keyword):
+ r"""def"""
+ pass
+
+
+class IfKeyword(Keyword):
+ r"""if"""
+ pass
+
+
+class ThenKeyword(Keyword):
+ r"""then"""
+ pass
+
+
+class ElseKeyword(Keyword):
+ r"""else"""
+ pass
+
+
+class LeaveKeyword(Keyword):
+ r"""leave"""
+ pass
+
+
+class ForKeyword(Keyword):
+ r"""for"""
+ pass
+
+
+class ToKeyword(Keyword):
+ r"""to"""
+ pass
+
+
+class WhileKeyword(Keyword):
+ r"""while"""
+ pass
+
+
+class DoKeyword(Keyword):
+ r"""while"""
+ pass
+
+
+class ReturnKeyword(Keyword):
+ r"""return"""
+ pass
+
+
+class SwitchKeyword(Keyword):
+ r"""switch"""
+ pass
+
+
+class CaseKeyword(Keyword):
+ r"""case"""
+ pass
+
+
+class DefaultKeyword(Keyword):
+ r"""while"""
+ pass
+
+
+class Colon(Token):
+ r""":"""
+ pass
+
+
+class LShift(Token):
+ r"""<<"""
+ pass
+
+
+class RShift(Token):
+ r""">>"""
+ pass
+
+
+class Comma(Token):
+ r""","""
+ pass
+
+
+class Period(Token):
+ r"""\."""
+ pass
+
+
+class Semicolon(Token):
+ r""";"""
+ pass
+
+
+class BitConcat(Token):
+ r"""\|\|"""
+ pass
+
+
+class Question(Token):
+ r"""\?"""
+ pass
+
+
+class Endmarker(Node):
+ pass
+
+
+class IfExpr(Dataclass):
+ test: Node
+ body: Scope
+ orelse: Scope
+
+
+class ForExpr(Dataclass):
+ subject: Node
+ start: Node
+ end: Node
+ body: Scope
+
+
+class WhileExpr(Dataclass):
+ test: Node
+ body: Scope
+ orelse: Scope
+
+
+class RepeatExpr(Dataclass):
+ subject: Node
+ times: Node
+
+
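+class Return(Dataclass):
+ # Referenced by pc_parser's return_stmt rule ("return testlist").
+ value: Node
+
+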
+class Subscript(Dataclass):
+ index: Node
+ subject: Node = Node()
+
+
+class RangeSubscript(Dataclass):
+ start: Node
+ end: Node = Node()
+ subject: Node = Node()
+
+
+class Label(Literal):
+ pass
+
+
+class DefaultLabel(Label):
+ def __new__(cls):
+ return super().__new__(cls, "default")
+
+
+class Labels(Sequence, typeid=Label):
+ pass
+
+
+class Case(Dataclass):
+ labels: Labels
+ body: Scope
+
+
+class Cases(Sequence, typeid=Case):
+ pass
+
+
+class SwitchExpr(Dataclass):
+ subject: Node
+ cases: Cases
--- /dev/null
+# Based on GardenSnake - a parser generator demonstration program
+# GardenSnake was released into the Public Domain by Andrew Dalke.
+
+# Portions of this work are derived from Python's Grammar definition
+# and may be covered under the Python copyright and license
+#
+# Andrew Dalke / Dalke Scientific Software, LLC
+# 30 August 2006 / Cape Town, South Africa
+
+# Modifications for inclusion in PLY distribution
+
+from copy import copy
+
+from ply import lex
+
+import openpower.oppc.pc_ast as pc_ast
+
+
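+# Wrap a token class into a ply token handler: t.value is coerced to
+# the class, and the handler inherits the class's name and docstring
+# (ply uses the docstring as the token's regular expression).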
+def bind(cls):
+ def wrapper(self, t):
+ t.value = cls(t.value)
+ return t
+
+ wrapper.__name__ = cls.__name__
+ wrapper.__doc__ = cls.__doc__
+
+ return wrapper
+
+
+class SyntaxError2(Exception):
+ """ class used to raise a syntax error but get ply to stop eating errors
+ since it catches and discards SyntaxError after setting a flag.
+ """
+
+ def __init__(self, *args, cls=SyntaxError):
+ super().__init__(*args)
+ self.cls = cls
+
+ def __repr__(self):
+ return repr(self.cls(*self.args))
+
+ def __str__(self):
+ return str(self.cls(*self.args))
+
+ def raise_syntax_error(self):
+ raise self.cls(*self.args) from self
+
+
+def raise_syntax_error(msg, filename, lineno, lexpos, input_text,
+ cls=SyntaxError):
+ line_start = input_text.rfind("\n", 0, lexpos) + 1
+ line_end = input_text.find("\n", line_start)
+ col = (lexpos - line_start) + 1
+ raise SyntaxError2(str(msg), (filename, lineno, col,
+ input_text[line_start:line_end]), cls=cls)
+
+# I implemented INDENT / DEDENT generation as a post-processing filter
+
+# The original lex token stream contains WS and NEWLINE characters.
+# WS will only occur before any other tokens on a line.
+
+# I have three filters. One tags tokens by adding two attributes:
+# "must_indent" is True if the token must be indented from the
+# previous code. The other is "at_line_start", which is True for WS
+# and the first non-WS/non-NEWLINE token on a line; it flags the check
+# to see whether the new line has changed indentation level.
+
+# Python's syntax has three INDENT states
+# 0) no colon hence no need to indent
+# 1) "if 1: go()" - simple statements have a COLON but no need for an indent
+# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
+NO_INDENT = 0
+MAY_INDENT = 1
+MUST_INDENT = 2
+
+# Turn pseudo-code syntax into python-like colon syntax:
+# identify tokens which tell us whether a "hidden colon" is needed.
+# This in turn means that track_tokens_filter "works" without needing
+# complex grammar rules.
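+#
+# e.g. "if a = b then" becomes "if a = b :" at the token level, so the
+# indentation filter can treat it like Python's "if a == b:"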
+
+
+def python_colonify(lexer, tokens):
+ implied_colon_needed = False
+ for token in tokens:
+ if token.type == "THEN":
+ # turn then into colon
+ token.type = "COLON"
+ token.value = pc_ast.Colon(str(token.value))
+ yield token
+ elif token.type == "ELSE":
+ yield token
+ token = copy(token)
+ token.type = "COLON"
+ token.value = pc_ast.Colon(str(token.value))
+ yield token
+ elif token.type in ["DO", "WHILE", "FOR", "SWITCH"]:
+ implied_colon_needed = True
+ yield token
+ elif token.type == "NEWLINE":
+ if implied_colon_needed:
+ ctok = copy(token)
+ ctok.type = "COLON"
+ ctok.value = pc_ast.Colon(str(token.value))
+ yield ctok
+ implied_colon_needed = False
+ yield token
+ else:
+ yield token
+
+
+# only care about whitespace at the start of a line
+def track_tokens_filter(lexer, tokens):
+ oldignore = lexer.lexignore
+ lexer.at_line_start = at_line_start = True
+ indent = NO_INDENT
+ saw_colon = False
+ for token in tokens:
+ token.at_line_start = at_line_start
+
+ if token.type == "COLON":
+ at_line_start = False
+ indent = MAY_INDENT
+ token.must_indent = False
+
+ elif token.type == "NEWLINE":
+ at_line_start = True
+ if indent == MAY_INDENT:
+ indent = MUST_INDENT
+ token.must_indent = False
+
+ elif token.type == "WS":
+ assert token.at_line_start
+ at_line_start = True
+ token.must_indent = False
+
+ else:
+ # A real token; only indent after COLON NEWLINE
+ if indent == MUST_INDENT:
+ token.must_indent = True
+ else:
+ token.must_indent = False
+ at_line_start = False
+ indent = NO_INDENT
+
+ # Really bad hack that changes the lexer's ignore state: once
+ # "must_indent" is seen (basically, once real tokens are seen),
+ # whitespace is ignored.
+ if token.must_indent:
+ lexer.lexignore = ("ignore", " ")
+ else:
+ lexer.lexignore = oldignore
+
+ token.indent = indent
+ yield token
+ lexer.at_line_start = at_line_start
+
+
+def _new_token(type, lineno):
+ cls = {
+ "ENDMARKER": pc_ast.Endmarker,
+ "INDENT": pc_ast.Indent,
+ "DEDENT": pc_ast.Dedent,
+ }[type]
+ tok = lex.LexToken()
+ tok.type = type
+ tok.value = cls()
+ tok.lineno = lineno
+ tok.lexpos = -1
+ return tok
+
+# Synthesize a DEDENT tag
+
+
+def DEDENT(lineno):
+ return _new_token("DEDENT", lineno)
+
+# Synthesize an INDENT tag
+
+
+def INDENT(lineno):
+ return _new_token("INDENT", lineno)
+
+
+def count_spaces(l):
+ for i in range(len(l)):
+ if l[i] != " ":
+ return i
+ return len(l)
+
+
+def annoying_case_hack_filter(code):
+ """add annoying "silent keyword" (fallthrough)
+
+ this tricks the parser into taking the (silent) case statement
+ as a "small expression". it can then be spotted and used to indicate
+ "fall through" to the next case (in the parser)
+
+ also skips blank lines
+
+ bugs: any function that starts with the letters "case" or "default"
+ will be detected erroneously. fixing that involves writing a token
+ lexer which spots the fact that "case" and "default" are words,
+ separating them from space, colon, bracket etc.
+
+ http://bugs.libre-riscv.org/show_bug.cgi?id=280
+ """
+ res = []
+ prev_spc_count = None
+ for l in code.split("\n"):
+ spc_count = count_spaces(l)
+ nwhite = l[spc_count:]
+ if len(nwhite) == 0: # skip blank lines
+ res.append("")
+ continue
+ if nwhite.startswith("case") or nwhite.startswith("default"):
+ if (prev_spc_count is not None and
+ prev_spc_count == spc_count and
+ (res[-1].endswith(":") or res[-1].endswith(": fallthrough"))):
+ res[-1] += " fallthrough" # add to previous line
+ prev_spc_count = spc_count
+ else:
+ prev_spc_count = None
+ res.append(l)
+ return "\n".join(res)
+
+
+# Track the indentation level and emit the right INDENT / DEDENT events.
+def indentation_filter(tokens, filename):
+ # A stack of indentation levels; will never pop item 0
+ levels = [0]
+ token = None
+ depth = 0
+ prev_was_ws = False
+ for token in tokens:
+ # WS only occurs at the start of the line
+ # There may be WS followed by NEWLINE so
+ # only track the depth here. Don't indent/dedent
+ # until there's something real.
+ if token.type == "WS":
+ assert depth == 0
+ depth = len(token.value)
+ prev_was_ws = True
+ # WS tokens are never passed to the parser
+ continue
+
+ if token.type == "NEWLINE":
+ depth = 0
+ if prev_was_ws or token.at_line_start:
+ # ignore blank lines
+ continue
+ # pass the other cases on through
+ yield token
+ continue
+
+ # then it must be a real token (not WS, not NEWLINE)
+ # which can affect the indentation level
+
+ prev_was_ws = False
+ if token.must_indent:
+ # The current depth must be larger than the previous level
+ if not (depth > levels[-1]):
+ raise_syntax_error("expected an indented block",
+ filename, token.lexer.lineno,
+ token.lexer.lexpos, token.lexer.lexdata,
+ cls=IndentationError)
+
+ levels.append(depth)
+ yield INDENT(token.lineno)
+
+ elif token.at_line_start:
+ # Must be on the same level or one of the previous levels
+ if depth == levels[-1]:
+ # At the same level
+ pass
+ elif depth > levels[-1]:
+ raise_syntax_error("indent increase but not in new block",
+ filename, token.lexer.lineno,
+ token.lexer.lexpos, token.lexer.lexdata,
+ cls=IndentationError)
+ else:
+ # Back up; but only if it matches a previous level
+ try:
+ i = levels.index(depth)
+ except ValueError:
+ raise_syntax_error("inconsistent indentation",
+ filename, token.lexer.lineno,
+ token.lexer.lexpos, token.lexer.lexdata,
+ cls=IndentationError)
+ for _ in range(i+1, len(levels)):
+ yield DEDENT(token.lineno)
+ levels.pop()
+
+ yield token
+
+ ### Finished processing ###
+
+ # Must dedent any remaining levels
+ if len(levels) > 1:
+ assert token is not None
+ for _ in range(1, len(levels)):
+ yield DEDENT(token.lineno)
+
+
+# The top-level filter adds an ENDMARKER, if requested.
+# Python's grammar uses it.
+def filter(lexer, add_endmarker, filename):
+ token = None
+ tokens = iter(lexer.token, None)
+ tokens = python_colonify(lexer, tokens)
+ tokens = track_tokens_filter(lexer, tokens)
+ for token in indentation_filter(tokens, filename):
+ yield token
+
+ if add_endmarker:
+ lineno = 1
+ if token is not None:
+ lineno = token.lineno
+ yield _new_token("ENDMARKER", lineno)
+
+
+##### Lexer ######
+class Lexer:
+ tokens = (
+ "DEF",
+ "IF",
+ "THEN",
+ "ELSE",
+ "FOR",
+ "TO",
+ "DO",
+ "WHILE",
+ "BREAK",
+ "NAME",
+ "HEX", # hex numbers
+ "NUMBER", # Python decimals
+ "BINARY", # Python binary
+ "STRING", # single quoted strings only; syntax of raw strings
+ "LPAR",
+ "RPAR",
+ "LBRACK",
+ "RBRACK",
+ "COLON",
+ "EQ",
+ "ASSIGNEA",
+ "ASSIGN",
+ "LTU",
+ "GTU",
+ "NE",
+ "LE",
+ "LSHIFT",
+ "RSHIFT",
+ "GE",
+ "LT",
+ "GT",
+ "PLUS",
+ "MINUS",
+ "MULT",
+ "DIV",
+ "MOD",
+ "INVERT",
+ "APPEND",
+ "BITOR",
+ "BITAND",
+ "BITXOR",
+ "RETURN",
+ "SWITCH",
+ "CASE",
+ "DEFAULT",
+ "WS",
+ "NEWLINE",
+ "COMMA",
+ "QMARK",
+ "PERIOD",
+ "SEMICOLON",
+ "INDENT",
+ "DEDENT",
+ "ENDMARKER",
+ )
+
+ # Build the lexer
+ def build(self, **kwargs):
+ self.lexer = lex.lex(module=self, **kwargs)
+ self.filename = None
+
+ @lex.TOKEN(pc_ast.HexLiteral.__doc__)
+ def t_HEX(self, t):
+ t.value = pc_ast.HexLiteral(t.value)
+ return t
+
+ @lex.TOKEN(pc_ast.BinLiteral.__doc__)
+ def t_BINARY(self, t):
+ t.value = pc_ast.BinLiteral(t.value)
+ return t
+
+ @lex.TOKEN(pc_ast.DecLiteral.__doc__)
+ def t_NUMBER(self, t):
+ t.value = pc_ast.DecLiteral(t.value)
+ return t
+
+ @lex.TOKEN(pc_ast.StringLiteral.__doc__)
+ def t_STRING(self, t):
+ t.value = pc_ast.StringLiteral(t.value[1:-1])
+ return t
+
+ t_COLON = pc_ast.Colon.__doc__
+ t_EQ = pc_ast.Eq.__doc__
+ t_ASSIGNEA = pc_ast.AssignIEAOp.__doc__
+ t_ASSIGN = pc_ast.AssignOp.__doc__
+ t_LTU = pc_ast.LtU.__doc__
+ t_GTU = pc_ast.GtU.__doc__
+ t_NE = pc_ast.NotEq.__doc__
+ t_LE = pc_ast.Le.__doc__
+ t_GE = pc_ast.Ge.__doc__
+ t_LSHIFT = pc_ast.LShift.__doc__
+ t_RSHIFT = pc_ast.RShift.__doc__
+ t_LT = pc_ast.Lt.__doc__
+ t_GT = pc_ast.Gt.__doc__
+ t_PLUS = pc_ast.Add.__doc__
+ t_MINUS = pc_ast.Sub.__doc__
+ t_MULT = pc_ast.Mul.__doc__
+ t_DIV = pc_ast.Div.__doc__
+ t_MOD = pc_ast.Mod.__doc__
+ t_INVERT = pc_ast.Not.__doc__
+ t_COMMA = pc_ast.Comma.__doc__
+ t_PERIOD = pc_ast.Period.__doc__
+ t_SEMICOLON = pc_ast.Semicolon.__doc__
+ t_APPEND = pc_ast.BitConcat.__doc__
+ t_BITOR = pc_ast.BitOr.__doc__
+ t_BITAND = pc_ast.BitAnd.__doc__
+ t_BITXOR = pc_ast.BitXor.__doc__
+ t_QMARK = pc_ast.Question.__doc__
+
+ @lex.TOKEN(pc_ast.Symbol.__doc__)
+ def t_NAME(self, t):
+ keywords = {
+ "def": ("DEF", pc_ast.FunctionKeyword),
+ "if": ("IF", pc_ast.IfKeyword),
+ "then": ("THEN", pc_ast.ThenKeyword),
+ "else": ("ELSE", pc_ast.ElseKeyword),
+ "leave": ("BREAK", pc_ast.LeaveKeyword),
+ "for": ("FOR", pc_ast.ForKeyword),
+ "to": ("TO", pc_ast.ToKeyword),
+ "while": ("WHILE", pc_ast.WhileKeyword),
+ "do": ("DO", pc_ast.DoKeyword),
+ "return": ("RETURN", pc_ast.ReturnKeyword),
+ "switch": ("SWITCH", pc_ast.SwitchKeyword),
+ "case": ("CASE", pc_ast.CaseKeyword),
+ "default": ("DEFAULT", pc_ast.DefaultKeyword),
+ }
+ (tt, tcls) = keywords.get(t.value, ("NAME", pc_ast.Symbol))
+ t.type = tt
+ t.value = tcls(t.value)
+ return t
+
+ # Putting this before t_WS lets it consume lines with only comments in
+ # them, so the latter code never sees the WS part. It does not consume
+ # the newline; this is needed for "if 1: #comment".
+ @lex.TOKEN(pc_ast.Comment.__doc__)
+ def t_comment(self, t):
+ return None
+
+ # Whitespace
+ @lex.TOKEN(pc_ast.Whitespace.__doc__)
+ def t_WS(self, t):
+ if (t.lexer.at_line_start and
+ t.lexer.paren_count == 0 and
+ t.lexer.brack_count == 0):
+ return t
+
+ # Don't generate newline tokens when inside parentheses, e.g.
+ # a = (1,
+ # 2, 3)
+ @lex.TOKEN(pc_ast.Linebreak.__doc__)
+ def t_newline(self, t):
+ t.lexer.lineno += len(t.value)
+ t.value = pc_ast.Linebreak(t.value)
+ t.type = "NEWLINE"
+ if t.lexer.paren_count == 0 and t.lexer.brack_count == 0:
+ return t
+
+ @lex.TOKEN(pc_ast.LBracket.__doc__)
+ def t_LBRACK(self, t):
+ t.lexer.brack_count += 1
+ t.value = pc_ast.LBracket(t.value)
+ return t
+
+ @lex.TOKEN(pc_ast.RBracket.__doc__)
+ def t_RBRACK(self, t):
+ t.lexer.brack_count -= 1
+ t.value = pc_ast.RBracket(t.value)
+ return t
+
+ @lex.TOKEN(pc_ast.LParenthesis.__doc__)
+ def t_LPAR(self, t):
+ t.lexer.paren_count += 1
+ t.value = pc_ast.LParenthesis(t.value)
+ return t
+
+ @lex.TOKEN(pc_ast.RParenthesis.__doc__)
+ def t_RPAR(self, t):
+ t.lexer.paren_count -= 1
+ t.value = pc_ast.RParenthesis(t.value)
+ return t
+
+ def t_error(self, t):
+ raise_syntax_error("Unknown symbol %r" % (t.value[0],),
+ self.filename, t.lexer.lineno,
+ t.lexer.lexpos, t.lexer.lexdata)
+
+
+# Combine Ply and my filters into a new lexer
+
+class IndentLexer(Lexer):
+ def __init__(self, debug=False, optimize=False, lextab="lextab"):
+ self.debug = debug
+ self.build(debug=debug, optimize=optimize, lextab=lextab)
+ self.token_stream = None
+
+ def input(self, s, add_endmarker=True):
+ s = annoying_case_hack_filter(s)
+ s += "\n"
+ self.lexer.paren_count = 0
+ self.lexer.brack_count = 0
+ self.lexer.lineno = 1
+ self.lexer.input(s)
+ self.token_stream = filter(self.lexer, add_endmarker, self.filename)
+
+ def token(self):
+ # The simplest way to convert "simple" tokens to classes.
+ # Functions won't work due to ply's reliance on __code__.
+ # We end up with (LT+MINUS) instead of ASSIGN otherwise.
+ mapping = {
+ "COLON": pc_ast.Colon,
+ "EQ": pc_ast.Eq,
+ "ASSIGNEA": pc_ast.AssignIEAOp,
+ "ASSIGN": pc_ast.AssignOp,
+ "LTU": pc_ast.LtU,
+ "GTU": pc_ast.GtU,
+ "NE": pc_ast.NotEq,
+ "LE": pc_ast.Le,
+ "GE": pc_ast.Ge,
+ "LSHIFT": pc_ast.LShift,
+ "RSHIFT": pc_ast.RShift,
+ "LT": pc_ast.Lt,
+ "GT": pc_ast.Gt,
+ "PLUS": pc_ast.Add,
+ "MINUS": pc_ast.Sub,
+ "MULT": pc_ast.Mul,
+ "DIV": pc_ast.Div,
+ "MOD": pc_ast.Mod,
+ "INVERT": pc_ast.Not,
+ "COMMA": pc_ast.Comma,
+ "PERIOD": pc_ast.Period,
+ "SEMICOLON": pc_ast.Semicolon,
+ "APPEND": pc_ast.BitConcat,
+ "BITOR": pc_ast.BitOr,
+ "BITAND": pc_ast.BitAnd,
+ "BITXOR": pc_ast.BitXor,
+ "QMARK": pc_ast.Question,
+ }
+ try:
+ t = next(self.token_stream)
+ if t is not None:
+ if t.type in mapping:
+ t.value = mapping[t.type](t.value)
+ return t
+ except StopIteration:
+ return None
--- /dev/null
+import itertools
+
+from ply import yacc
+
+import openpower.oppc.pc_ast as pc_ast
+
+
+class SyntaxError2(Exception):
+ """
+ Class used to raise a syntax error but get ply to stop eating errors
+ since it catches and discards SyntaxError after setting a flag.
+ """
+
+ def __init__(self, *args, cls=SyntaxError):
+ super().__init__(*args)
+ self.cls = cls
+
+ def __repr__(self):
+ return repr(self.cls(*self.args))
+
+ def __str__(self):
+ return str(self.cls(*self.args))
+
+ def raise_syntax_error(self):
+ raise self.cls(*self.args) from self
+
+
+def raise_syntax_error(msg,
+ filename, lineno, lexpos, data,
+ cls=SyntaxError):
+ line_start = data.rfind('\n', 0, lexpos) + 1
+ line_end = data.find('\n', line_start)
+ col = (lexpos - line_start) + 1
+
+ raise SyntaxError2(str(msg),
+ (filename, lineno, col, data[line_start:line_end]),
+ cls=cls)
+
+
+binary_ops = {
+ "^": pc_ast.BitXor,
+ "&": pc_ast.BitAnd,
+ "|": pc_ast.BitOr,
+ "+": pc_ast.Add,
+ "-": pc_ast.Sub,
+ "<<": pc_ast.LShift,
+ ">>": pc_ast.RShift,
+ "*": pc_ast.Mul,
+ "/": pc_ast.Div,
+ "%": pc_ast.Mod,
+ "<=": pc_ast.Le,
+ ">=": pc_ast.Ge,
+ "<": pc_ast.Lt,
+ ">": pc_ast.Gt,
+ "=": pc_ast.Eq,
+ "!=": pc_ast.NotEq,
+}
+unary_ops = {
+ "+": pc_ast.Plus,
+ "-": pc_ast.Minus,
+ "¬": pc_ast.Not,
+}
+
+
+class Parser:
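+ # Map every known register name ("RA", "FRT", "BF", ...) to its
+ # Literal class so bare NAME symbols can be promoted to typed
+ # register nodes.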
+ REGS = {}
+ REGS.update(map(lambda reg: (reg, pc_ast.GPR), pc_ast.GPR))
+ REGS.update(map(lambda reg: (reg, pc_ast.FPR), pc_ast.FPR))
+ REGS.update(map(lambda reg: (reg, pc_ast.CR3), pc_ast.CR3))
+ REGS.update(map(lambda reg: (reg, pc_ast.CR5), pc_ast.CR5))
+ REGS.update(map(lambda reg: (reg, pc_ast.XER), pc_ast.XER))
+
+ def __init__(self, lexer, debug=False, optimize=False, write_tables=True):
+ ignore = lambda token: token in ("WS", "THEN")
+ self.tokens = tuple(itertools.filterfalse(ignore, lexer.tokens))
+
+ self.__lexer = lexer
+ self.__parser = yacc.yacc(
+ module=self,
+ start="file_input_end",
+ debug=debug,
+ optimize=optimize,
+ write_tables=write_tables,
+ tabmodule="yacctab")
+
+ return super().__init__()
+
+ precedence = (
+ ("left", "EQ", "NE", "GT", "LT", "LE", "GE", "LTU", "GTU"),
+ ("left", "BITOR"),
+ ("left", "BITXOR"),
+ ("left", "BITAND"),
+ ("left", "LSHIFT", "RSHIFT"),
+ ("left", "PLUS", "MINUS"),
+ ("left", "MULT", "DIV", "MOD"),
+ ("left", "INVERT"),
+ )
+
+ def p_file_input_end(self, p):
+ """
+ file_input_end : file_input ENDMARKER
+ """
+ p[0] = p[1]
+
+ def p_file_input(self, p):
+ """
+ file_input : file_input NEWLINE
+ | file_input stmt
+ | NEWLINE
+ | stmt
+ """
+ if isinstance(p[len(p)-1], pc_ast.Linebreak):
+ if len(p) == 3:
+ p[0] = p[1]
+ else:
+ p[0] = pc_ast.Scope()
+ else:
+ if len(p) == 3:
+ stmt = p[2]
+ if not isinstance(stmt, pc_ast.Scope):
+ stmt = pc_ast.Scope([stmt])
+ p[0] = pc_ast.Scope(p[1] + stmt)
+ else:
+ p[0] = p[1]
+
+ # funcdef: [decorators] 'def' NAME parameters ':' suite
+ # ignoring decorators
+ def p_funcdef(self, p):
+ """
+ funcdef : DEF NAME parameters COLON suite
+ """
+ raise NotImplementedError()
+
+ # parameters: '(' [varargslist] ')'
+ def p_parameters(self, p):
+ """
+ parameters : LPAR RPAR
+ | LPAR varargslist RPAR
+ """
+ raise NotImplementedError()
+
+ # varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] |
+ # '**' NAME) |
+ # highly simplified
+
+ def p_varargslist(self, p):
+ """
+ varargslist : varargslist COMMA NAME
+ | NAME
+ """
+ raise NotImplementedError()
+
+ # stmt: simple_stmt | compound_stmt
+ def p_stmt_simple(self, p):
+ """
+ stmt : simple_stmt
+ """
+ # simple_stmt is a list
+ p[0] = p[1]
+
+ def p_stmt_compound(self, p):
+ """
+ stmt : compound_stmt
+ """
+ p[0] = pc_ast.Scope([p[1]])
+
+ # simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+ def p_simple_stmt(self, p):
+ """
+ simple_stmt : small_stmts NEWLINE
+ | small_stmts SEMICOLON NEWLINE
+ """
+ p[0] = p[1]
+
+ def p_small_stmts(self, p):
+ """
+ small_stmts : small_stmts SEMICOLON small_stmt
+ | small_stmt
+ """
+ if len(p) == 4:
+ p[0] = pc_ast.Scope(p[1] + (p[3],))
+ else:
+ p[0] = pc_ast.Scope([p[1]])
+
+ # small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
+ # import_stmt | global_stmt | exec_stmt | assert_stmt
+ def p_small_stmt(self, p):
+ """
+ small_stmt : flow_stmt
+ | break_stmt
+ | expr_stmt
+ """
+ p[0] = p[1]
+
+ # expr_stmt: testlist (augassign (yield_expr|testlist) |
+ # ('=' (yield_expr|testlist))*)
+ # augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+ # '<<=' | '>>=' | '**=' | '//=')
+ def p_expr_stmt(self, p):
+ """
+ expr_stmt : testlist ASSIGNEA testlist
+ | testlist ASSIGN testlist
+ | testlist
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ (lvalue, rvalue) = (p[1], p[3])
+ if isinstance(p[2], pc_ast.AssignOp):
+ cls = pc_ast.Assign
+ else:
+ cls = pc_ast.AssignIEA
+ if (isinstance(lvalue, pc_ast.Symbol) and
+ (str(lvalue) in self.__class__.REGS)):
+ lvalue = self.__class__.REGS[str(lvalue)](lvalue)
+ p[0] = cls(lvalue=lvalue, rvalue=rvalue)
+
+ def p_flow_stmt(self, p):
+ "flow_stmt : return_stmt"
+ p[0] = p[1]
+
+ # return_stmt: 'return' [testlist]
+ def p_return_stmt(self, p):
+ "return_stmt : RETURN testlist"
+ p[0] = pc_ast.Return(p[2])
+
+ def p_compound_stmt(self, p):
+ """
+ compound_stmt : if_stmt
+ | while_stmt
+ | switch_stmt
+ | for_stmt
+ | funcdef
+ """
+ p[0] = p[1]
+
+ def p_break_stmt(self, p):
+ """
+ break_stmt : BREAK
+ """
+ p[0] = p[1]
+
+ def p_for_stmt(self, p):
+ """
+ for_stmt : FOR atom EQ comparison TO comparison COLON suite
+ | DO atom EQ comparison TO comparison COLON suite
+ """
+ p[0] = pc_ast.ForExpr(subject=p[2], start=p[4], end=p[6], body=p[8])
+
+ def p_while_stmt(self, p):
+ """
+ while_stmt : DO WHILE test COLON suite ELSE COLON suite
+ | DO WHILE test COLON suite
+ """
+ if len(p) == 9:
+ p[0] = pc_ast.WhileExpr(test=p[3], body=p[5], orelse=p[8])
+ else:
+ p[0] = pc_ast.WhileExpr(test=p[3], body=p[5], orelse=pc_ast.Scope())
+
+ def p_switch_stmt(self, p):
+ """
+ switch_stmt : SWITCH LPAR atom RPAR COLON NEWLINE INDENT cases DEDENT
+ """
+ p[0] = pc_ast.SwitchExpr(subject=p[3], cases=p[8])
+
+ def p_cases(self, p):
+ """
+ cases : switch_list switch_default
+ | switch_default
+ """
+ if len(p) == 3:
+ p[0] = pc_ast.Cases(p[1] + (p[2],))
+ else:
+ p[0] = pc_ast.Cases([p[1]])
+
+ def p_switch_list(self, p):
+ """
+ switch_list : switch_case switch_list
+ | switch_case
+ """
+ if len(p) == 3:
+ p[0] = pc_ast.Sequence((p[1],) + p[2])
+ else:
+ p[0] = pc_ast.Sequence([p[1]])
+
+ def p_switch_case(self, p):
+ """
+ switch_case : CASE LPAR labels RPAR COLON suite
+ """
+ p[0] = pc_ast.Case(labels=p[3], body=p[6])
+
+ def p_switch_default(self, p):
+ """
+ switch_default : DEFAULT COLON suite
+ """
+ p[0] = pc_ast.Case(body=p[3],
+ labels=pc_ast.Labels([pc_ast.DefaultLabel()]))
+
+ def p_labels(self, p):
+ """
+ labels : atom COMMA labels
+ | atom
+ """
+ if not isinstance(p[1], pc_ast.IntLiteral):
+ raise_syntax_error(str(p),
+ self.filename, p.lineno(1), p.lexpos(1),
+ self.input_text)
+ label = pc_ast.Label(str(p[1]))
+ if len(p) == 4:
+ p[0] = pc_ast.Labels((label,) + p[3])
+ else:
+ p[0] = pc_ast.Labels([label])
+
+ def p_if_stmt(self, p):
+ """
+ if_stmt : IF test COLON suite ELSE COLON if_stmt
+ | IF test COLON suite ELSE COLON suite
+ | IF test COLON suite
+ """
+ (test, body) = (p[2], p[4])
+ if len(p) == 8:
+ orelse = p[7]
+ else:
+ orelse = pc_ast.Scope()
+ if not isinstance(body, pc_ast.Scope):
+ body = pc_ast.Scope([body])
+ if not isinstance(orelse, pc_ast.Scope):
+ orelse = pc_ast.Scope([orelse])
+ p[0] = pc_ast.IfExpr(test=test,
+ body=body, orelse=orelse)
+
+ def p_suite(self, p):
+ """
+ suite : simple_stmt
+ | NEWLINE INDENT stmts DEDENT
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = p[3]
+
+ def p_stmts(self, p):
+ """
+ stmts : stmts stmt
+ | stmt
+ """
+ if len(p) == 3:
+ p[0] = pc_ast.Scope(p[1] + p[2])
+ else:
+ p[0] = p[1]
+
+ def p_comparison(self, p):
+ """
+ comparison : comparison PLUS comparison
+ | comparison MINUS comparison
+ | comparison MULT comparison
+ | comparison LSHIFT comparison
+ | comparison RSHIFT comparison
+ | comparison DIV comparison
+ | comparison MOD comparison
+ | comparison EQ comparison
+ | comparison NE comparison
+ | comparison LE comparison
+ | comparison GE comparison
+ | comparison LTU comparison
+ | comparison GTU comparison
+ | comparison LT comparison
+ | comparison GT comparison
+ | comparison BITOR comparison
+ | comparison BITXOR comparison
+ | comparison BITAND comparison
+ | PLUS comparison
+ | MINUS comparison
+ | INVERT comparison
+ | comparison APPEND comparison
+ | power
+ """
+ if len(p) == 4:
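+ # "(RA|0)" is lexed as RA BITOR 0; fold that pattern back into a
+ # single register literal carrying the "0" suffix (e.g. GPR("RA0")).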
+ def reg0(left, op, right):
+ if (isinstance(left, (pc_ast.GPR, pc_ast.FPR)) and
+ isinstance(op, pc_ast.BitOr) and
+ (isinstance(right, pc_ast.DecLiteral) and (str(right) == "0"))):
+ if isinstance(left, pc_ast.GPR):
+ return pc_ast.GPR(f"{str(left)}0")
+ else:
+ return pc_ast.FPR(f"{str(left)}0")
+ return None
+
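+ # "[x] * n" denotes replication of a one-element list; fold it
+ # into a RepeatExpr node.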
+ def repeat(left, op, right):
+ if (isinstance(left, pc_ast.Sequence) and
+ (len(left) == 1) and
+ isinstance(op, pc_ast.Mul)):
+ return pc_ast.RepeatExpr(subject=left[0], times=right)
+ return None
+
+ (left, op, right) = p[1:]
+ for hook in (reg0, repeat):
+ p[0] = hook(left, op, right)
+ if p[0] is not None:
+ break
+ else:
+ p[0] = pc_ast.BinaryExpr(left=left, op=op, right=right)
+
+ elif len(p) == 3:
+ (op, value) = p[1:]
+ p[0] = pc_ast.UnaryExpr(op=op, value=value)
+ else:
+ p[0] = p[1]
+
+ # power: atom trailer* ['**' factor]
+ # trailers enables function calls (and subscripts).
+ # so this is 'trailerlist'
+ def p_power(self, p):
+ """
+ power : atom
+ | atom trailerlist
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ attribute_or_subscript = (
+ pc_ast.Attribute,
+ pc_ast.Subscript,
+ pc_ast.RangeSubscript,
+ )
+ if isinstance(p[2], attribute_or_subscript):
+ node = p[2]
+ while isinstance(node.subject, attribute_or_subscript):
+ node = node.subject
+ if isinstance(node.subject, pc_ast.Arguments):
+ node.subject = pc_ast.Call(name=p[1], args=node.subject)
+ else:
+ node.subject = p[1]
+ p[0] = p[2]
+ elif isinstance(p[2], pc_ast.Arguments):
+ p[0] = pc_ast.Call(name=p[1], args=p[2])
+ else:
+ raise NotImplementedError()
+
+ def p_atom_name(self, p):
+ """
+ atom : NAME
+ """
+ p[0] = p[1]
+
+ def p_atom_number(self, p):
+ """
+ atom : BINARY
+ | NUMBER
+ | HEX
+ | STRING
+ """
+ p[0] = p[1]
+
+ # '[' [listmaker] ']' |
+ def p_atom_listmaker(self, p):
+ """
+ atom : LBRACK listmaker RBRACK
+ """
+ p[0] = p[2]
+
+ def p_listmaker(self, p):
+ """
+ listmaker : test COMMA listmaker
+ | test
+ """
+ if len(p) == 2:
+ p[0] = pc_ast.Sequence([p[1]])
+ else:
+ p[0] = pc_ast.Sequence((p[1],) + p[3])
+
+ def p_atom_tuple(self, p):
+ """
+ atom : LPAR testlist RPAR
+ """
+ value = p[2]
+ if (isinstance(value, pc_ast.Symbol) and
+ (str(value) in self.__class__.REGS)):
+ value = self.__class__.REGS[str(value)](value)
+ p[0] = value
+
+ def p_trailerlist(self, p):
+ """
+ trailerlist : trailer trailerlist
+ | trailer
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ attribute_or_subscript = (
+ pc_ast.Attribute,
+ pc_ast.Subscript,
+ pc_ast.RangeSubscript,
+ )
+ if isinstance(p[2], attribute_or_subscript):
+ node = p[2]
+ while isinstance(node.subject, attribute_or_subscript):
+ node = node.subject
+ node.subject = p[1]
+ p[0] = p[2]
+ else:
+ p[0] = pc_ast.Sequence(p[1] + (p[2],))
+
+ # trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+ def p_trailer(self, p):
+ """
+ trailer : trailer_arglist
+ | trailer_subscript
+ | trailer_attr
+ """
+ p[0] = p[1]
+
+ def p_trailer_arglist(self, p):
+ """
+ trailer_arglist : LPAR arglist RPAR
+ | LPAR RPAR
+ """
+ if len(p) == 3:
+ p[0] = pc_ast.Arguments()
+ else:
+ p[0] = p[2]
+
+ def p_trailer_subscript(self, p):
+ """
+ trailer_subscript : LBRACK subscript RBRACK
+ """
+ p[0] = p[2]
+
+ def p_trailer_attr(self, p):
+ """
+ trailer_attr : PERIOD NAME
+ """
+ p[0] = pc_ast.Attribute(name=p[2])
+
+ # subscript: '.' '.' '.' | test | [test] ':' [test]
+ def p_subscript(self, p):
+ """subscript : test COLON test
+ | test
+ """
+ if len(p) == 4:
+ p[0] = pc_ast.RangeSubscript(start=p[1], end=p[3])
+ else:
+ p[0] = pc_ast.Subscript(index=p[1])
+
+ # testlist: test (',' test)* [',']
+ # Contains shift/reduce error
+ def p_testlist(self, p):
+ """
+ testlist : testlist_multi COMMA
+ | testlist_multi
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ if isinstance(p[1], pc_ast.Sequence):
+ p[0] = p[1]
+ else:
+ p[0] = pc_ast.Sequence([p[1]])
+
+ def p_testlist_multi(self, p):
+ """
+ testlist_multi : testlist_multi COMMA test
+ | test
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ if isinstance(p[1], pc_ast.Sequence):
+ p[0] = pc_ast.Sequence(p[1] + (p[3],))
+ else:
+ p[0] = pc_ast.Sequence([p[1], p[3]])
+
+ # test: or_test ['if' or_test 'else' test] | lambdef
+ # as I don't support 'and', 'or', and 'not' this works down to 'comparison'
+ def p_test(self, p):
+ """
+ test : comparison
+ | comparison QMARK test COLON test
+ """
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = pc_ast.IfExpr(test=p[1],
+ body=pc_ast.Scope([p[3]]),
+ orelse=pc_ast.Scope([p[5]]))
+
+ # arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
+ # | '**' test)
+ # XXX INCOMPLETE: this doesn't allow the trailing comma
+ def p_arglist(self, p):
+ """
+ arglist : arglist COMMA argument
+ | argument
+ """
+ if len(p) == 4:
+ p[0] = pc_ast.Arguments(p[1] + (p[3],))
+ else:
+ p[0] = pc_ast.Arguments([p[1]])
+
+ # argument: test [gen_for] | test '=' test # Really [keyword '='] test
+ def p_argument(self, p):
+ """
+ argument : test
+ """
+ p[0] = p[1]
+
+ def p_error(self, p):
+ if p is None:
+ raise_syntax_error("unexpected end of input",
+ self.filename, 0, 0, self.input_text)
+ raise_syntax_error(str(p.value),
+ self.filename, p.lineno, p.lexpos,
+ self.input_text)
+
+ def parse(self, code, filename=None, debug=False):
+ self.filename = filename
+ self.input_text = code
+ return self.__parser.parse(lexer=self.__lexer, debug=debug, input=code)
--- /dev/null
+import collections
+import contextlib
+import functools
+
+import mdis.dispatcher
+import mdis.visitor
+import mdis.walker
+
+import openpower.oppc.pc_ast as pc_ast
+
+
+class Code(list):
+ def __init__(self):
+ self.__level = 0
+ return super().__init__()
+
+ def __enter__(self):
+ self.__level += 1
+ return self
+
+ def __exit__(self, exc_type, exc_value, exc_traceback):
+ self.__level -= 1
+
+ def __str__(self):
+ if len(self) == 0:
+ raise ValueError("empty code")
+
+ lines = []
+ for (level, stmt) in self:
+ line = ((" " * level * 4) + stmt)
+ lines.append(line)
+
+ return "\n".join(lines)
+
+ def emit(self, stmt, level=0):
+ item = ((level + self.__level), stmt)
+ self.append(item)
+
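+# A Code instance accumulates (level, stmt) pairs; entering it as a
+# context manager deepens the indentation of everything emitted inside:
+#
+#     code = Code()
+#     code.emit("if x = 1 then")
+#     with code:
+#         code.emit("RT <- 1")
+#     str(code)  # "if x = 1 then\n    RT <- 1"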
+
+class Hook(mdis.dispatcher.Hook):
+ def __call__(self, call):
+ hook = super().__call__(call)
+
+ class ConcreteHook(hook.__class__):
+ @functools.wraps(hook.__call__)
+ @contextlib.contextmanager
+ def __call__(self, dispatcher, node, *args, **kwargs):
+ return hook(dispatcher, node, *args, **kwargs)
+
+ return ConcreteHook(*tuple(self))
+
+
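+# Regenerates pseudo-code text from an AST. Each hook yields the node
+# first (children are visited inside the with-block), then emits its
+# own text from the children's already-collected code.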
+class PseudocodeVisitor(mdis.visitor.ContextVisitor):
+ def __init__(self, root):
+ self.__root = root
+ self.__code = collections.defaultdict(lambda: Code())
+
+ return super().__init__()
+
+ def __iter__(self):
+ yield from self.__code.items()
+
+ def __getitem__(self, node):
+ return self.__code[node]
+
+ @Hook(pc_ast.Scope)
+ def Scope(self, node):
+ yield node
+ if node is not self.__root:
+ with self[node]:
+ for subnode in node:
+ for (level, stmt) in self[subnode]:
+ self[node].emit(stmt=stmt, level=level)
+ else:
+ for subnode in node:
+ for (level, stmt) in self[subnode]:
+ self[node].emit(stmt=stmt, level=level)
+
+ @Hook(pc_ast.Call)
+ def Call(self, node):
+ yield node
+ args = []
+ for subnode in node.args:
+ for (level, stmt) in self[subnode]:
+ assert level == 0
+ args.append(stmt)
+ args = ", ".join(args)
+ stmt = f"{node.name}({args})"
+ self[node].emit(stmt=stmt)
+
+ @Hook(pc_ast.Assign, pc_ast.AssignIEA)
+ def Assign(self, node):
+ mapping = {
+ pc_ast.Assign: "<-",
+ pc_ast.AssignIEA: "<-iea",
+ }
+ yield node
+ lvalue = str(self[node.lvalue])
+ if (isinstance(node.lvalue, (pc_ast.GPR, pc_ast.FPR)) or
+ (isinstance(node.lvalue, (pc_ast.Subscript, pc_ast.RangeSubscript)) and
+ isinstance(node.lvalue.subject, (pc_ast.GPR, pc_ast.FPR)))):
+ lvalue = lvalue.replace("(", "").replace(")", "")
+ rvalue = str(self[node.rvalue])
+
+ if isinstance(node.rvalue, pc_ast.IfExpr):
+ # All right, this deserves an explanation.
+ # We basically convert T <- C ? A : B into this code:
+ #
+ # if C then
+ # T <- A
+ # else
+ # T <- B
+ #
+ # To make things work, we must ensure that objects are unique.
+ # Otherwise we'll reuse the bogus code already produced before.
+ (body, orelse) = map(lambda node: node.clone(),
+ (node.rvalue.body[0], node.rvalue.orelse[0]))
+ body = pc_ast.Scope([node.__class__(lvalue=node.lvalue.clone(), rvalue=body)])
+ orelse = pc_ast.Scope([node.__class__(lvalue=node.lvalue.clone(), rvalue=orelse)])
+ tmpnode = node.rvalue.clone(body=body, orelse=orelse)
+ walker = mdis.walker.Walker()
+ traverse(root=tmpnode, visitor=self, walker=walker)
+ for (level, stmt) in self[tmpnode]:
+ self[node].emit(stmt=stmt, level=level)
+ else:
+ stmt = " ".join([
+ lvalue,
+ mapping[node.__class__],
+ rvalue,
+ ])
+ self[node].emit(stmt=stmt)
+
+ @Hook(pc_ast.BinaryExpr)
+ def BinaryExpr(self, node):
+ yield node
+ stmt = " ".join([
+ str(self[node.left]),
+ str(self[node.op]),
+ str(self[node.right]),
+ ])
+ self[node].emit(stmt=f"({stmt})")
+
+ @Hook(pc_ast.IfExpr)
+ def IfExpr(self, node):
+ yield node
+ stmt = " ".join([
+ "if",
+ str(self[node.test]),
+ "then",
+ ])
+ self[node].emit(stmt=stmt)
+ for (level, stmt) in self[node.body]:
+ self[node].emit(stmt=stmt, level=level)
+ if node.orelse:
+ self[node].emit("else")
+ for (level, stmt) in self[node.orelse]:
+ self[node].emit(stmt=stmt, level=level)
+
+ @Hook(pc_ast.ForExpr)
+ def ForExpr(self, node):
+ yield node
+ stmt = " ".join([
+ "for",
+ str(self[node.subject]),
+ "=",
+ str(self[node.start]),
+ "to",
+ str(self[node.end]),
+ ])
+ self[node].emit(stmt=stmt)
+ for (level, stmt) in self[node.body]:
+ self[node].emit(stmt=stmt, level=level)
+
+ @Hook(pc_ast.WhileExpr)
+ def WhileExpr(self, node):
+ yield node
+ stmt = " ".join([
+ "do",
+ "while",
+ str(self[node.test]),
+ ])
+ self[node].emit(stmt=stmt)
+ for (level, stmt) in self[node.body]:
+ self[node].emit(stmt=stmt, level=level)
+ if node.orelse:
+ self[node].emit("else")
+ for (level, stmt) in self[node.orelse]:
+ self[node].emit(stmt=stmt, level=level)
+
+ @Hook(pc_ast.RepeatExpr)
+ def RepeatExpr(self, node):
+ yield node
+ stmt = " ".join([
+ f"[{str(self[node.subject])}]",
+ "*",
+ str(self[node.times]),
+ ])
+ self[node].emit(stmt=f"({stmt})")
+
+ @Hook(pc_ast.SwitchExpr)
+ def SwitchExpr(self, node):
+ yield node
+ self[node].emit(f"switch({str(self[node.subject])})")
+ with self[node]:
+ for (level, stmt) in self[node.cases]:
+ self[node].emit(stmt=stmt, level=level)
+
+ @Hook(pc_ast.Cases)
+ def Cases(self, node):
+ yield node
+ for subnode in node:
+ for (level, stmt) in self[subnode]:
+ self[node].emit(stmt=stmt, level=level)
+
+ @Hook(pc_ast.Case)
+ def Case(self, node):
+ yield node
+ for (level, stmt) in self[node.labels]:
+ self[node].emit(stmt=stmt, level=level)
+ for (level, stmt) in self[node.body]:
+ self[node].emit(stmt=stmt, level=level)
+
+ @Hook(pc_ast.Labels)
+ def Labels(self, node):
+ yield node
+ if ((len(node) == 1) and isinstance(node[-1], pc_ast.DefaultLabel)):
+ stmt = "default:"
+ else:
+ labels = ", ".join(map(lambda label: str(self[label]), node))
+ stmt = f"case ({labels}):"
+ self[node].emit(stmt=stmt)
+
+ @Hook(pc_ast.Label)
+ def Label(self, node):
+ yield node
+ self[node].emit(stmt=str(node))
+
+ @Hook(pc_ast.DefaultLabel)
+ def DefaultLabel(self, node):
+ yield node
+ self[node].emit(stmt="default:")
+
+ @Hook(pc_ast.UnaryExpr)
+ def UnaryExpr(self, node):
+ yield node
+ stmt = "".join([
+ str(self[node.op]),
+ f"({str(self[node.value])})",
+ ])
+ self[node].emit(stmt=stmt)
+
+ @Hook(pc_ast.BinLiteral, pc_ast.DecLiteral, pc_ast.HexLiteral)
+ def Integer(self, node):
+ yield node
+ self[node].emit(stmt=str(node))
+
+ @Hook(pc_ast.StringLiteral)
+ def StringLiteral(self, node):
+ yield node
+ self[node].emit(stmt=f"'{str(node)}'")
+
+ @Hook(pc_ast.Symbol)
+ def Symbol(self, node):
+ yield node
+ self[node].emit(stmt=str(node))
+
+ @Hook(pc_ast.Attribute)
+ def Attribute(self, node):
+ yield node
+ stmt = ".".join([
+ str(self[node.subject]),
+ str(self[node.name]),
+ ])
+ self[node].emit(stmt=stmt)
+
+ @Hook(pc_ast.Not, pc_ast.Add, pc_ast.Sub,
+ pc_ast.Mul, pc_ast.MulS, pc_ast.MulU,
+ pc_ast.Div, pc_ast.DivT, pc_ast.Mod,
+ pc_ast.Sqrt,
+ pc_ast.Eq, pc_ast.NotEq,
+ pc_ast.Lt, pc_ast.Le, pc_ast.LtU,
+ pc_ast.Gt, pc_ast.Ge, pc_ast.GtU,
+ pc_ast.LShift, pc_ast.RShift,
+ pc_ast.AssignOp, pc_ast.AssignIEAOp,
+ pc_ast.BitAnd, pc_ast.BitOr, pc_ast.BitXor,
+ pc_ast.BitConcat)
+ def Op(self, node):
+ yield node
+ mapping = {
+ pc_ast.Not: "¬",
+ pc_ast.Add: "+",
+ pc_ast.Sub: "-",
+ pc_ast.Mul: "*",
+ pc_ast.MulS: "*si",
+ pc_ast.MulU: "*ui",
+ pc_ast.Div: "/",
+ pc_ast.DivT: "÷",
+ pc_ast.Mod: "%",
+ pc_ast.Sqrt: "√",
+ pc_ast.Eq: "=",
+ pc_ast.NotEq: "!=",
+ pc_ast.Lt: "<",
+ pc_ast.Le: "<=",
+ pc_ast.LtU: "<u",
+ pc_ast.Gt: ">",
+ pc_ast.Ge: ">=",
+ pc_ast.GtU: ">u",
+ pc_ast.LShift: "<<",
+ pc_ast.RShift: ">>",
+ pc_ast.AssignOp: "<-",
+ pc_ast.AssignIEAOp: "<-iea",
+ pc_ast.BitAnd: "&",
+ pc_ast.BitOr: "|",
+ pc_ast.BitXor: "^",
+ pc_ast.BitConcat: "||",
+ }
+ stmt = mapping[node.__class__]
+ self[node].emit(stmt=stmt)
+
+ @Hook(pc_ast.LParenthesis, pc_ast.RParenthesis,
+ pc_ast.LBracket, pc_ast.RBracket)
+ def BracketOrParenthesis(self, node):
+ yield node
+ mapping = {
+ pc_ast.LParenthesis: "(",
+ pc_ast.RParenthesis: ")",
+ pc_ast.LBracket: "[",
+ pc_ast.RBracket: "]",
+ }
+ stmt = mapping[node.__class__]
+ self[node].emit(stmt=stmt)
+
+ @Hook(pc_ast.Subscript)
+ def Subscript(self, node):
+ yield node
+ stmt = "".join([
+ str(self[node.subject]),
+ "[",
+ str(self[node.index]),
+ "]",
+ ])
+ self[node].emit(stmt=stmt)
+
+ @Hook(pc_ast.RangeSubscript)
+ def RangeSubscript(self, node):
+ yield node
+ stmt = "".join([
+ str(self[node.subject]),
+ "[",
+ str(self[node.start]),
+ ":",
+ str(self[node.end]),
+ "]",
+ ])
+ self[node].emit(stmt=stmt)
+
+ @Hook(pc_ast.Colon)
+ def Colon(self, node):
+ yield node
+ self[node].emit(stmt=":")
+
+ @Hook(pc_ast.Linebreak, pc_ast.Endmarker)
+ def Ignore(self, node):
+ yield node
+
+ @Hook(pc_ast.Keyword)
+ def Keyword(self, node):
+ yield node
+ self[node].emit(stmt=node.__doc__)
+
+ @Hook(pc_ast.Sequence)
+ def Sequence(self, node):
+ yield node
+ stmt = ",".join(map(lambda subnode: str(self[subnode]), node))
+ self[node].emit(stmt=f"({stmt})")
+
+ @Hook(pc_ast.Literal)
+ def Literal(self, node):
+ yield node
+ self[node].emit(stmt=str(node))
+
+ @Hook(pc_ast.GPR, pc_ast.FPR)
+ def Reg(self, node):
+ yield node
+ if node.endswith("0"):
+ self[node].emit(stmt=f"({str(node)[:-1]}|0)")
+ else:
+ self[node].emit(stmt=f"({str(node)})")
+
+ @Hook(pc_ast.Node)
+ def Node(self, node):
+ raise NotImplementedError(type(node))
+
+
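+# Depth-first traversal: the visitor's context manager is entered before
+# the children are visited and its post-yield code runs afterwards, so
+# hooks emit code bottom-up.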
+def traverse(root, visitor, walker):
+ with visitor(root):
+ for node in walker(root):
+ traverse(root=node, visitor=visitor, walker=walker)
+
+
+def pseudocode(root):
+ walker = mdis.walker.Walker()
+ visitor = PseudocodeVisitor(root=root)
+ traverse(root=root, visitor=visitor, walker=walker)
+ for (level, stmt) in visitor[root]:
+ yield (level, stmt)