oppc: introduce openpower pseudocode
author Dmitry Selyutin <ghostmansd@gmail.com>
Sun, 7 Jan 2024 22:57:51 +0000 (01:57 +0300)
committer Dmitry Selyutin <ghostmansd@gmail.com>
Sun, 7 Jan 2024 22:57:51 +0000 (01:57 +0300)
src/openpower/oppc/.gitignore [new file with mode: 0644]
src/openpower/oppc/__main__.py [new file with mode: 0644]
src/openpower/oppc/pc_ast.py [new file with mode: 0644]
src/openpower/oppc/pc_lexer.py [new file with mode: 0644]
src/openpower/oppc/pc_parser.py [new file with mode: 0644]
src/openpower/oppc/pc_util.py [new file with mode: 0644]

diff --git a/src/openpower/oppc/.gitignore b/src/openpower/oppc/.gitignore
new file mode 100644
index 0000000..b10f64e
--- /dev/null
+++ b/src/openpower/oppc/.gitignore
@@ -0,0 +1,2 @@
+parser.out
+yacctab.py
diff --git a/src/openpower/oppc/__main__.py b/src/openpower/oppc/__main__.py
new file mode 100644
index 0000000..420b4d8
--- /dev/null
+++ b/src/openpower/oppc/__main__.py
@@ -0,0 +1,68 @@
+import glob
+import io
+import re
+
+from openpower.decoder.power_enums import (
+    find_wiki_dir,
+)
+
+import openpower.oppc.pc_lexer as pc_lexer
+import openpower.oppc.pc_parser as pc_parser
+import openpower.oppc.pc_util as pc_util
+
+
+def dedent(line):
+    if line.startswith("    "):
+        return line[4:].rstrip()
+    return line.rstrip()
+
+
+def parse(parser, origin):
+    origin = tuple(origin)
+    tree = parser.parse(code="\n".join(origin))
+    stream = io.StringIO()
+    for (level, line) in pc_util.pseudocode(tree):
+        print(f"{' ' * 4 * level}{line}", file=stream)
+    stream.seek(0)
+    target = tuple(stream)
+    return (origin, target)
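+
+
+# Invariant sketched for clarity: parse() pretty-prints the tree, so feeding
+# its output back through parse() must reproduce it verbatim; the loop below
+# asserts exactly that (stage1 == stage2 == stage3) for every pseudocode
+# block extracted from the wiki sources.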
+
+
+lexer = pc_lexer.IndentLexer(debug=False)
+parser = pc_parser.Parser(lexer=lexer)
+pattern = re.compile(r"Pseudo-code:(.*?)(?:Special Registers Altered|Description):", re.DOTALL)
+for path in []: # glob.glob(f"{find_wiki_dir()}/../isa/*.mdwn"):
+    with open(path, "r", encoding="UTF-8") as stream:
+        data = stream.read()
+    for origin in pattern.findall(data):
+        try:
+            (stage0, stage1) = parse(parser, map(dedent, origin.split("\n")))
+            (stage2, stage3) = parse(parser, map(dedent, stage1))
+            stage1 = tuple(map(dedent, stage1))
+            stage3 = tuple(map(dedent, stage3))
+            assert stage1 == stage2 and stage2 == stage3
+        except AssertionError as exc:
+            print(stage0)
+            print(stage1)
+            print(stage3)
+            raise exc
+        except Exception as exc:
+            print(path)
+            print(origin)
+            raise exc
+
+code = """
+src <- [0]*64
+src[64-XLEN:63] <- (RS)
+result <- [0]*64
+do i = 0 to 1
+    n <- i * 32
+    result[n+0:n+7] <- 0
+    result[n+8:n+19] <- DPD_TO_BCD(src[n+12:n+21])
+    result[n+20:n+31] <- DPD_TO_BCD(src[n+22:n+31])
+RA <- result[64-XLEN:63]
+"""
+tree = parser.parse(code=code)
+print(tree)
+for (level, line) in pc_util.pseudocode(tree):
+    print(f"{' ' * 4 * level}{line}")
diff --git a/src/openpower/oppc/pc_ast.py b/src/openpower/oppc/pc_ast.py
new file mode 100644
index 0000000..0a51e62
--- /dev/null
+++ b/src/openpower/oppc/pc_ast.py
@@ -0,0 +1,550 @@
+import copy
+import dataclasses
+
+
+class NodeMeta(type):
+    pass
+
+
+class Node(metaclass=NodeMeta):
+    def __repr__(self):
+        return f"{hex(id(self))}@{self.__class__.__name__}()"
+
+    def __eq__(self, other):
+        if not isinstance(other, self.__class__):
+            return NotImplemented
+        return (id(self) == id(other))
+
+    def clone(self):
+        return copy.deepcopy(self)
+
+
+class TokenMeta(NodeMeta):
+    pass
+
+
+class Token(Node, str, metaclass=TokenMeta):
+    def __new__(cls, value):
+        if isinstance(value, cls):
+            value = str(value)
+        if not isinstance(value, str):
+            raise ValueError(value)
+
+        return super().__new__(cls, value)
+
+    def __str__(self):
+        return super(Node, self).__str__()
+
+    def __hash__(self):
+        return super(Node, self).__hash__()
+
+    def __repr__(self):
+        return f"{hex(id(self))}@{self.__class__.__name__}({str(self)})"
+
+
+class SequenceMeta(NodeMeta):
+    __typeid__ = Node
+
+    def __new__(metacls, clsname, bases, ns, *, typeid=Node):
+        ns.setdefault("__typeid__", typeid)
+
+        return super().__new__(metacls, clsname, bases, ns)
+
+
+class Sequence(Node, tuple, metaclass=SequenceMeta):
+    def __new__(cls, iterable=tuple()):
+        def validate(item):
+            if not isinstance(item, cls.__typeid__):
+                raise ValueError(cls, item)
+            return item
+
+        return super().__new__(cls, map(validate, iterable))
+
+    def __hash__(self):
+        return super(Node, self).__hash__()
+
+    def __repr__(self):
+        return f"{hex(id(self))}@{self.__class__.__name__}({repr(list(self))})"
+
+
+class Arguments(Sequence):
+    pass
+
+
+class Scope(Sequence):
+    pass
+
+
+class Module(Sequence):
+    pass
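+
+
+# Validation sketch (illustrative): Sequence subclasses check every item
+# against __typeid__, e.g. Labels (defined below with typeid=Label) accepts
+# Labels([Label("1")]) but raises ValueError for Labels(["1"]).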
+
+
+class DataclassMeta(NodeMeta):
+    def __new__(metacls, clsname, bases, ns):
+        cls = super().__new__(metacls, clsname, bases, ns)
+        wrap = dataclasses.dataclass(init=True, eq=False, unsafe_hash=True, frozen=False)
+        datacls = wrap(cls)
+        origin = datacls.__repr__
+        datacls.__repr__ = lambda self: f"{hex(id(self))}@{origin(self)}"
+
+        return datacls
+
+
+class Dataclass(Node, metaclass=DataclassMeta):
+    def __post_init__(self):
+        for field in dataclasses.fields(self):
+            key = field.name
+            value = getattr(self, key)
+            if not isinstance(value, field.type):
+                raise ValueError(f"{self.__class__.__name__}.{key}: {value!r}")
+
+    def clone(self, **kwargs):
+        return copy.deepcopy(dataclasses.replace(self, **kwargs))
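+
+
+# Validation sketch (illustrative): __post_init__ type-checks each field
+# against its annotation, so
+#
+#     Assign(lvalue=Symbol("RT"), rvalue=DecLiteral("5"))   # accepted
+#     Assign(lvalue="RT", rvalue=DecLiteral("5"))           # ValueError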
+
+
+class LiteralMeta(TokenMeta):
+    def __new__(metacls, clsname, bases, ns, *, choices=()):
+        ns.setdefault("__choices__", choices)
+
+        return super().__new__(metacls, clsname, bases, ns)
+
+    def __iter__(cls):
+        yield from cls.__choices__
+
+
+class Literal(Token, metaclass=LiteralMeta):
+    __choices__ = ()
+
+    def __new__(cls, value):
+        choices = cls.__choices__
+        if isinstance(value, Token):
+            value = str(value)
+        if choices and value not in choices:
+            raise ValueError(value)
+
+        return super().__new__(cls, value)
+
+
+class GPR(Literal, choices=("RA", "RA0", "RB", "RB0", "RC", "RC0", "RS", "RSp", "RT", "RTp")):
+    pass
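+
+
+# Choice sketch (illustrative): GPR("RA") succeeds, whereas GPR("R31")
+# raises ValueError because "R31" is not among GPR.__choices__.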
+
+
+class FPR(Literal, choices=("FPR", "FRA", "FRAp", "FRB", "FRBp", "FRC", "FRS", "FRSp", "FRT", "FRTp")):
+    pass
+
+
+class CR3(Literal, choices=("BF", "BFA")):
+    pass
+
+
+class CR5(Literal, choices=("BA", "BB", "BC", "BI", "BT")):
+    pass
+
+
+class XER(Literal, choices=("OV", "OV32", "CA", "CA32", "SO")):
+    pass
+
+
+class IntLiteral(Literal):
+    pass
+
+
+class BinLiteral(IntLiteral):
+    r"""0b[01_]+"""
+    pass
+
+
+class HexLiteral(IntLiteral):
+    r"""0x[0-9A-Fa-f_]+"""
+    pass
+
+
+class DecLiteral(IntLiteral):
+    r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
+    pass
+
+
+class Symbol(Token):
+    r"""[A-Za-z_]+[A-Za-z0-9_]*"""
+    pass
+
+
+class Attribute(Dataclass):
+    name: Symbol
+    subject: Node = Node()
+
+
+class StringLiteral(Literal):
+    __STRING_ESCAPE = r"""(\\[0-9a-zA-Z._~!=&\^\-\\?'"])"""
+    __STRING_CHAR = (r"""([^"\\\n]|""" + __STRING_ESCAPE + ")")
+    __STRING = ("[\"']" + __STRING_CHAR + "*" + "[\"']")
+
+    __doc__ = __STRING
+
+    def __repr__(self):
+        return f"{hex(id(self))}@{self.__class__.__name__}({self.__str__()!r})"
+
+
+class Whitespace(Token):
+    r"""[^\S\r\n]+"""
+
+
+class Linebreak(Token):
+    r"""[\r\n]+"""
+
+
+class Comment(Token):
+    r"""[ ]*(?:\043|<!--)[^\n]*"""
+    pass
+
+
+class Indent(Node):
+    pass
+
+
+class Dedent(Node):
+    pass
+
+
+class LBracket(Token):
+    r"""\["""
+    pass
+
+
+class RBracket(Token):
+    r"""\]"""
+    pass
+
+
+class LParenthesis(Token):
+    r"""\("""
+    pass
+
+
+class RParenthesis(Token):
+    r"""\)"""
+    pass
+
+
+class Call(Dataclass):
+    name: Symbol
+    args: Arguments
+
+
+class AssignOp(Token):
+    r"""<-"""
+    pass
+
+
+class AssignIEAOp(Token):
+    r"""<-iea"""
+    pass
+
+
+class Assign(Dataclass):
+    lvalue: Node
+    rvalue: Node
+
+
+class AssignIEA(Assign):
+    lvalue: Node
+    rvalue: Node
+
+
+class Not(Token):
+    r"""(?:¬|~)"""
+    pass
+
+
+class Add(Token):
+    r"""\+"""
+    pass
+
+
+class Sub(Token):
+    r"""\-"""
+    pass
+
+
+class Plus(Node):
+    pass
+
+
+class Minus(Node):
+    pass
+
+
+class Mul(Token):
+    r"""(?:×|\*)"""
+    pass
+
+
+class MulS(Token):
+    r"""(?:×si|×s|\*si|\*s)"""
+    pass
+
+
+class MulU(Token):
+    r"""(?:×ui|×u|\*ui|\*u)"""
+    pass
+
+
+class Div(Token):
+    r"""/"""
+    pass
+
+
+class DivT(Token):
+    r"""÷"""
+    pass
+
+
+class Mod(Token):
+    r"""%"""
+    pass
+
+
+class Sqrt(Token):
+    r"""√"""
+    pass
+
+
+class Eq(Token):
+    r"""="""
+    pass
+
+
+class NotEq(Token):
+    r"""(?:≠|!=)"""
+    pass
+
+
+class Lt(Token):
+    r"""<"""
+    pass
+
+
+class Le(Token):
+    r"""<="""
+    pass
+
+
+class Ge(Token):
+    r""">="""
+    pass
+
+
+class Gt(Token):
+    r""">"""
+    pass
+
+
+class LtU(Token):
+    r"""<u"""
+    pass
+
+
+class GtU(Token):
+    r""">u"""
+    pass
+
+
+class BitAnd(Token):
+    r"""&"""
+    pass
+
+
+class BitOr(Token):
+    r"""\|"""
+    pass
+
+
+class BitXor(Token):
+    r"""(?:⊕|≡|\^)"""
+    pass
+
+
+class UnaryExpr(Dataclass):
+    op: Node
+    value: Node
+
+
+class BinaryExpr(Dataclass):
+    left: Node
+    op: Node
+    right: Node
+
+
+class Keyword(Token):
+    pass
+
+
+class FunctionKeyword(Keyword):
+    r"""def"""
+    pass
+
+
+class IfKeyword(Keyword):
+    r"""if"""
+    pass
+
+
+class ThenKeyword(Keyword):
+    r"""then"""
+    pass
+
+
+class ElseKeyword(Keyword):
+    r"""else"""
+    pass
+
+
+class LeaveKeyword(Keyword):
+    r"""leave"""
+    pass
+
+
+class ForKeyword(Keyword):
+    r"""for"""
+    pass
+
+
+class ToKeyword(Keyword):
+    r"""to"""
+    pass
+
+
+class WhileKeyword(Keyword):
+    r"""while"""
+    pass
+
+
+class DoKeyword(Keyword):
+    r"""do"""
+    pass
+
+
+class ReturnKeyword(Keyword):
+    r"""return"""
+    pass
+
+
+class SwitchKeyword(Keyword):
+    r"""switch"""
+    pass
+
+
+class CaseKeyword(Keyword):
+    r"""case"""
+    pass
+
+
+class DefaultKeyword(Keyword):
+    r"""default"""
+    pass
+
+
+class Colon(Token):
+    r""":"""
+    pass
+
+
+class LShift(Token):
+    r"""<<"""
+    pass
+
+
+class RShift(Token):
+    r""">>"""
+    pass
+
+
+class Comma(Token):
+    r""","""
+    pass
+
+
+class Period(Token):
+    r"""\."""
+    pass
+
+
+class Semicolon(Token):
+    r""";"""
+    pass
+
+
+class BitConcat(Token):
+    r"""\|\|"""
+    pass
+
+
+class Question(Token):
+    r"""\?"""
+    pass
+
+
+class Endmarker(Node):
+    pass
+
+
+class IfExpr(Dataclass):
+    test: Node
+    body: Scope
+    orelse: Scope
+
+
+class ForExpr(Dataclass):
+    subject: Node
+    start: Node
+    end: Node
+    body: Scope
+
+
+class WhileExpr(Dataclass):
+    test: Node
+    body: Scope
+    orelse: Scope
+
+
+class RepeatExpr(Dataclass):
+    subject: Node
+    times: Node
+
+
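+class Return(Dataclass):
+    # Assumption, sketched in for completeness: pc_parser's p_return_stmt
+    # constructs pc_ast.Return, which is not otherwise defined in this
+    # commit; a minimal node avoids an AttributeError when a "return"
+    # statement is parsed (pc_util would still need a hook to print it).
+    value: Node
+
+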
+class Subscript(Dataclass):
+    index: Node
+    subject: Node = Node()
+
+
+class RangeSubscript(Dataclass):
+    start: Node
+    end: Node = Node()
+    subject: Node = Node()
+
+
+class Label(Literal):
+    pass
+
+
+class DefaultLabel(Label):
+    def __new__(cls):
+        return super().__new__(cls, "default")
+
+
+class Labels(Sequence, typeid=Label):
+    pass
+
+
+class Case(Dataclass):
+    labels: Labels
+    body: Scope
+
+
+class Cases(Sequence, typeid=Case):
+    pass
+
+
+class SwitchExpr(Dataclass):
+    subject: Node
+    cases: Cases
diff --git a/src/openpower/oppc/pc_lexer.py b/src/openpower/oppc/pc_lexer.py
new file mode 100644
index 0000000..bfa2c3b
--- /dev/null
+++ b/src/openpower/oppc/pc_lexer.py
@@ -0,0 +1,569 @@
+# Based on GardenSnake - a parser generator demonstration program
+# GardenSnake was released into the Public Domain by Andrew Dalke.
+
+# Portions of this work are derived from Python's Grammar definition
+# and may be covered under the Python copyright and license
+#
+#          Andrew Dalke / Dalke Scientific Software, LLC
+#             30 August 2006 / Cape Town, South Africa
+
+# Modifications for inclusion in PLY distribution
+
+from copy import copy
+
+from ply import lex
+
+import openpower.oppc.pc_ast as pc_ast
+
+
+def bind(cls):
+    def wrapper(self, t):
+        t.value = cls(t.value)
+        return t
+
+    wrapper.__name__ = cls.__name__
+    wrapper.__doc__ = cls.__doc__
+
+    return wrapper
+
+
+class SyntaxError2(Exception):
+    """ class used to raise a syntax error but get ply to stop eating errors
+    since it catches and discards SyntaxError after setting a flag.
+    """
+
+    def __init__(self, *args, cls=SyntaxError):
+        super().__init__(*args)
+        self.cls = cls
+
+    def __repr__(self):
+        return repr(self.cls(*self.args))
+
+    def __str__(self):
+        return str(self.cls(*self.args))
+
+    def raise_syntax_error(self):
+        raise self.cls(*self.args) from self
+
+
+def raise_syntax_error(msg, filename, lineno, lexpos, input_text,
+                       cls=SyntaxError):
+    line_start = input_text.rfind("\n", 0, lexpos) + 1
+    line_end = input_text.find("\n", line_start)
+    col = (lexpos - line_start) + 1
+    raise SyntaxError2(str(msg), (filename, lineno, col,
+                                  input_text[line_start:line_end]), cls=cls)
+
+# I implemented INDENT / DEDENT generation as a post-processing filter
+
+# The original lex token stream contains WS and NEWLINE characters.
+# WS will only occur before any other tokens on a line.
+
+# I have three filters.  One tags tokens by adding two attributes.
+# "must_indent" is True if the token must be indented from the
+# previous code.  The other is "at_line_start" which is True for WS
+# and the first non-WS/non-NEWLINE on a line.  It flags the check to
+# see if the new line has changed indentation level.
+
+# Python's syntax has three INDENT states
+#  0) no colon hence no need to indent
+#  1) "if 1: go()" - simple statements have a COLON but no need for an indent
+#  2) "if 1:\n  go()" - complex statements have a COLON NEWLINE and must indent
+NO_INDENT = 0
+MAY_INDENT = 1
+MUST_INDENT = 2
+
+# turn into python-like colon syntax from pseudo-code syntax.
+# identify tokens which tell us whether a "hidden colon" is needed.
+# this in turn means that track_tokens_filter "works" without needing
+# complex grammar rules
+
+
+def python_colonify(lexer, tokens):
+    implied_colon_needed = False
+    for token in tokens:
+        if token.type == "THEN":
+            # turn then into colon
+            token.type = "COLON"
+            token.value = pc_ast.Colon(str(token.value))
+            yield token
+        elif token.type == "ELSE":
+            yield token
+            token = copy(token)
+            token.type = "COLON"
+            token.value = pc_ast.Colon(str(token.value))
+            yield token
+        elif token.type in ["DO", "WHILE", "FOR", "SWITCH"]:
+            implied_colon_needed = True
+            yield token
+        elif token.type == "NEWLINE":
+            if implied_colon_needed:
+                ctok = copy(token)
+                ctok.type = "COLON"
+                ctok.value = pc_ast.Colon(str(token.value))
+                yield ctok
+                implied_colon_needed = False
+            yield token
+        else:
+            yield token
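+
+
+# Effect sketch (illustrative): for the input line "do while a < b" the
+# filter yields DO WHILE NAME LT NAME COLON NEWLINE; the implied COLON is
+# synthesized just before the NEWLINE, exactly as if the source had been
+# written Python-style as "do while a < b:".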
+
+
+# only care about whitespace at the start of a line
+def track_tokens_filter(lexer, tokens):
+    oldignore = lexer.lexignore
+    lexer.at_line_start = at_line_start = True
+    indent = NO_INDENT
+    saw_colon = False
+    for token in tokens:
+        token.at_line_start = at_line_start
+
+        if token.type == "COLON":
+            at_line_start = False
+            indent = MAY_INDENT
+            token.must_indent = False
+
+        elif token.type == "NEWLINE":
+            at_line_start = True
+            if indent == MAY_INDENT:
+                indent = MUST_INDENT
+            token.must_indent = False
+
+        elif token.type == "WS":
+            assert token.at_line_start == True
+            at_line_start = True
+            token.must_indent = False
+
+        else:
+            # A real token; only indent after COLON NEWLINE
+            if indent == MUST_INDENT:
+                token.must_indent = True
+            else:
+                token.must_indent = False
+            at_line_start = False
+            indent = NO_INDENT
+
+        # really bad hack that changes ignore lexer state.
+        # when "must indent" is seen (basically "real tokens" seen)
+        # then ignore whitespace.
+        if token.must_indent:
+            lexer.lexignore = ("ignore", " ")
+        else:
+            lexer.lexignore = oldignore
+
+        token.indent = indent
+        yield token
+        lexer.at_line_start = at_line_start
+
+
+def _new_token(type, lineno):
+    cls = {
+        "ENDMARKER": pc_ast.Endmarker,
+        "INDENT": pc_ast.Indent,
+        "DEDENT": pc_ast.Dedent,
+    }[type]
+    tok = lex.LexToken()
+    tok.type = type
+    tok.value = cls()
+    tok.lineno = lineno
+    tok.lexpos = -1
+    return tok
+
+# Synthesize a DEDENT tag
+
+
+def DEDENT(lineno):
+    return _new_token("DEDENT", lineno)
+
+# Synthesize an INDENT tag
+
+
+def INDENT(lineno):
+    return _new_token("INDENT", lineno)
+
+
+def count_spaces(l):
+    for i in range(len(l)):
+        if l[i] != " ":
+            return i
+    return len(l)
+
+
+def annoying_case_hack_filter(code):
+    """add annoying "silent keyword" (fallthrough)
+
+    this tricks the parser into taking the (silent) case statement
+    as a "small expression".  it can then be spotted and used to indicate
+    "fall through" to the next case (in the parser)
+
+    also skips blank lines
+
+    bugs: any function that starts with the letters "case" or "default"
+    will be detected erroneously.  fixing that involves doing a token
+    lexer which spots the fact that "case" and "default" are words,
+    separating them from space, colon, bracket etc.
+
+    http://bugs.libre-riscv.org/show_bug.cgi?id=280
+    """
+    res = []
+    prev_spc_count = None
+    for l in code.split("\n"):
+        spc_count = count_spaces(l)
+        nwhite = l[spc_count:]
+        if len(nwhite) == 0:  # skip blank lines
+            res.append("")
+            continue
+        if nwhite.startswith("case") or nwhite.startswith("default"):
+            if (prev_spc_count is not None and
+                prev_spc_count == spc_count and
+                    (res[-1].endswith(":") or res[-1].endswith(": fallthrough"))):
+                res[-1] += " fallthrough"  # add to previous line
+            prev_spc_count = spc_count
+        else:
+            prev_spc_count = None
+        res.append(l)
+    return "\n".join(res)
+
+
+# Track the indentation level and emit the right INDENT / DEDENT events.
+def indentation_filter(tokens, filename):
+    # A stack of indentation levels; will never pop item 0
+    levels = [0]
+    token = None
+    depth = 0
+    prev_was_ws = False
+    for token in tokens:
+        # WS only occurs at the start of the line
+        # There may be WS followed by NEWLINE so
+        # only track the depth here.  Don't indent/dedent
+        # until there's something real.
+        if token.type == "WS":
+            assert depth == 0
+            depth = len(token.value)
+            prev_was_ws = True
+            # WS tokens are never passed to the parser
+            continue
+
+        if token.type == "NEWLINE":
+            depth = 0
+            if prev_was_ws or token.at_line_start:
+                # ignore blank lines
+                continue
+            # pass the other cases on through
+            yield token
+            continue
+
+        # then it must be a real token (not WS, not NEWLINE)
+        # which can affect the indentation level
+
+        prev_was_ws = False
+        if token.must_indent:
+            # The current depth must be larger than the previous level
+            if not (depth > levels[-1]):
+                raise_syntax_error("expected an indented block",
+                                   filename, token.lexer.lineno,
+                                   token.lexer.lexpos, token.lexer.lexdata,
+                                   cls=IndentationError)
+
+            levels.append(depth)
+            yield INDENT(token.lineno)
+
+        elif token.at_line_start:
+            # Must be on the same level or one of the previous levels
+            if depth == levels[-1]:
+                # At the same level
+                pass
+            elif depth > levels[-1]:
+                raise_syntax_error("indent increase but not in new block",
+                                   filename, token.lexer.lineno,
+                                   token.lexer.lexpos, token.lexer.lexdata,
+                                   cls=IndentationError)
+            else:
+                # Back up; but only if it matches a previous level
+                try:
+                    i = levels.index(depth)
+                except ValueError:
+                    raise_syntax_error("inconsistent indentation",
+                                       filename, token.lexer.lineno,
+                                       token.lexer.lexpos, token.lexer.lexdata,
+                                       cls=IndentationError)
+                for _ in range(i+1, len(levels)):
+                    yield DEDENT(token.lineno)
+                    levels.pop()
+
+        yield token
+
+    ### Finished processing ###
+
+    # Must dedent any remaining levels
+    if len(levels) > 1:
+        assert token is not None
+        for _ in range(1, len(levels)):
+            yield DEDENT(token.lineno)
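+
+
+# Emission sketch (illustrative): for
+#
+#     if a = b then
+#         RT <- 1
+#
+# the combined filters yield IF NAME EQ NAME COLON NEWLINE INDENT NAME
+# ASSIGN NUMBER NEWLINE DEDENT, giving the parser explicit block
+# delimiters just as Python's own tokenizer would.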
+
+
+# The top-level filter adds an ENDMARKER, if requested.
+# Python's grammar uses it.
+def filter(lexer, add_endmarker, filename):
+    token = None
+    tokens = iter(lexer.token, None)
+    tokens = python_colonify(lexer, tokens)
+    tokens = track_tokens_filter(lexer, tokens)
+    for token in indentation_filter(tokens, filename):
+        yield token
+
+    if add_endmarker:
+        lineno = 1
+        if token is not None:
+            lineno = token.lineno
+        yield _new_token("ENDMARKER", lineno)
+
+
+##### Lexer ######
+class Lexer:
+    tokens = (
+        "DEF",
+        "IF",
+        "THEN",
+        "ELSE",
+        "FOR",
+        "TO",
+        "DO",
+        "WHILE",
+        "BREAK",
+        "NAME",
+        "HEX",     # hex numbers
+        "NUMBER",  # Python decimals
+        "BINARY",  # Python binary
+        "STRING",  # single quoted strings only; syntax of raw strings
+        "LPAR",
+        "RPAR",
+        "LBRACK",
+        "RBRACK",
+        "COLON",
+        "EQ",
+        "ASSIGNEA",
+        "ASSIGN",
+        "LTU",
+        "GTU",
+        "NE",
+        "LE",
+        "LSHIFT",
+        "RSHIFT",
+        "GE",
+        "LT",
+        "GT",
+        "PLUS",
+        "MINUS",
+        "MULT",
+        "DIV",
+        "MOD",
+        "INVERT",
+        "APPEND",
+        "BITOR",
+        "BITAND",
+        "BITXOR",
+        "RETURN",
+        "SWITCH",
+        "CASE",
+        "DEFAULT",
+        "WS",
+        "NEWLINE",
+        "COMMA",
+        "QMARK",
+        "PERIOD",
+        "SEMICOLON",
+        "INDENT",
+        "DEDENT",
+        "ENDMARKER",
+    )
+
+    # Build the lexer
+    def build(self, **kwargs):
+        self.lexer = lex.lex(module=self, **kwargs)
+        self.filename = None
+
+    @lex.TOKEN(pc_ast.HexLiteral.__doc__)
+    def t_HEX(self, t):
+        t.value = pc_ast.HexLiteral(t.value)
+        return t
+
+    @lex.TOKEN(pc_ast.BinLiteral.__doc__)
+    def t_BINARY(self, t):
+        t.value = pc_ast.BinLiteral(t.value)
+        return t
+
+    @lex.TOKEN(pc_ast.DecLiteral.__doc__)
+    def t_NUMBER(self, t):
+        t.value = pc_ast.DecLiteral(t.value)
+        return t
+
+    @lex.TOKEN(pc_ast.StringLiteral.__doc__)
+    def t_STRING(self, t):
+        t.value = pc_ast.StringLiteral(t.value[1:-1])
+        return t
+
+    t_COLON = pc_ast.Colon.__doc__
+    t_EQ = pc_ast.Eq.__doc__
+    t_ASSIGNEA = pc_ast.AssignIEAOp.__doc__
+    t_ASSIGN = pc_ast.AssignOp.__doc__
+    t_LTU = pc_ast.LtU.__doc__
+    t_GTU = pc_ast.GtU.__doc__
+    t_NE = pc_ast.NotEq.__doc__
+    t_LE = pc_ast.Le.__doc__
+    t_GE = pc_ast.Ge.__doc__
+    t_LSHIFT = pc_ast.LShift.__doc__
+    t_RSHIFT = pc_ast.RShift.__doc__
+    t_LT = pc_ast.Lt.__doc__
+    t_GT = pc_ast.Gt.__doc__
+    t_PLUS = pc_ast.Add.__doc__
+    t_MINUS = pc_ast.Sub.__doc__
+    t_MULT = pc_ast.Mul.__doc__
+    t_DIV = pc_ast.Div.__doc__
+    t_MOD = pc_ast.Mod.__doc__
+    t_INVERT = pc_ast.Not.__doc__
+    t_COMMA = pc_ast.Comma.__doc__
+    t_PERIOD = pc_ast.Period.__doc__
+    t_SEMICOLON = pc_ast.Semicolon.__doc__
+    t_APPEND = pc_ast.BitConcat.__doc__
+    t_BITOR = pc_ast.BitOr.__doc__
+    t_BITAND = pc_ast.BitAnd.__doc__
+    t_BITXOR = pc_ast.BitXor.__doc__
+    t_QMARK = pc_ast.Question.__doc__
+
+    @lex.TOKEN(pc_ast.Symbol.__doc__)
+    def t_NAME(self, t):
+        keywords = {
+            "def": ("DEF", pc_ast.FunctionKeyword),
+            "if": ("IF", pc_ast.IfKeyword),
+            "then": ("THEN", pc_ast.ThenKeyword),
+            "else": ("ELSE", pc_ast.ElseKeyword),
+            "leave": ("BREAK", pc_ast.LeaveKeyword),
+            "for": ("FOR", pc_ast.ForKeyword),
+            "to": ("TO", pc_ast.ToKeyword),
+            "while": ("WHILE", pc_ast.WhileKeyword),
+            "do": ("DO", pc_ast.DoKeyword),
+            "return": ("RETURN", pc_ast.ReturnKeyword),
+            "switch": ("SWITCH", pc_ast.SwitchKeyword),
+            "case": ("CASE", pc_ast.CaseKeyword),
+            "default": ("DEFAULT", pc_ast.DefaultKeyword),
+        }
+        (tt, tcls) = keywords.get(t.value, ("NAME", pc_ast.Symbol))
+        t.type = tt
+        t.value = tcls(t.value)
+        return t
+
+    # Putting this before t_WS lets it consume lines with only comments in
+    # them, so the latter code never sees the WS part.  The newline is not
+    # consumed here; that is needed for "if 1: #comment".
+    @lex.TOKEN(pc_ast.Comment.__doc__)
+    def t_comment(self, t):
+        return None
+
+    # Whitespace
+    @lex.TOKEN(pc_ast.Whitespace.__doc__)
+    def t_WS(self, t):
+        if (t.lexer.at_line_start and
+                t.lexer.paren_count == 0 and
+                t.lexer.brack_count == 0):
+            return t
+
+    # Don't generate newline tokens when inside of parenthesis, eg
+    #   a = (1,
+    #        2, 3)
+    @lex.TOKEN(pc_ast.Linebreak.__doc__)
+    def t_newline(self, t):
+        t.lexer.lineno += len(t.value)
+        t.value = pc_ast.Linebreak(t.value)
+        t.type = "NEWLINE"
+        if t.lexer.paren_count == 0 and t.lexer.brack_count == 0:
+            return t
+
+    @lex.TOKEN(pc_ast.LBracket.__doc__)
+    def t_LBRACK(self, t):
+        t.lexer.brack_count += 1
+        t.value = pc_ast.LBracket(t.value)
+        return t
+
+    @lex.TOKEN(pc_ast.RBracket.__doc__)
+    def t_RBRACK(self, t):
+        t.lexer.brack_count -= 1
+        t.value = pc_ast.RBracket(t.value)
+        return t
+
+    @lex.TOKEN(pc_ast.LParenthesis.__doc__)
+    def t_LPAR(self, t):
+        t.lexer.paren_count += 1
+        t.value = pc_ast.LParenthesis(t.value)
+        return t
+
+    @lex.TOKEN(pc_ast.RParenthesis.__doc__)
+    def t_RPAR(self, t):
+        t.lexer.paren_count -= 1
+        t.value = pc_ast.RParenthesis(t.value)
+        return t
+
+    def t_error(self, t):
+        raise_syntax_error("Unknown symbol %r" % (t.value[0],),
+                           self.filename, t.lexer.lineno,
+                           t.lexer.lexpos, t.lexer.lexdata)
+        t.lexer.skip(1)
+
+
+# Combine Ply and my filters into a new lexer
+
+class IndentLexer(Lexer):
+    def __init__(self, debug=False, optimize=False, lextab="lextab"):
+        self.debug = debug
+        self.build(debug=debug, optimize=optimize, lextab=lextab)
+        self.token_stream = None
+
+    def input(self, s, add_endmarker=True):
+        s = annoying_case_hack_filter(s)
+        s += "\n"
+        self.lexer.paren_count = 0
+        self.lexer.brack_count = 0
+        self.lexer.lineno = 1
+        self.lexer.input(s)
+        self.token_stream = filter(self.lexer, add_endmarker, self.filename)
+
+    def token(self):
+        # The simplest way to convert "simple" tokens to classes.
+        # Functions won't work due to ply's reliance on __code__.
+        # We end up with (LT+MINUS) instead of ASSIGN otherwise.
+        mapping = {
+            "COLON": pc_ast.Colon,
+            "EQ": pc_ast.Eq,
+            "ASSIGNEA": pc_ast.AssignIEAOp,
+            "ASSIGN": pc_ast.AssignOp,
+            "LTU": pc_ast.LtU,
+            "GTU": pc_ast.GtU,
+            "NE": pc_ast.NotEq,
+            "LE": pc_ast.Le,
+            "GE": pc_ast.Ge,
+            "LSHIFT": pc_ast.LShift,
+            "RSHIFT": pc_ast.RShift,
+            "LT": pc_ast.Lt,
+            "GT": pc_ast.Gt,
+            "PLUS": pc_ast.Add,
+            "MINUS": pc_ast.Sub,
+            "MULT": pc_ast.Mul,
+            "DIV": pc_ast.Div,
+            "MOD": pc_ast.Mod,
+            "INVERT": pc_ast.Not,
+            "COMMA": pc_ast.Comma,
+            "PERIOD": pc_ast.Period,
+            "SEMICOLON": pc_ast.Semicolon,
+            "APPEND": pc_ast.BitConcat,
+            "BITOR": pc_ast.BitOr,
+            "BITAND": pc_ast.BitAnd,
+            "BITXOR": pc_ast.BitXor,
+            "QMARK": pc_ast.Question,
+        }
+        try:
+            t = next(self.token_stream)
+            if t is not None:
+                if t.type in mapping:
+                    t.value = mapping[t.type](t.value)
+            return t
+        except StopIteration:
+            return None
diff --git a/src/openpower/oppc/pc_parser.py b/src/openpower/oppc/pc_parser.py
new file mode 100644
index 0000000..aa8cd1c
--- /dev/null
+++ b/src/openpower/oppc/pc_parser.py
@@ -0,0 +1,615 @@
+import itertools
+
+from ply import yacc
+
+import openpower.oppc.pc_ast as pc_ast
+
+
+class SyntaxError2(Exception):
+    """
+    Class used to raise a syntax error but get ply to stop eating errors
+    since it catches and discards SyntaxError after setting a flag.
+    """
+
+    def __init__(self, *args, cls=SyntaxError):
+        super().__init__(*args)
+        self.cls = cls
+
+    def __repr__(self):
+        return repr(self.cls(*self.args))
+
+    def __str__(self):
+        return str(self.cls(*self.args))
+
+    def raise_syntax_error(self):
+        raise self.cls(*self.args) from self
+
+
+def raise_syntax_error(msg,
+        filename, lineno, lexpos, data,
+        cls=SyntaxError):
+    line_start = data.rfind('\n', 0, lexpos) + 1
+    line_end = data.find('\n', line_start)
+    col = (lexpos - line_start) + 1
+
+    raise SyntaxError2(str(msg),
+        (filename, lineno, col, data[line_start:line_end]),
+        cls=cls)
+
+
+binary_ops = {
+    "^": pc_ast.BitXor,
+    "&": pc_ast.BitAnd,
+    "|": pc_ast.BitOr,
+    "+": pc_ast.Add,
+    "-": pc_ast.Sub,
+    "<<": pc_ast.LShift,
+    ">>": pc_ast.RShift,
+    "*": pc_ast.Mul,
+    "/": pc_ast.Div,
+    "%": pc_ast.Mod,
+    "<=": pc_ast.Le,
+    ">=": pc_ast.Ge,
+    "<": pc_ast.Lt,
+    ">": pc_ast.Gt,
+    "=": pc_ast.Eq,
+    "!=": pc_ast.NotEq,
+}
+unary_ops = {
+    "+": pc_ast.Plus,
+    "-": pc_ast.Minus,
+    "¬": pc_ast.Not,
+}
+
+
+class Parser:
+    REGS = {}
+    REGS.update(map(lambda reg: (reg, pc_ast.GPR), pc_ast.GPR))
+    REGS.update(map(lambda reg: (reg, pc_ast.FPR), pc_ast.FPR))
+    REGS.update(map(lambda reg: (reg, pc_ast.CR3), pc_ast.CR3))
+    REGS.update(map(lambda reg: (reg, pc_ast.CR5), pc_ast.CR5))
+    REGS.update(map(lambda reg: (reg, pc_ast.XER), pc_ast.XER))
+
+    def __init__(self, lexer, debug=False, optimize=False, write_tables=True):
+        ignore = lambda token: token in ("WS", "THEN")
+        self.tokens = tuple(itertools.filterfalse(ignore, lexer.tokens))
+
+        self.__lexer = lexer
+        self.__parser = yacc.yacc(
+            module=self,
+            start="file_input_end",
+            debug=debug,
+            optimize=optimize,
+            write_tables=write_tables,
+            tabmodule="yacctab")
+
+        return super().__init__()
+
+    precedence = (
+        ("left", "EQ", "NE", "GT", "LT", "LE", "GE", "LTU", "GTU"),
+        ("left", "BITOR"),
+        ("left", "BITXOR"),
+        ("left", "BITAND"),
+        ("left", "LSHIFT", "RSHIFT"),
+        ("left", "PLUS", "MINUS"),
+        ("left", "MULT", "DIV", "MOD"),
+        ("left", "INVERT"),
+    )
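+
+    # Precedence sketch (illustrative): with the table above the expression
+    # "a | b & c + d" groups as "a | (b & (c + d))": BITAND binds tighter
+    # than BITOR, and PLUS tighter still; comparisons bind loosest of all.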
+
+    def p_file_input_end(self, p):
+        """
+        file_input_end  : file_input ENDMARKER
+        """
+        p[0] = p[1]
+
+    def p_file_input(self, p):
+        """
+        file_input  : file_input NEWLINE
+                    | file_input stmt
+                    | NEWLINE
+                    | stmt
+        """
+        if isinstance(p[len(p)-1], pc_ast.Linebreak):
+            if len(p) == 3:
+                p[0] = p[1]
+            else:
+                p[0] = pc_ast.Scope()
+        else:
+            if len(p) == 3:
+                stmt = p[2]
+                if not isinstance(stmt, pc_ast.Scope):
+                    stmt = pc_ast.Scope([stmt])
+                p[0] = pc_ast.Scope(p[1] + stmt)
+            else:
+                p[0] = p[1]
+
+    # funcdef: [decorators] 'def' NAME parameters ':' suite
+    # ignoring decorators
+    def p_funcdef(self, p):
+        """
+        funcdef : DEF NAME parameters COLON suite
+        """
+        raise NotImplementedError()
+
+    # parameters: '(' [varargslist] ')'
+    def p_parameters(self, p):
+        """
+        parameters : LPAR RPAR
+                   | LPAR varargslist RPAR
+        """
+        raise NotImplementedError()
+
+    # varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] |
+    # '**' NAME) |
+    # highly simplified
+
+    def p_varargslist(self, p):
+        """
+        varargslist : varargslist COMMA NAME
+                    | NAME
+        """
+        raise NotImplementedError()
+
+    # stmt: simple_stmt | compound_stmt
+    def p_stmt_simple(self, p):
+        """
+        stmt    : simple_stmt
+        """
+        # simple_stmt is a list
+        p[0] = p[1]
+
+    def p_stmt_compound(self, p):
+        """
+        stmt    : compound_stmt
+        """
+        p[0] = pc_ast.Scope([p[1]])
+
+    # simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+    def p_simple_stmt(self, p):
+        """
+        simple_stmt : small_stmts NEWLINE
+                    | small_stmts SEMICOLON NEWLINE
+        """
+        p[0] = p[1]
+
+    def p_small_stmts(self, p):
+        """
+        small_stmts : small_stmts SEMICOLON small_stmt
+                    | small_stmt
+        """
+        if len(p) == 4:
+            p[0] = pc_ast.Scope(p[1] + (p[3],))
+        else:
+            p[0] = pc_ast.Scope([p[1]])
+
+    # small_stmt: expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt |
+    #    import_stmt | global_stmt | exec_stmt | assert_stmt
+    def p_small_stmt(self, p):
+        """
+        small_stmt  : flow_stmt
+                    | break_stmt
+                    | expr_stmt
+        """
+        p[0] = p[1]
+
+    # expr_stmt: testlist (augassign (yield_expr|testlist) |
+    #                      ('=' (yield_expr|testlist))*)
+    # augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+    #             '<<=' | '>>=' | '**=' | '//=')
+    def p_expr_stmt(self, p):
+        """
+        expr_stmt   : testlist ASSIGNEA testlist
+                    | testlist ASSIGN testlist
+                    | testlist
+        """
+        if len(p) == 2:
+            p[0] = p[1]
+        else:
+            (lvalue, rvalue) = (p[1], p[3])
+            if isinstance(p[2], pc_ast.AssignOp):
+                cls = pc_ast.Assign
+            else:
+                cls = pc_ast.AssignIEA
+            if (isinstance(lvalue, pc_ast.Symbol) and
+                    (str(lvalue) in self.__class__.REGS)):
+                lvalue = self.__class__.REGS[str(lvalue)](lvalue)
+            p[0] = cls(lvalue=lvalue, rvalue=rvalue)
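+
+    # Coercion sketch (illustrative): in "RT <- 1" the lvalue arrives as a
+    # plain Symbol("RT"); the REGS lookup above re-wraps it as
+    # pc_ast.GPR("RT") so later passes can special-case register writes.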
+
+    def p_flow_stmt(self, p):
+        "flow_stmt : return_stmt"
+        p[0] = p[1]
+
+    # return_stmt: 'return' [testlist]
+    def p_return_stmt(self, p):
+        "return_stmt : RETURN testlist"
+        p[0] = pc_ast.Return(p[2])
+
+    def p_compound_stmt(self, p):
+        """
+        compound_stmt   : if_stmt
+                        | while_stmt
+                        | switch_stmt
+                        | for_stmt
+                        | funcdef
+        """
+        p[0] = p[1]
+
+    def p_break_stmt(self, p):
+        """
+        break_stmt  : BREAK
+        """
+        p[0] = p[1]
+
+    def p_for_stmt(self, p):
+        """
+        for_stmt    : FOR atom EQ comparison TO comparison COLON suite
+                    | DO atom EQ comparison TO comparison COLON suite
+        """
+        p[0] = pc_ast.ForExpr(subject=p[2], start=p[4], end=p[6], body=p[8])
+
+    def p_while_stmt(self, p):
+        """
+        while_stmt  : DO WHILE test COLON suite ELSE COLON suite
+                    | DO WHILE test COLON suite
+        """
+        if len(p) == 9:
+            p[0] = pc_ast.WhileExpr(test=p[3], body=p[5], orelse=p[8])
+        else:
+            p[0] = pc_ast.WhileExpr(test=p[3], body=p[5], orelse=pc_ast.Scope())
+
+    def p_switch_smt(self, p):
+        """
+        switch_stmt : SWITCH LPAR atom RPAR COLON NEWLINE INDENT cases DEDENT
+        """
+        p[0] = pc_ast.SwitchExpr(subject=p[3], cases=p[8])
+
+    def p_cases(self, p):
+        """
+        cases   : switch_list switch_default
+                | switch_default
+        """
+        if len(p) == 3:
+            p[0] = pc_ast.Cases(p[1] + (p[2],))
+        else:
+            p[0] = pc_ast.Cases([p[1]])
+
+    def p_switch_list(self, p):
+        """
+        switch_list : switch_case switch_list
+                    | switch_case
+        """
+        if len(p) == 3:
+            p[0] = pc_ast.Sequence((p[1],) + p[2])
+        else:
+            p[0] = pc_ast.Sequence([p[1]])
+
+    def p_switch_case(self, p):
+        """
+        switch_case : CASE LPAR labels RPAR COLON suite
+        """
+        p[0] = pc_ast.Case(labels=p[3], body=p[6])
+
+    def p_switch_default(self, p):
+        """
+        switch_default  : DEFAULT COLON suite
+        """
+        p[0] = pc_ast.Case(body=p[3],
+            labels=pc_ast.Labels([pc_ast.DefaultLabel()]))
+
+    def p_labels(self, p):
+        """
+        labels  : atom COMMA labels
+                | atom
+        """
+        if not isinstance(p[1], pc_ast.IntLiteral):
+            raise_syntax_error(str(p[1]),
+                self.filename, p.lineno(1), p.lexpos(1),
+                self.input_text)
+        label = pc_ast.Label(str(p[1]))
+        if len(p) == 4:
+            p[0] = pc_ast.Labels((label,) + p[3])
+        else:
+            p[0] = pc_ast.Labels([label])
+
+    def p_if_stmt(self, p):
+        """
+        if_stmt : IF test COLON suite ELSE COLON if_stmt
+                | IF test COLON suite ELSE COLON suite
+                | IF test COLON suite
+        """
+        (test, body) = (p[2], p[4])
+        if len(p) == 8:
+            orelse = p[7]
+        else:
+            orelse = pc_ast.Scope()
+        if not isinstance(body, pc_ast.Scope):
+            body = pc_ast.Scope([body])
+        if not isinstance(orelse, pc_ast.Scope):
+            orelse = pc_ast.Scope([orelse])
+        p[0] = pc_ast.IfExpr(test=test,
+            body=body, orelse=orelse)
+
+    def p_suite(self, p):
+        """
+        suite   : simple_stmt
+                | NEWLINE INDENT stmts DEDENT
+        """
+        if len(p) == 2:
+            p[0] = p[1]
+        else:
+            p[0] = p[3]
+
+    def p_stmts(self, p):
+        """
+        stmts   : stmts stmt
+                | stmt
+        """
+        if len(p) == 3:
+            p[0] = pc_ast.Scope(p[1] + p[2])
+        else:
+            p[0] = p[1]
+
+    def p_comparison(self, p):
+        """
+        comparison  : comparison PLUS comparison
+                    | comparison MINUS comparison
+                    | comparison MULT comparison
+                    | comparison LSHIFT comparison
+                    | comparison RSHIFT comparison
+                    | comparison DIV comparison
+                    | comparison MOD comparison
+                    | comparison EQ comparison
+                    | comparison NE comparison
+                    | comparison LE comparison
+                    | comparison GE comparison
+                    | comparison LTU comparison
+                    | comparison GTU comparison
+                    | comparison LT comparison
+                    | comparison GT comparison
+                    | comparison BITOR comparison
+                    | comparison BITXOR comparison
+                    | comparison BITAND comparison
+                    | PLUS comparison
+                    | MINUS comparison
+                    | INVERT comparison
+                    | comparison APPEND comparison
+                    | power
+        """
+        if len(p) == 4:
+            def reg0(left, op, right):
+                if (isinstance(left, (pc_ast.GPR, pc_ast.FPR)) and
+                        isinstance(op, pc_ast.BitOr) and
+                        (isinstance(right, pc_ast.DecLiteral) and (str(right) == "0"))):
+                    if isinstance(left, pc_ast.GPR):
+                        return pc_ast.GPR(f"{str(left)}0")
+                    else:
+                        return pc_ast.FPR(f"{str(left)}0")
+                return None
+
+            def repeat(left, op, right):
+                if (isinstance(left, pc_ast.Sequence) and
+                        (len(left) == 1) and
+                        isinstance(op, pc_ast.Mul)):
+                    return pc_ast.RepeatExpr(subject=left[0], times=right)
+                return None
+
+            (left, op, right) = p[1:]
+            for hook in (reg0, repeat):
+                p[0] = hook(left, op, right)
+                if p[0] is not None:
+                    break
+            else:
+                p[0] = pc_ast.BinaryExpr(left=left, op=op, right=right)
+
+        elif len(p) == 3:
+            (op, value) = p[1:]
+            p[0] = pc_ast.UnaryExpr(op=op, value=value)
+        else:
+            p[0] = p[1]
+
+    # power: atom trailer* ['**' factor]
+    # trailers enables function calls (and subscripts).
+    # so this is 'trailerlist'
+    def p_power(self, p):
+        """
+        power   : atom
+                | atom trailerlist
+        """
+        if len(p) == 2:
+            p[0] = p[1]
+        else:
+            attribute_or_subscript = (
+                pc_ast.Attribute,
+                pc_ast.Subscript,
+                pc_ast.RangeSubscript,
+            )
+            if isinstance(p[2], attribute_or_subscript):
+                node = p[2]
+                while isinstance(node.subject, attribute_or_subscript):
+                    node = node.subject
+                if isinstance(node.subject, pc_ast.Arguments):
+                    node.subject = pc_ast.Call(name=p[1], args=node.subject)
+                else:
+                    node.subject = p[1]
+                p[0] = p[2]
+            elif isinstance(p[2], pc_ast.Arguments):
+                p[0] = pc_ast.Call(name=p[1], args=p[2])
+            else:
+                raise NotImplementedError()
+
+    def p_atom_name(self, p):
+        """
+        atom : NAME
+        """
+        p[0] = p[1]
+
+    def p_atom_number(self, p):
+        """
+        atom    : BINARY
+                | NUMBER
+                | HEX
+                | STRING
+        """
+        p[0] = p[1]
+
+    # '[' [listmaker] ']' |
+    def p_atom_listmaker(self, p):
+        """
+        atom : LBRACK listmaker RBRACK
+        """
+        p[0] = p[2]
+
+    def p_listmaker(self, p):
+        """
+        listmaker   : test COMMA listmaker
+                    | test
+        """
+        if len(p) == 2:
+            p[0] = pc_ast.Sequence([p[1]])
+        else:
+            p[0] = pc_ast.Sequence((p[1],) + p[3])
+
+    def p_atom_tuple(self, p):
+        """
+        atom    : LPAR testlist RPAR
+        """
+        value = p[2]
+        if (isinstance(value, pc_ast.Symbol) and
+                    (str(value) in self.__class__.REGS)):
+            value = self.__class__.REGS[str(value)](value)
+        p[0] = value
+
+    def p_trailerlist(self, p):
+        """
+        trailerlist : trailer trailerlist
+                    | trailer
+        """
+        if len(p) == 2:
+            p[0] = p[1]
+        else:
+            attribute_or_subscript = (
+                pc_ast.Attribute,
+                pc_ast.Subscript,
+                pc_ast.RangeSubscript,
+            )
+            if isinstance(p[2], attribute_or_subscript):
+                node = p[2]
+                while isinstance(node.subject, attribute_or_subscript):
+                    node = node.subject
+                node.subject = p[1]
+                p[0] = p[2]
+            else:
+                p[0] = pc_ast.Sequence(p[1] + (p[2],))
+
+    # trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+    def p_trailer(self, p):
+        """
+        trailer : trailer_arglist
+                | trailer_subscript
+                | trailer_attr
+        """
+        p[0] = p[1]
+
+    def p_trailer_arglist(self, p):
+        """
+        trailer_arglist : LPAR arglist RPAR
+                        | LPAR RPAR
+        """
+        if len(p) == 3:
+            p[0] = pc_ast.Arguments()
+        else:
+            p[0] = p[2]
+
+    def p_trailer_subscript(self, p):
+        """
+        trailer_subscript : LBRACK subscript RBRACK
+        """
+        p[0] = p[2]
+
+    def p_trailer_attr(self, p):
+        """
+        trailer_attr    : PERIOD NAME
+        """
+        p[0] = pc_ast.Attribute(name=p[2])
+
+    # subscript: '.' '.' '.' | test | [test] ':' [test]
+    def p_subscript(self, p):
+        """subscript : test COLON test
+                     | test
+        """
+        if len(p) == 4:
+            p[0] = pc_ast.RangeSubscript(start=p[1], end=p[3])
+        else:
+            p[0] = pc_ast.Subscript(index=p[1])
+
+    # testlist: test (',' test)* [',']
+    # Contains shift/reduce error
+    def p_testlist(self, p):
+        """
+        testlist    : testlist_multi COMMA
+                    | testlist_multi
+        """
+        if len(p) == 2:
+            p[0] = p[1]
+        else:
+            if isinstance(p[1], pc_ast.Sequence):
+                p[0] = p[1]
+            else:
+                p[0] = pc_ast.Sequence([p[1]])
+
+    def p_testlist_multi(self, p):
+        """
+        testlist_multi  : testlist_multi COMMA test
+                        | test
+        """
+        if len(p) == 2:
+            p[0] = p[1]
+        else:
+            if isinstance(p[1], pc_ast.Sequence):
+                p[0] = pc_ast.Sequence(p[1] + (p[3],))
+            else:
+                p[0] = pc_ast.Sequence([p[1], p[3]])
+
+    # test: or_test ['if' or_test 'else' test] | lambdef
+    #  as I don't support 'and', 'or', and 'not' this works down to 'comparison'
+    def p_test(self, p):
+        """
+        test    : comparison
+                | comparison QMARK test COLON test
+        """
+        if len(p) == 2:
+            p[0] = p[1]
+        else:
+            p[0] = pc_ast.IfExpr(test=p[1],
+                body=pc_ast.Scope([p[3]]),
+                orelse=pc_ast.Scope([p[5]]))
+
+    # arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
+    # | '**' test)
+    # XXX INCOMPLETE: this doesn't allow the trailing comma
+    def p_arglist(self, p):
+        """
+        arglist : arglist COMMA argument
+                | argument
+        """
+        if len(p) == 4:
+            p[0] = pc_ast.Arguments(p[1] + (p[3],))
+        else:
+            p[0] = pc_ast.Arguments([p[1]])
+
+    # argument: test [gen_for] | test '=' test  # Really [keyword '='] test
+    def p_argument(self, p):
+        """
+        argument    : test
+        """
+        p[0] = p[1]
+
+    def p_error(self, p):
+        raise_syntax_error(str(p.value),
+            self.filename, p.lineno, p.lexpos,
+            self.input_text)
+
+    def parse(self, code, filename=None, debug=False):
+        self.filename = filename
+        self.input_text = code
+        return self.__parser.parse(lexer=self.__lexer, debug=debug, input=code)
diff --git a/src/openpower/oppc/pc_util.py b/src/openpower/oppc/pc_util.py
new file mode 100644
index 0000000..7ab3892
--- /dev/null
+++ b/src/openpower/oppc/pc_util.py
@@ -0,0 +1,406 @@
+import collections
+import contextlib
+import functools
+
+import mdis.dispatcher
+import mdis.visitor
+import mdis.walker
+
+import openpower.oppc.pc_ast as pc_ast
+
+
+class Code(list):
+    def __init__(self):
+        self.__level = 0
+        return super().__init__()
+
+    def __enter__(self):
+        self.__level += 1
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        self.__level -= 1
+
+    def __str__(self):
+        if len(self) == 0:
+            raise ValueError("empty code")
+
+        lines = []
+        for (level, stmt) in self:
+            line = ((" " * level * 4) + stmt)
+            lines.append(line)
+
+        return "\n".join(lines)
+
+    def emit(self, stmt, level=0):
+        item = ((level + self.__level), stmt)
+        self.append(item)
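+
+
+# Usage sketch (illustrative): the context-manager protocol tracks nesting,
+# so
+#
+#     code = Code()
+#     code.emit(stmt="if a = b then")
+#     with code:
+#         code.emit(stmt="RT <- 1")
+#
+# renders via str(code) as "if a = b then\n    RT <- 1".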
+
+
+class Hook(mdis.dispatcher.Hook):
+    def __call__(self, call):
+        hook = super().__call__(call)
+
+        class ConcreteHook(hook.__class__):
+            @functools.wraps(hook.__call__)
+            @contextlib.contextmanager
+            def __call__(self, dispatcher, node, *args, **kwargs):
+                return hook(dispatcher, node, *args, **kwargs)
+
+        return ConcreteHook(*tuple(self))
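+
+
+# Dispatch sketch (illustrative): each @Hook(...) method below is a one-shot
+# generator; ConcreteHook turns it into a context manager, so traverse()
+# runs the body up to "yield node", recurses into the children, and resumes
+# the remainder of the body once the children have been visited.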
+
+
+class PseudocodeVisitor(mdis.visitor.ContextVisitor):
+    def __init__(self, root):
+        self.__root = root
+        self.__code = collections.defaultdict(lambda: Code())
+
+        return super().__init__()
+
+    def __iter__(self):
+        yield from self.__code.items()
+
+    def __getitem__(self, node):
+        return self.__code[node]
+
+    @Hook(pc_ast.Scope)
+    def Scope(self, node):
+        yield node
+        if node is not self.__root:
+            with self[node]:
+                for subnode in node:
+                    for (level, stmt) in self[subnode]:
+                        self[node].emit(stmt=stmt, level=level)
+        else:
+            for subnode in node:
+                for (level, stmt) in self[subnode]:
+                    self[node].emit(stmt=stmt, level=level)
+
+    @Hook(pc_ast.Call)
+    def Call(self, node):
+        yield node
+        args = []
+        for subnode in node.args:
+            for (level, stmt) in self[subnode]:
+                assert level == 0
+                args.append(stmt)
+        args = ", ".join(args)
+        stmt = f"{node.name}({args})"
+        self[node].emit(stmt=stmt)
+
+    @Hook(pc_ast.Assign, pc_ast.AssignIEA)
+    def Assign(self, node):
+        mapping = {
+            pc_ast.Assign: "<-",
+            pc_ast.AssignIEA: "<-iea",
+        }
+        yield node
+        lvalue = str(self[node.lvalue])
+        if (isinstance(node.lvalue, (pc_ast.GPR, pc_ast.FPR)) or
+                (isinstance(node.lvalue, (pc_ast.Subscript, pc_ast.RangeSubscript)) and
+                    isinstance(node.lvalue.subject, (pc_ast.GPR, pc_ast.FPR)))):
+            lvalue = lvalue.replace("(", "").replace(")", "")
+        rvalue = str(self[node.rvalue])
+
+        if isinstance(node.rvalue, pc_ast.IfExpr):
+            # All right, this deserves an explanation.
+            # We basically convert T <- C ? A : B into this code:
+            #
+            # if C then
+            #     T <- A
+            # else
+            #     T <- B
+            #
+            # To make things work, we must ensure that objects are unique.
+            # Otherwise we'll reuse the bogus code already produced before.
+            (body, orelse) = map(lambda node: node.clone(),
+                (node.rvalue.body[0], node.rvalue.orelse[0]))
+            body = pc_ast.Scope([node.__class__(lvalue=node.lvalue.clone(), rvalue=body)])
+            orelse = pc_ast.Scope([node.__class__(lvalue=node.lvalue.clone(), rvalue=orelse)])
+            tmpnode = node.rvalue.clone(body=body, orelse=orelse)
+            walker = mdis.walker.Walker()
+            traverse(root=tmpnode, visitor=self, walker=walker)
+            for (level, stmt) in self[tmpnode]:
+                self[node].emit(stmt=stmt, level=level)
+        else:
+            stmt = " ".join([
+                lvalue,
+                mapping[node.__class__],
+                rvalue,
+            ])
+            self[node].emit(stmt=stmt)
+
+    @Hook(pc_ast.BinaryExpr)
+    def BinaryExpr(self, node):
+        yield node
+        stmt = " ".join([
+            str(self[node.left]),
+            str(self[node.op]),
+            str(self[node.right]),
+        ])
+        self[node].emit(stmt=f"({stmt})")
+
+    @Hook(pc_ast.IfExpr)
+    def IfExpr(self, node):
+        yield node
+        stmt = " ".join([
+            "if",
+            str(self[node.test]),
+            "then",
+        ])
+        self[node].emit(stmt=stmt)
+        for (level, stmt) in self[node.body]:
+            self[node].emit(stmt=stmt, level=level)
+        if node.orelse:
+            self[node].emit("else")
+            for (level, stmt) in self[node.orelse]:
+                self[node].emit(stmt=stmt, level=level)
+
+    @Hook(pc_ast.ForExpr)
+    def ForExpr(self, node):
+        yield node
+        stmt = " ".join([
+            "for",
+            str(self[node.subject]),
+            "=",
+            str(self[node.start]),
+            "to",
+            str(self[node.end]),
+        ])
+        self[node].emit(stmt=stmt)
+        for (level, stmt) in self[node.body]:
+            self[node].emit(stmt=stmt, level=level)
+
+    @Hook(pc_ast.WhileExpr)
+    def WhileExpr(self, node):
+        yield node
+        stmt = " ".join([
+            "do",
+            "while",
+            str(self[node.test]),
+        ])
+        self[node].emit(stmt=stmt)
+        for (level, stmt) in self[node.body]:
+            self[node].emit(stmt=stmt, level=level)
+        if node.orelse:
+            self[node].emit("else")
+            for (level, stmt) in self[node.orelse]:
+                self[node].emit(stmt=stmt, level=level)
+
+    @Hook(pc_ast.RepeatExpr)
+    def RepeatExpr(self, node):
+        yield node
+        stmt = " ".join([
+            f"[{str(self[node.subject])}]",
+            "*",
+            str(self[node.times]),
+        ])
+        self[node].emit(stmt=f"({stmt})")
+
+    @Hook(pc_ast.SwitchExpr)
+    def SwitchExpr(self, node):
+        yield node
+        self[node].emit(f"switch({str(self[node.subject])})")
+        with self[node]:
+            for (level, stmt) in self[node.cases]:
+                self[node].emit(stmt=stmt, level=level)
+
+    @Hook(pc_ast.Cases)
+    def Cases(self, node):
+        yield node
+        for subnode in node:
+            for (level, stmt) in self[subnode]:
+                self[node].emit(stmt=stmt, level=level)
+
+    @Hook(pc_ast.Case)
+    def Case(self, node):
+        yield node
+        for (level, stmt) in self[node.labels]:
+            self[node].emit(stmt=stmt, level=level)
+        for (level, stmt) in self[node.body]:
+            self[node].emit(stmt=stmt, level=level)
+
+    @Hook(pc_ast.Labels)
+    def Labels(self, node):
+        yield node
+        if ((len(node) == 1) and isinstance(node[-1], pc_ast.DefaultLabel)):
+            stmt = "default:"
+        else:
+            labels = ", ".join(map(lambda label: str(self[label]), node))
+            stmt = f"case ({labels}):"
+        self[node].emit(stmt=stmt)
+
+    @Hook(pc_ast.Label)
+    def Label(self, node):
+        yield node
+        self[node].emit(stmt=str(node))
+
+    @Hook(pc_ast.DefaultLabel)
+    def DefaultLabel(self, node):
+        yield node
+        self[node].emit(stmt="default:")
+
+    @Hook(pc_ast.UnaryExpr)
+    def UnaryExpr(self, node):
+        yield node
+        stmt = "".join([
+            str(self[node.op]),
+            f"({str(self[node.value])})",
+        ])
+        self[node].emit(stmt=stmt)
+
+    @Hook(pc_ast.BinLiteral, pc_ast.DecLiteral, pc_ast.HexLiteral)
+    def Integer(self, node):
+        yield node
+        self[node].emit(stmt=str(node))
+
+    @Hook(pc_ast.StringLiteral)
+    def StringLiteral(self, node):
+        yield node
+        self[node].emit(stmt=f"'{str(node)}'")
+
+    @Hook(pc_ast.Symbol)
+    def Symbol(self, node):
+        yield node
+        self[node].emit(stmt=str(node))
+
+    @Hook(pc_ast.Attribute)
+    def Attribute(self, node):
+        yield node
+        stmt = ".".join([
+            str(self[node.subject]),
+            str(self[node.name]),
+        ])
+        self[node].emit(stmt=stmt)
+
+    @Hook(pc_ast.Not, pc_ast.Add, pc_ast.Sub,
+            pc_ast.Mul, pc_ast.MulS, pc_ast.MulU,
+            pc_ast.Div, pc_ast.DivT, pc_ast.Mod,
+            pc_ast.Sqrt,
+            pc_ast.Eq, pc_ast.NotEq,
+            pc_ast.Lt, pc_ast.Le, pc_ast.LtU,
+            pc_ast.Gt, pc_ast.Ge, pc_ast.GtU,
+            pc_ast.LShift, pc_ast.RShift,
+            pc_ast.AssignOp, pc_ast.AssignIEAOp,
+            pc_ast.BitAnd, pc_ast.BitOr, pc_ast.BitXor,
+            pc_ast.BitConcat)
+    def Op(self, node):
+        yield node
+        mapping = {
+            pc_ast.Not: "¬",
+            pc_ast.Add: "+",
+            pc_ast.Sub: "-",
+            pc_ast.Mul: "*",
+            pc_ast.MulS: "*si",
+            pc_ast.MulU: "*ui",
+            pc_ast.Div: "/",
+            pc_ast.DivT: "÷",
+            pc_ast.Mod: "%",
+            pc_ast.Sqrt: "√",
+            pc_ast.Eq: "=",
+            pc_ast.NotEq: "!=",
+            pc_ast.Lt: "<",
+            pc_ast.Le: "<=",
+            pc_ast.LtU: "<u",
+            pc_ast.Gt: ">",
+            pc_ast.Ge: ">=",
+            pc_ast.GtU: ">u",
+            pc_ast.LShift: "<<",
+            pc_ast.RShift: ">>",
+            pc_ast.AssignOp: "<-",
+            pc_ast.AssignIEAOp: "<-iea",
+            pc_ast.BitAnd: "&",
+            pc_ast.BitOr: "|",
+            pc_ast.BitXor: "^",
+            pc_ast.BitConcat: "||",
+        }
+        stmt = mapping[node.__class__]
+        self[node].emit(stmt=stmt)
+
+    @Hook(pc_ast.LParenthesis, pc_ast.RParenthesis,
+            pc_ast.LBracket, pc_ast.RBracket)
+    def BracketOrParenthesis(self, node):
+        yield node
+        mapping = {
+            pc_ast.LParenthesis: "(",
+            pc_ast.RParenthesis: ")",
+            pc_ast.LBracket: "[",
+            pc_ast.RBracket: "]",
+        }
+        stmt = mapping[node.__class__]
+        self[node].emit(stmt=stmt)
+
+    @Hook(pc_ast.Subscript)
+    def Subscript(self, node):
+        yield node
+        stmt = "".join([
+            str(self[node.subject]),
+            "[",
+            str(self[node.index]),
+            "]",
+        ])
+        self[node].emit(stmt=stmt)
+
+    @Hook(pc_ast.RangeSubscript)
+    def RangeSubscript(self, node):
+        yield node
+        stmt = "".join([
+            str(self[node.subject]),
+            "[",
+            str(self[node.start]),
+            ":",
+            str(self[node.end]),
+            "]",
+        ])
+        self[node].emit(stmt=stmt)
+
+    @Hook(pc_ast.Colon)
+    def Colon(self, node):
+        yield node
+        self[node].emit(stmt=":")
+
+    @Hook(pc_ast.Linebreak, pc_ast.Endmarker)
+    def Ignore(self, node):
+        yield node
+
+    @Hook(pc_ast.Keyword)
+    def Keyword(self, node):
+        yield node
+        self[node].emit(stmt=node.__doc__)
+
+    @Hook(pc_ast.Sequence)
+    def Sequence(self, node):
+        yield node
+        stmt = ",".join(map(lambda subnode: str(self[subnode]), node))
+        self[node].emit(stmt=f"({stmt})")
+
+    @Hook(pc_ast.Literal)
+    def Literal(self, node):
+        yield node
+        self[node].emit(stmt=str(node))
+
+    @Hook(pc_ast.GPR, pc_ast.FPR)
+    def Reg(self, node):
+        yield node
+        if node.endswith("0"):
+            self[node].emit(stmt=f"({str(node)[:-1]}|0)")
+        else:
+            self[node].emit(stmt=f"({str(node)})")
+
+    @Hook(pc_ast.Node)
+    def Node(self, node):
+        raise NotImplementedError(type(node))
+
+
+def traverse(root, visitor, walker):
+    with visitor(root):
+        for node in walker(root):
+            traverse(root=node, visitor=visitor, walker=walker)
+
+
+def pseudocode(root):
+    walker = mdis.walker.Walker()
+    visitor = PseudocodeVisitor(root=root)
+    traverse(root=root, visitor=visitor, walker=walker)
+    for (level, stmt) in visitor[root]:
+        yield (level, stmt)
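+
+
+# Usage sketch (mirroring __main__.py):
+#
+#     lexer = pc_lexer.IndentLexer(debug=False)
+#     parser = pc_parser.Parser(lexer=lexer)
+#     tree = parser.parse(code="RT <- 1\n")
+#     for (level, stmt) in pseudocode(tree):
+#         print(f"{' ' * 4 * level}{stmt}")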