Revert "remove fixedlogical.patch - added gprs to PowerParser p_atom_name"

[soc.git] / src / soc / decoder / pseudo / lexer.py
diff --git a/src/soc/decoder/pseudo/lexer.py b/src/soc/decoder/pseudo/lexer.py

index 3f666c4be189cdcc23a0bbe4f84e501fa89852ef..d05e8845985617d964fb71f88a3c02a0d06f1ae5 100644 (file)
--- a/src/soc/decoder/pseudo/lexer.py
+++ b/src/soc/decoder/pseudo/lexer.py
@@ -31,10 +31,13 @@ NO_INDENT = 0
  MAY_INDENT = 1
  MUST_INDENT = 2
  
-# turn into python-like colon syntax from pseudo-code syntax
+# turn into python-like colon syntax from pseudo-code syntax.
+# identify tokens which tell us whether a "hidden colon" is needed.
+# this in turn means that track_tokens_filter "works" without needing
+# complex grammar rules
  def python_colonify(lexer, tokens):
  
-    forwhile_seen = False
+    implied_colon_needed = False
      for token in tokens:
          #print ("track colon token", token, token.type)
  
@@ -47,15 +50,15 @@ def python_colonify(lexer, tokens):
              token = copy(token)
              token.type = "COLON"
              yield token
-        elif token.type in ['DO', 'WHILE', 'FOR']:
-            forwhile_seen = True
+        elif token.type in ['DO', 'WHILE', 'FOR', 'SWITCH']:
+            implied_colon_needed = True
              yield token
          elif token.type == 'NEWLINE':
-            if forwhile_seen:
+            if implied_colon_needed:
                  ctok = copy(token)
                  ctok.type = "COLON"
                  yield ctok
-                forwhile_seen = False
+                implied_colon_needed = False
              yield token
          else:
              yield token
@@ -124,6 +127,48 @@ def DEDENT(lineno):
  def INDENT(lineno):
      return _new_token("INDENT", lineno)
  
+def count_spaces(l):
+    """Return the number of leading space characters in the line *l*.
+
+    Only ' ' is counted (a tab ends the run).  A line consisting entirely
+    of spaces reports its full length rather than 0, so that a caller
+    slicing off the indentation is left with an empty string and can
+    recognise the line as blank.
+    """
+    return len(l) - len(l.lstrip(' '))
+
+def annoying_case_hack_filter(code):
+    """add annoying "silent keyword" (fallthrough)
+
+    this tricks the parser into taking the (silent) case statement
+    as a "small expression".  it can then be spotted and used to indicate
+    "fall through" to the next case (in the parser)
+
+    a case/default line gets " fallthrough" appended when nothing follows
+    its colon and the next non-blank line is another case/default at the
+    same indentation.
+
+    also skips blank lines (including whitespace-only ones)
+
+    "case" and "default" are only recognised as whole words, so a line
+    starting with a name such as "cases" or "default_value" is left
+    alone.  a proper token lexer would still be more robust:
+
+    http://bugs.libre-riscv.org/show_bug.cgi?id=280
+
+    code: pseudo-code source text, one statement per line
+    returns: the filtered source text (newline-joined, blank lines removed)
+    """
+    import re  # local import: keeps this filter self-contained
+    # keyword must end at a word boundary: "case(", "default:" match,
+    # "cases" and "default_value" do not
+    is_case_line = re.compile(r"(case|default)\b").match
+    res = []
+    prev_spc_count = None # indent of the preceding case/default line, if any
+    for l in code.split("\n"):
+        spc_count = count_spaces(l)
+        nwhite = l[spc_count:]
+        if not nwhite.strip(): # skip blank and whitespace-only lines
+            continue
+        if is_case_line(nwhite):
+            #print ("case/default", nwhite, spc_count, prev_spc_count)
+            if prev_spc_count is not None and prev_spc_count == spc_count:
+                # the previous kept line was a case/default at this same
+                # indent.  if it has no statement of its own (ignoring
+                # trailing whitespace) mark it as falling through
+                prev = res[-1].rstrip()
+                if prev.endswith((":", ": fallthrough")):
+                    res[-1] = prev + " fallthrough" # add to previous line
+            prev_spc_count = spc_count
+        else:
+            #print ("notstarts", spc_count, nwhite)
+            prev_spc_count = None
+        res.append(l)
+    return '\n'.join(res)
+
  
  # Track the indentation level and emit the right INDENT / DEDENT events.
  def indentation_filter(tokens):
@@ -231,6 +276,7 @@ class PowerLexer:
          'WHILE',
          'BREAK',
          'NAME',
+        'HEX',     # hex numbers
          'NUMBER',  # Python decimals
          'BINARY',  # Python binary
          'STRING',  # single quoted strings only; syntax of raw strings
@@ -244,6 +290,7 @@ class PowerLexer:
          'ASSIGN',
          'LTU',
          'GTU',
+        'NE',
          'LE',
          'GE',
          'LT',
@@ -259,6 +306,9 @@ class PowerLexer:
          'BITAND',
          'BITXOR',
          'RETURN',
+        'SWITCH',
+        'CASE',
+        'DEFAULT',
          'WS',
          'NEWLINE',
          'COMMA',
@@ -272,6 +322,12 @@ class PowerLexer:
      def build(self,**kwargs):
           self.lexer = lex.lex(module=self, **kwargs)
  
+    def t_HEX(self, t):
+        r"""0x[0-9a-fA-F_]+"""
+        # NOTE: the docstring above is the token's regular expression
+        # (ply.lex convention), so this rule is documented in comments.
+        #
+        # Converts a hex literal such as 0x1234_abcd into a SelectableInt
+        # whose width is derived from the number of hex digits written.
+        # underscores are digit separators only: drop them before converting
+        val = t.value.replace("_", "")
+        # each hex digit is one nibble (4 bits); the "0x" prefix is not
+        # a digit, hence the -2.  (compare t_BINARY: 1 bit per digit)
+        # NOTE(review): "0x_" matches the regex but leaves no digits, so
+        # int() raises ValueError - confirm whether that input can occur.
+        t.value = SelectableInt(int(val, 16), (len(val)-2)*4)
+        return t
+
      def t_BINARY(self, t):
          r"""0b[01]+"""
          t.value = SelectableInt(int(t.value, 2), len(t.value)-2)
@@ -296,6 +352,7 @@ class PowerLexer:
      t_ASSIGN = r'<-'
      t_LTU = r'<u'
      t_GTU = r'>u'
+    t_NE = r'!='
      t_LE = r'<='
      t_GE = r'>='
      t_LT = r'<'
@@ -326,6 +383,9 @@ class PowerLexer:
        "while": "WHILE",
        "do": "DO",
        "return": "RETURN",
+      "switch": "SWITCH",
+      "case": "CASE",
+      "default": "DEFAULT",
        }
  
      def t_NAME(self, t):
@@ -392,11 +452,16 @@ class PowerLexer:
  
  class IndentLexer(PowerLexer):
      def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
+        self.debug = debug
          self.build(debug=debug, optimize=optimize,
                                  lextab=lextab, reflags=reflags)
          self.token_stream = None
  
      def input(self, s, add_endmarker=True):
+        s = annoying_case_hack_filter(s)
+        if self.debug:
+            print (s)
+        s += "\n"
          self.lexer.paren_count = 0
          self.lexer.brack_count = 0
          self.lexer.input(s)
@@ -408,20 +473,37 @@ class IndentLexer(PowerLexer):
          except StopIteration:
              return None
  
+switchtest = """
+switch (n)
+    case(1): x <- 5
+    case(3): x <- 2
+    case(2):
+
+    case(4):
+        x <- 3
+    case(9):
+
+    default:
+        x <- 9
+print (5)
+"""
+
+cnttzd = """
+n  <- 0
+do while n < 64
+   if (RS)[63-n] = 0b1 then
+        leave
+   n  <- n + 1
+RA <- EXTZ64(n)
+print (RA)
+"""
+
  if __name__ == '__main__':
  
      # quick test/demo
-    cnttzd = """
-    n  <- 0
-    do while n < 64
-       if (RS)[63-n] = 0b1 then
-            leave
-       n  <- n + 1
-    RA <- EXTZ64(n)
-    print (RA)
-    """
-
-    code = cnttzd
+    #code = cnttzd
+    code = switchtest
+    print (code)
  
      lexer = IndentLexer(debug=1)
      # Give the lexer some input
@@ -429,3 +511,7 @@ if __name__ == '__main__':
      print (code)
      lexer.input(code)
  
+    tokens = iter(lexer.token, None)
+    for token in tokens:
+        print (token)
+