MAY_INDENT = 1
MUST_INDENT = 2
-# turn into python-like colon syntax from pseudo-code syntax
+# turn into python-like colon syntax from pseudo-code syntax.
+# identify tokens which tell us whether a "hidden colon" is needed.
+# this in turn means that track_tokens_filter "works" without needing
+# complex grammar rules
def python_colonify(lexer, tokens):
- forwhile_seen = False
+ implied_colon_needed = False
for token in tokens:
#print ("track colon token", token, token.type)
token = copy(token)
token.type = "COLON"
yield token
- elif token.type in ['WHILE', 'FOR']:
- forwhile_seen = True
+ elif token.type in ['DO', 'WHILE', 'FOR', 'SWITCH']:
+ implied_colon_needed = True
yield token
elif token.type == 'NEWLINE':
- if forwhile_seen:
+ if implied_colon_needed:
ctok = copy(token)
ctok.type = "COLON"
yield ctok
- forwhile_seen = False
+ implied_colon_needed = False
yield token
else:
yield token
def INDENT(lineno):
    """Return a new token of type "INDENT" for line number *lineno*.

    Thin wrapper that delegates the token construction to _new_token.
    """
    return _new_token("INDENT", lineno)
# words that open a branch of a switch statement
_CASE_KEYWORDS = ("case", "default")


def count_spaces(l):
    """Return the number of leading space characters in line *l*.

    Only the space character counts; a tab or any other character ends
    the run.  A line consisting solely of spaces reports its full length
    (and the empty string reports 0), so slicing the line at the returned
    index leaves an empty remainder for blank lines.
    """
    return len(l) - len(l.lstrip(' '))


def _starts_with_case_keyword(text):
    """Tell whether *text* begins with the word "case" or "default".

    The keyword has to end at a word boundary: "case(1):" and "default:"
    match, identifiers such as "cases" or "default_value" do not.
    """
    for keyword in _CASE_KEYWORDS:
        if not text.startswith(keyword):
            continue
        following = text[len(keyword):len(keyword) + 1]
        # a letter, digit or underscore right after the keyword means this
        # is a longer identifier, not the keyword itself
        if not (following.isalnum() or following == "_"):
            return True
    return False


def annoying_case_hack_filter(code):
    """add annoying "silent keyword" (fallthrough)

    this tricks the parser into taking the (silent) case statement
    as a "small expression".  it can then be spotted and used to indicate
    "fall through" to the next case (in the parser)

    a case/default line gets " fallthrough" appended when the next
    non-blank line is another case/default at the same indentation and
    the line itself ends with a colon (i.e. it has no body).

    also skips blank lines (empty, or containing only spaces)

    code: the complete source text, lines separated by "\\n"
    returns: the filtered source text, joined with "\\n"

    http://bugs.libre-riscv.org/show_bug.cgi?id=280
    """
    res = []
    # indentation of the previous kept line if it was a case/default,
    # otherwise None
    prev_spc_count = None
    for l in code.split("\n"):
        spc_count = count_spaces(l)
        nwhite = l[spc_count:]
        if not nwhite:  # skip blank lines
            continue
        if _starts_with_case_keyword(nwhite):
            # prev_spc_count is only ever set after a line was appended,
            # so res[-1] exists whenever the indentation comparison holds
            if (prev_spc_count == spc_count and
                    res[-1].endswith((":", ": fallthrough"))):
                res[-1] += " fallthrough"  # add to previous line
            prev_spc_count = spc_count
        else:
            prev_spc_count = None
        res.append(l)
    return '\n'.join(res)
+
# Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
'WHILE',
'BREAK',
'NAME',
+ 'HEX', # hex numbers
'NUMBER', # Python decimals
'BINARY', # Python binary
'STRING', # single quoted strings only; syntax of raw strings
'RBRACK',
'COLON',
'EQ',
+ 'ASSIGNEA',
'ASSIGN',
+ 'LTU',
+ 'GTU',
+ 'NE',
+ 'LE',
+ 'GE',
'LT',
'GT',
'PLUS',
'MINUS',
'MULT',
'DIV',
+ 'MOD',
+ 'INVERT',
'APPEND',
+ 'BITOR',
+ 'BITAND',
+ 'BITXOR',
'RETURN',
+ 'SWITCH',
+ 'CASE',
+ 'DEFAULT',
'WS',
'NEWLINE',
'COMMA',
def build(self,**kwargs):
    """Create the lexer and store it on self.lexer.

    The lexer is built by lex.lex() with this object passed as the
    rule module; every keyword argument is forwarded to lex.lex()
    unchanged.
    """
    self.lexer = lex.lex(module=self, **kwargs)
def t_HEX(self, t):
    r"""0x[0-9a-fA-F_]+"""
    # NOTE: the docstring above is the token's regular expression (PLY
    # reads it from the rule function), so it must stay exactly as it is.
    #
    # Underscores are digit separators only: drop them before converting
    # so that they count neither towards the value nor towards the width.
    val = t.value.replace("_", "")
    # Width in bits: one hex digit is one nibble (4 bits); the "- 2"
    # discounts the leading "0x".  This matches t_BINARY, which uses one
    # bit per binary digit.
    t.value = SelectableInt(int(val, 16), (len(val)-2)*4)
    return t
+
def t_BINARY(self, t):
r"""0b[01]+"""
t.value = SelectableInt(int(t.value, 2), len(t.value)-2)
t_COLON = r':'
t_EQ = r'='
+ t_ASSIGNEA = r'<-iea'
t_ASSIGN = r'<-'
+ t_LTU = r'<u'
+ t_GTU = r'>u'
+ t_NE = r'!='
+ t_LE = r'<='
+ t_GE = r'>='
t_LT = r'<'
t_GT = r'>'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
+ t_MOD = r'%'
+ t_INVERT = r'¬'
t_COMMA = r','
t_SEMICOLON = r';'
t_APPEND = r'\|\|'
+ t_BITOR = r'\|'
+ t_BITAND = r'\&'
+ t_BITXOR = r'\^'
# Ply nicely documented how to do this.
"while": "WHILE",
"do": "DO",
"return": "RETURN",
+ "switch": "SWITCH",
+ "case": "CASE",
+ "default": "DEFAULT",
}
def t_NAME(self, t):
class IndentLexer(PowerLexer):
def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
    """Set up the lexer.

    debug is kept on the instance as self.debug; debug, optimize,
    lextab and reflags are all handed on to self.build() as keyword
    arguments.  self.token_stream starts out as None.
    """
    self.debug = debug
    self.build(debug=debug, optimize=optimize,
               lextab=lextab, reflags=reflags)
    # no token stream exists until input has been supplied
    self.token_stream = None
def input(self, s, add_endmarker=True):
+ s = annoying_case_hack_filter(s)
+ if self.debug:
+ print (s)
+ s += "\n"
self.lexer.paren_count = 0
self.lexer.brack_count = 0
self.lexer.input(s)
except StopIteration:
return None
+switchtest = """
+switch (n)
+ case(1): x <- 5
+ case(3): x <- 2
+ case(2):
+
+ case(4):
+ x <- 3
+ case(9):
+
+ default:
+ x <- 9
+print (5)
+"""
+
+cnttzd = """
+n <- 0
+do while n < 64
+ if (RS)[63-n] = 0b1 then
+ leave
+ n <- n + 1
+RA <- EXTZ64(n)
+print (RA)
+"""
+
if __name__ == '__main__':
    # quick test/demo: lex one of the sample programs defined at module
    # level and dump the resulting tokens.
    # To try the other sample, use: code = cnttzd
    code = switchtest
    lexer = IndentLexer(debug=1)
    # Give the lexer some input
    print (code)
    lexer.input(code)
    # iter(callable, sentinel): keep calling lexer.token() until it
    # returns None
    tokens = iter(lexer.token, None)
    for token in tokens:
        print (token)
+