X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Fdecoder%2Fpseudo%2Flexer.py;h=3e96a3a0d627f105e1f84c96243fe73cea004a21;hb=fd026d1d37841698f30068bbe1006578fce1fd94;hp=3f666c4be189cdcc23a0bbe4f84e501fa89852ef;hpb=aafcdcf51621d7533e96742119757c70d628b438;p=soc.git

diff --git a/src/soc/decoder/pseudo/lexer.py b/src/soc/decoder/pseudo/lexer.py
index 3f666c4b..3e96a3a0 100644
--- a/src/soc/decoder/pseudo/lexer.py
+++ b/src/soc/decoder/pseudo/lexer.py
@@ -31,10 +31,13 @@ NO_INDENT = 0
 MAY_INDENT = 1
 MUST_INDENT = 2
 
-# turn into python-like colon syntax from pseudo-code syntax
+# turn into python-like colon syntax from pseudo-code syntax.
+# identify tokens which tell us whether a "hidden colon" is needed.
+# this in turn means that track_tokens_filter "works" without needing
+# complex grammar rules
 def python_colonify(lexer, tokens):
 
-    forwhile_seen = False
+    implied_colon_needed = False
     for token in tokens:
         #print ("track colon token", token, token.type)
 
@@ -47,15 +50,15 @@ def python_colonify(lexer, tokens):
             token = copy(token)
             token.type = "COLON"
             yield token
-        elif token.type in ['DO', 'WHILE', 'FOR']:
-            forwhile_seen = True
+        elif token.type in ['DO', 'WHILE', 'FOR', 'SWITCH']:
+            implied_colon_needed = True
             yield token
         elif token.type == 'NEWLINE':
-            if forwhile_seen:
+            if implied_colon_needed:
                 ctok = copy(token)
                 ctok.type = "COLON"
                 yield ctok
-                forwhile_seen = False
+                implied_colon_needed = False
             yield token
         else:
             yield token
@@ -124,6 +127,48 @@ def DEDENT(lineno):
 def INDENT(lineno):
     return _new_token("INDENT", lineno)
 
+def count_spaces(l):
+    for i in range(len(l)):
+        if l[i] != ' ':
+            return i
+    return 0
+
+def annoying_case_hack_filter(code):
+    """add annoying "silent keyword" (fallthrough)
+
+    this which tricks the parser into taking the (silent) case statement
+    as a "small expression". it can then be spotted and used to indicate
+    "fall through" to the next case (in the parser)
+
+    also skips blank lines
+
+    bugs: any function that starts with the letters "case" or "default"
+    will be detected erroneously. fixing that involves doing a token
+    lexer which spots the fact that "case" and "default" are words,
+    separating them from space, colon, bracket etc.
+
+    http://bugs.libre-riscv.org/show_bug.cgi?id=280
+    """
+    res = []
+    prev_spc_count = None
+    for l in code.split("\n"):
+        spc_count = count_spaces(l)
+        nwhite = l[spc_count:]
+        if len(nwhite) == 0:    # skip blank lines
+            continue
+        if nwhite.startswith("case") or nwhite.startswith("default"):
+            #print ("case/default", nwhite, spc_count, prev_spc_count)
+            if (prev_spc_count is not None and
+                prev_spc_count == spc_count and
+                (res[-1].endswith(":") or res[-1].endswith(": fallthrough"))):
+                res[-1] += " fallthrough"    # add to previous line
+            prev_spc_count = spc_count
+        else:
+            #print ("notstarts", spc_count, nwhite)
+            prev_spc_count = None
+        res.append(l)
+    return '\n'.join(res)
+
 # Track the indentation level and emit the right INDENT / DEDENT events.
 def indentation_filter(tokens):
 
@@ -231,6 +276,7 @@ class PowerLexer:
         'WHILE',
         'BREAK',
         'NAME',
+        'HEX',      # hex numbers
         'NUMBER',   # Python decimals
         'BINARY',   # Python binary
         'STRING',   # single quoted strings only; syntax of raw strings
@@ -244,6 +290,7 @@ class PowerLexer:
         'ASSIGN',
         'LTU',
         'GTU',
+        'NE',
         'LE',
         'GE',
         'LT',
@@ -259,6 +306,9 @@ class PowerLexer:
         'BITAND',
         'BITXOR',
         'RETURN',
+        'SWITCH',
+        'CASE',
+        'DEFAULT',
         'WS',
         'NEWLINE',
         'COMMA',
@@ -272,6 +322,12 @@ class PowerLexer:
     def build(self,**kwargs):
         self.lexer = lex.lex(module=self, **kwargs)
 
+    def t_HEX(self, t):
+        r"""0x[0-9a-fA-F_]+"""
+        val = t.value.replace("_", "")
+        t.value = SelectableInt(int(val, 16), (len(val)-2)*16)
+        return t
+
     def t_BINARY(self, t):
         r"""0b[01]+"""
         t.value = SelectableInt(int(t.value, 2), len(t.value)-2)
@@ -296,6 +352,7 @@ class PowerLexer:
 
     t_ASSIGN = r'<-'
     t_LTU = r'<u'
+    t_NE = r'!='
     t_LE = r'<='
     t_GE = r'>='
     t_LT = r'<'
@@ -326,6 +383,9 @@ class PowerLexer:
         "while": "WHILE",
         "do": "DO",
         "return": "RETURN",
+        "switch": "SWITCH",
+        "case": "CASE",
+        "default": "DEFAULT",
     }
 
     def t_NAME(self, t):
@@ -392,11 +452,15 @@ class PowerLexer:
 
 class IndentLexer(PowerLexer):
     def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
+        self.debug = debug
         self.build(debug=debug, optimize=optimize,
                    lextab=lextab, reflags=reflags)
         self.token_stream = None
 
     def input(self, s, add_endmarker=True):
+        s = annoying_case_hack_filter(s)
+        if self.debug:
+            print (s)
         self.lexer.paren_count = 0
         self.lexer.brack_count = 0
         self.lexer.input(s)
@@ -408,20 +472,37 @@ class IndentLexer(PowerLexer):
         except StopIteration:
             return None
 
+switchtest = """
+switch (n)
+    case(1): x <- 5
+    case(3): x <- 2
+    case(2):
+
+    case(4):
+        x <- 3
+    case(9):
+
+    default:
+        x <- 9
+print (5)
+"""
+
+cnttzd = """
+n  <- 0
+do while n < 64
+   if (RS)[63-n] = 0b1 then
+        leave
+   n  <- n + 1
+RA <- EXTZ64(n)
+print (RA)
+"""
+
 if __name__ == '__main__':
     # quick test/demo
-    cnttzd = """
-    n  <- 0
-    do while n < 64
-       if (RS)[63-n] = 0b1 then
-            leave
-       n  <- n + 1
-    RA <- EXTZ64(n)
-    print (RA)
-    """
-
-    code = cnttzd
+    #code = cnttzd
+    code = switchtest
+    print (code)
 
     lexer = IndentLexer(debug=1)
 
     # Give the lexer some input
@@ -429,3 +510,7 @@ if __name__ == '__main__':
     print (code)
 
     lexer.input(code)
+    tokens = iter(lexer.token, None)
+    for token in tokens:
+        print (token)
+
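
The "implied colon" pass extended in the first hunk can be seen in isolation with the standalone sketch below. It is not part of the diff: the namedtuple is a stand-in for ply's LexToken objects, and only the DO/WHILE/FOR/SWITCH and NEWLINE branches of python_colonify are reproduced, so it illustrates the technique rather than the module's API.

# standalone illustration of the implied-colon filtering idea from
# python_colonify above; token objects here are simplified stand-ins,
# not the actual ply LexToken instances used by the lexer
from collections import namedtuple

Tok = namedtuple("Tok", ["type", "value"])

def colonify(tokens):
    # after DO/WHILE/FOR/SWITCH, inject a COLON before the next NEWLINE,
    # giving the parser a python-style "block opens here" marker
    implied_colon_needed = False
    for token in tokens:
        if token.type in ['DO', 'WHILE', 'FOR', 'SWITCH']:
            implied_colon_needed = True
            yield token
        elif token.type == 'NEWLINE':
            if implied_colon_needed:
                yield Tok("COLON", ":")
                implied_colon_needed = False
            yield token
        else:
            yield token

stream = [Tok("WHILE", "while"), Tok("NAME", "n"), Tok("LT", "<"),
          Tok("NUMBER", 64), Tok("NEWLINE", "\n")]
print([t.type for t in colonify(stream)])
# ['WHILE', 'NAME', 'LT', 'NUMBER', 'COLON', 'NEWLINE']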
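
The sketch below (also not part of the diff) shows the effect annoying_case_hack_filter is expected to have on an empty case arm before the text reaches the parser. The import path follows the file modified above and assumes the soc package from this repository is importable; the output in the comments was traced by hand from the function body and is illustrative only.

# sketch: expected rewrite of an empty "case" arm by the filter added above
from soc.decoder.pseudo.lexer import annoying_case_hack_filter

before = """switch (n)
    case(1): x <- 5
    case(2):
    case(4):
        x <- 3
    default:
        x <- 9"""

print(annoying_case_hack_filter(before))
# the empty arm gains the silent keyword so the parser sees an expression:
#   switch (n)
#       case(1): x <- 5
#       case(2): fallthrough
#       case(4):
#           x <- 3
#       default:
#           x <- 9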