From 3d252f8e5fa2ec3f55730ab6d5d1a4a1b21b2cdf Mon Sep 17 00:00:00 2001 From: Nathan Binkert Date: Tue, 5 Jul 2011 18:30:04 -0700 Subject: [PATCH] grammar: better encapsulation of a grammar and parsing This makes it possible to use the grammar multiple times and use the multiple instances concurrently. This makes implementing an include statement as part of a grammar possible. --- src/arch/isa_parser.py | 2 +- src/mem/slicc/ast/AST.py | 2 +- src/mem/slicc/parser.py | 18 ++-- src/python/m5/util/grammar.py | 178 ++++++++++++++++++---------------- 4 files changed, 105 insertions(+), 95 deletions(-) diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index 97b1bcecc..95bbd15e4 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -1957,7 +1957,7 @@ StaticInstPtr # Parse it. (isa_name, namespace, global_code, namespace_code) = \ - self.parse(isa_desc) + self.parse_string(isa_desc) # grab the last three path components of isa_desc_file to put in # the output diff --git a/src/mem/slicc/ast/AST.py b/src/mem/slicc/ast/AST.py index 8f0d06171..d098c8642 100644 --- a/src/mem/slicc/ast/AST.py +++ b/src/mem/slicc/ast/AST.py @@ -30,7 +30,7 @@ from slicc.util import PairContainer, Location class AST(PairContainer): def __init__(self, slicc, pairs=None): self.slicc = slicc - self.location = Location(slicc.current_file, slicc.lexer.lineno) + self.location = Location(slicc.current_source, slicc.current_line) self.pairs = {} if pairs: self.pairs.update(getattr(pairs, "pairs", pairs)) diff --git a/src/mem/slicc/parser.py b/src/mem/slicc/parser.py index aeda218f7..eb10c2dc4 100644 --- a/src/mem/slicc/parser.py +++ b/src/mem/slicc/parser.py @@ -31,7 +31,7 @@ import re import sys from m5.util import code_formatter -from m5.util.grammar import Grammar, TokenError, ParseError +from m5.util.grammar import Grammar, ParseError import slicc.ast as ast import slicc.util as util @@ -52,9 +52,7 @@ def read_slicc(sources): class SLICC(Grammar): def __init__(self, protocol, 
**kwargs): - super(SLICC, self).__init__(**kwargs) self.decl_list_vec = [] - self.current_file = None self.protocol = protocol self.symtab = SymbolTable(self) @@ -64,15 +62,11 @@ class SLICC(Grammar): return code def parse(self, filename): - self.current_file = filename - f = file(filename, 'r') - text = f.read() try: - decl_list = super(SLICC, self).parse(text) - except (TokenError, ParseError), e: - sys.exit("%s: %s:%d" % (e, filename, e.token.lineno)) + decl_list = self.parse_file(filename) + except ParseError, e: + sys.exit(str(e)) self.decl_list_vec.append(decl_list) - self.current_file = None def _load(self, *filenames): filenames = list(filenames) @@ -238,7 +232,7 @@ class SLICC(Grammar): try: t.value = float(t.value) except ValueError: - raise TokenError("Illegal float", t) + raise ParseError("Illegal float", t) return t def t_NUMBER(self, t): @@ -246,7 +240,7 @@ class SLICC(Grammar): try: t.value = int(t.value) except ValueError: - raise TokenError("Illegal number", t) + raise ParseError("Illegal number", t) return t def t_STRING1(self, t): diff --git a/src/python/m5/util/grammar.py b/src/python/m5/util/grammar.py index ab5f35868..07702cfad 100644 --- a/src/python/m5/util/grammar.py +++ b/src/python/m5/util/grammar.py @@ -1,4 +1,4 @@ -# Copyright (c) 2006-2009 Nathan Binkert +# Copyright (c) 2006-2011 Nathan Binkert # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -24,100 +24,116 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from ply import lex, yacc +import os -class TokenError(lex.LexError): - def __init__(self, msg, t): - super(TokenError, self).__init__(msg) - self.token = t +import ply.lex +import ply.yacc -class ParseError(yacc.YaccError): +class ParseError(Exception): def __init__(self, message, token=None): - super(ParseError, self).__init__(message) + Exception.__init__(self, message) self.token = token -class Tokenizer(object): - def __init__(self, lexer, data): - if isinstance(data, basestring): - indata = [ data ] - elif isinstance(data, file): - indata = data.xreadlines() - else: - indata = data - - def _input(): - for i,line in enumerate(indata): - lexer.lineno = i + 1 - lexer.input(line) - while True: - tok = lexer.token() - if not tok: - break - yield tok - self.input = _input() - self.lexer = lexer - - def next(self): - return self.input.next() - - def __iter__(self): - return self - - def token(self): - try: - return self.next() - except StopIteration: - return None - - def __getattr__(self, attr): - return getattr(self.lexer, attr) - class Grammar(object): - def __init__(self, output=None, debug=False): - self.yacc_args = {} - self.yacc_args['debug'] = debug + def setupLexerFactory(self, **kwargs): + if 'module' in kwargs: + raise AttributeError, "module is an illegal attribute" + self.lex_kwargs = kwargs - if output: - import os + def setupParserFactory(self, **kwargs): + if 'module' in kwargs: + raise AttributeError, "module is an illegal attribute" + if 'output' in kwargs: - dir,tab = os.path.split(output) + dir,tab = os.path.split(kwargs.pop('output')) if not tab.endswith('.py'): - raise AttributeError, 'The output file must end with .py' - self.yacc_args['outputdir'] = dir - self.yacc_args['tabmodule'] = tab[:-3] + raise AttributeError, \ + 'The output file must end with .py' + kwargs['outputdir'] = dir + kwargs['tabmodule'] = tab[:-3] - def t_error(self, t): - raise lex.LexError("Illegal character %s @ %d:%d" % \ - (`t.value[0]`, t.lineno, t.lexpos), `t.value[0]`) + self.yacc_kwargs = kwargs + + def 
__getattr__(self, attr): + if attr == 'lexers': + self.lexers = [] + return self.lexers + + if attr == 'lex_kwargs': + self.setupLexerFactory() + return self.lex_kwargs + + if attr == 'yacc_kwargs': + self.setupParserFactory() + return self.yacc_kwargs + + if attr == 'lex': + self.lex = ply.lex.lex(module=self, **self.lex_kwargs) + return self.lex + + if attr == 'yacc': + self.yacc = ply.yacc.yacc(module=self, **self.yacc_kwargs) + return self.yacc + + if attr == 'current_lexer': + if not self.lexers: + return None + return self.lexers[-1][0] + + if attr == 'current_source': + if not self.lexers: + return '' + return self.lexers[-1][1] + + if attr == 'current_line': + if not self.lexers: + return -1 + return self.current_lexer.lineno + + raise AttributeError, \ + "'%s' object has no attribute '%s'" % (type(self), attr) + + def parse_string(self, data, source='', debug=None, tracking=0): + if not isinstance(data, basestring): + raise AttributeError, \ + "argument must be a string, was '%s'" % type(data) + + import new + lexer = self.lex.clone() + lexer.input(data) + self.lexers.append((lexer, source)) + dict = { + 'productions' : self.yacc.productions, + 'action' : self.yacc.action, + 'goto' : self.yacc.goto, + 'errorfunc' : self.yacc.errorfunc, + } + parser = new.instance(ply.yacc.LRParser, dict) + result = parser.parse(lexer=lexer, debug=debug, tracking=tracking) + self.lexers.pop() + return result + + def parse_file(self, f, **kwargs): + if isinstance(f, basestring): + source = f + f = file(f, 'r') + elif isinstance(f, file): + source = f.name + else: + raise AttributeError, \ + "argument must be either a string or file, was '%s'" % type(f) + + return self.parse_string(f.read(), source, **kwargs) def p_error(self, t): if t: - msg = "Syntax error at %d:%d\n>>%s<<" % \ - (t.lineno, t.lexpos + 1, t.value) + msg = "Syntax error at %s:%d:%d\n>>%s<<" % \ + (self.current_source, t.lineno, t.lexpos + 1, t.value) else: - msg = "Syntax error at end of input" + msg = "Syntax 
error at end of %s" % (self.current_source, ) raise ParseError(msg, t) - def __getattr__(self, attr): - if attr == 'parser': - import ply.yacc - parser = ply.yacc.yacc(module=self, **self.yacc_args) - self.parser = parser - return parser - - if attr == 'lexer': - import ply.lex - lexer = ply.lex.lex(module=self) - self.lexer = lexer - return lexer - - raise AttributeError, "'%s' object has no attribute '%s'" % \ - (self.__class__.__name__, attr) - - def parse(self, stmt, **kwargs): - self.lexer.lineno = 1 - result = self.parser.parse(lexer=Tokenizer(self.lexer, stmt), **kwargs) - self.parser.restart() - - return result - + def t_error(self, t): + msg = "Illegal character %s @ %d:%d" % \ + (`t.value[0]`, t.lineno, t.lexpos) + raise ParseError(msg, t) -- 2.30.2