# Copyright (C) 2006-2015 the Mako authors and contributors <see AUTHORS file>
#
# This module is part of Mako and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

"""provides the Lexer class for parsing template strings into parse trees."""

import codecs
import re

from mako import parsetree, exceptions, compat
from mako.pygen import adjust_whitespace

def __init__(self, text, filename=None,
             disable_unicode=False,
             input_encoding=None, preprocessor=None):
    """Initialize lexer state for one template source.

    :param text: the template source to be lexed.
    :param filename: stored on the lexer, passed to the root
     ``TemplateNode`` and included in ``exception_kwargs``.
    :param disable_unicode: stored on the lexer; not supported when
     ``compat.py3k`` is true.
    :param input_encoding: initial value of ``self.encoding``.
    :param preprocessor: ``None``, a single preprocessor, or an iterable
     of them; each one is later called with the template text.
    :raises exceptions.UnsupportedError: if ``disable_unicode`` is
     requested under Python 3.
    """
    self.text = text
    self.filename = filename
    self.template = parsetree.TemplateNode(self.filename)

    # line / column recorded for the most recent successful match
    self.matched_lineno = 1
    self.matched_charpos = 0
    # running line counter and absolute offset into ``self.text``
    self.lineno = 1
    self.match_position = 0

    # stacks of currently open tags, control lines and their
    # ternary (e.g. else/elif-style) branches
    self.tag = []
    self.control_line = []
    self.ternary_stack = []

    self.disable_unicode = disable_unicode
    self.encoding = input_encoding

    if compat.py3k and disable_unicode:
        raise exceptions.UnsupportedError(
            "Mako for Python 3 does not "
            "support disabling Unicode")

    # normalize ``preprocessor`` so that it can always be iterated
    if preprocessor is None:
        self.preprocessor = []
    elif not hasattr(preprocessor, '__iter__'):
        self.preprocessor = [preprocessor]
    else:
        self.preprocessor = preprocessor
@property
def exception_kwargs(self):
    """Keyword arguments describing the current match location.

    Exposed as a property because callers expand it directly with
    ``**self.exception_kwargs`` when raising lexer exceptions.

    :return: dict with the keys ``source``, ``lineno``, ``pos`` and
     ``filename``.
    """
    return {
        'source': self.text,
        'lineno': self.matched_lineno,
        'pos': self.matched_charpos,
        'filename': self.filename,
    }
def match(self, regexp, flags=None):
    """compile the given regexp, cache the reg, and call match_reg().

    Compiled patterns are stored in the module-level ``_regexp_cache``
    dict, keyed on ``(regexp, flags)``.

    :param regexp: regular expression source string.
    :param flags: optional ``re`` flags; when falsy the pattern is
     compiled without flags.
    :return: whatever ``match_reg()`` returns for the compiled pattern.
    """
    cache_key = (regexp, flags)
    try:
        reg = _regexp_cache[cache_key]
    except KeyError:
        if flags:
            reg = re.compile(regexp, flags)
        else:
            reg = re.compile(regexp)
        _regexp_cache[cache_key] = reg

    return self.match_reg(reg)
def match_reg(self, reg):
    """match the given regular expression object to the current text
    position.

    if a match occurs, update the current text and line position.

    :param reg: a compiled regular expression object.
    :return: the match object, or ``None`` when nothing matched (in
     which case no lexer state is changed).
    """
    mp = self.match_position

    match = reg.match(self.text, mp)
    if match:
        (start, end) = match.span()
        if end == start:
            # zero-width match: step one character forward so that the
            # position always advances
            self.match_position = end + 1
        else:
            self.match_position = end
        self.matched_lineno = self.lineno

        # walk back from the match start to the preceding newline (or
        # the start of the text) to derive the 1-based column
        cp = mp - 1
        while (cp >= 0 and cp < self.textlength and
               self.text[cp] != '\n'):
            cp -= 1
        self.matched_charpos = mp - cp

        # advance the running line counter by the newlines consumed
        self.lineno += self.text.count("\n", mp, self.match_position)
    return match
def parse_until_text(self, *text):
    """Consume text up to the first of the given terminator patterns.

    Comments (``#`` to end of line) and quoted strings are skipped over
    so that terminators appearing inside them are ignored.  Curly braces
    seen in the consumed text are counted so that a ``}`` closing a
    nested ``{`` is not treated as the terminator.

    :param text: one or more regular expression fragments, any of which
     terminates the scan.
    :return: ``(consumed_text, terminator)`` where ``consumed_text``
     excludes the terminator itself.
    :raises exceptions.SyntaxException: if no terminator is found.
    """
    startpos = self.match_position
    text_re = r'|'.join(text)
    brace_level = 0
    while True:
        # skip a python comment
        match = self.match(r'#.*\n')
        if match:
            continue

        # skip a complete string literal, honoring escaped quotes
        match = self.match(r'(\"\"\"|\'\'\'|\"|\')((?<!\\)\\\1|.)*?\1',
                           re.S)
        if match:
            continue

        match = self.match(r'(%s)' % text_re)
        if match:
            if match.group(1) == '}' and brace_level > 0:
                # this brace closes a nested "{", keep scanning
                brace_level -= 1
                continue
            return \
                self.text[startpos:
                          self.match_position - len(match.group(1))],\
                match.group(1)

        # consume plain text up to the next quote, comment or terminator
        match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
        if match:
            brace_level += match.group(1).count('{')
            brace_level -= match.group(1).count('}')
            continue

        raise exceptions.SyntaxException(
            "Expected: %s" %
            ','.join(text),
            **self.exception_kwargs)
def append_node(self, nodecls, *args, **kwargs):
    """Build a parse tree node and attach it to the tree.

    The node receives ``source``, ``lineno`` and ``pos`` from the
    current match unless the caller supplied them, and always receives
    the lexer's ``filename``.  It is appended to the innermost open tag,
    or to the template root when no tag is open, and the tag /
    control-line stacks are updated to reflect it.

    :param nodecls: node class to instantiate.
    :param args: positional arguments for ``nodecls``.
    :param kwargs: keyword arguments for ``nodecls``.
    :raises exceptions.SyntaxException: if a non-primary control keyword
     is not a legal ternary of the enclosing control line.
    """
    kwargs.setdefault('source', self.text)
    kwargs.setdefault('lineno', self.matched_lineno)
    kwargs.setdefault('pos', self.matched_charpos)
    kwargs['filename'] = self.filename
    node = nodecls(*args, **kwargs)

    if self.tag:
        self.tag[-1].nodes.append(node)
    else:
        self.template.nodes.append(node)

    # build a set of child nodes for the control line
    # (used for loop variable detection)
    # also build a set of child nodes on ternary control lines
    # (used for determining if a pass needs to be auto-inserted)
    if self.control_line:
        control_frame = self.control_line[-1]
        control_frame.nodes.append(node)
        if not (isinstance(node, parsetree.ControlLine) and
                control_frame.is_ternary(node.keyword)):
            if self.ternary_stack and self.ternary_stack[-1]:
                self.ternary_stack[-1][-1].nodes.append(node)

    if isinstance(node, parsetree.Tag):
        if self.tag:
            node.parent = self.tag[-1]
        self.tag.append(node)
    elif isinstance(node, parsetree.ControlLine):
        if node.isend:
            self.control_line.pop()
            self.ternary_stack.pop()
        elif node.is_primary:
            self.control_line.append(node)
            self.ternary_stack.append([])
        elif self.control_line and \
                self.control_line[-1].is_ternary(node.keyword):
            self.ternary_stack[-1].append(node)
        elif self.control_line and \
                not self.control_line[-1].is_ternary(node.keyword):
            raise exceptions.SyntaxException(
                "Keyword '%s' not a legal ternary for keyword '%s'" %
                (node.keyword, self.control_line[-1].keyword),
                **self.exception_kwargs)
169 _coding_re = re.compile(r'#.*coding[:=]\s*([-\w.]+).*\r?\n')
def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
    """given string/unicode or bytes/string, determine encoding
    from magic encoding comment, return body as unicode
    or raw if decode_raw=False

    :param text: template source, either text or bytes.
    :param decode_raw: when true, byte input is decoded using the
     detected encoding.
    :param known_encoding: fallback encoding used when no magic comment
     is present; ``'ascii'`` is used if this is also empty.
    :param filename: reported in any ``CompileException`` raised.
    :return: ``(encoding, text)``.
    :raises exceptions.CompileException: on a UTF-8 BOM that conflicts
     with the magic comment, or when decoding fails.
    """
    if isinstance(text, compat.text_type):
        # already text: only the encoding name needs to be determined
        m = self._coding_re.match(text)
        encoding = (m.group(1) if m else None) or known_encoding or 'ascii'
        return encoding, text

    if text.startswith(codecs.BOM_UTF8):
        text = text[len(codecs.BOM_UTF8):]
        parsed_encoding = 'utf-8'
        m = self._coding_re.match(text.decode('utf-8', 'ignore'))
        if m is not None and m.group(1) != 'utf-8':
            raise exceptions.CompileException(
                "Found utf-8 BOM in file, with conflicting "
                "magic encoding comment of '%s'" % m.group(1),
                text.decode('utf-8', 'ignore'),
                0, 0, filename)
    else:
        m = self._coding_re.match(text.decode('utf-8', 'ignore'))
        if m:
            parsed_encoding = m.group(1)
        else:
            parsed_encoding = known_encoding or 'ascii'

    if decode_raw:
        try:
            text = text.decode(parsed_encoding)
        except UnicodeDecodeError:
            raise exceptions.CompileException(
                "Unicode decode operation of encoding '%s' failed" %
                parsed_encoding,
                text.decode('utf-8', 'ignore'),
                0, 0, filename)

    return parsed_encoding, text
def parse(self):
    """Lex the whole template text and return the root template node.

    The text is decoded, run through each preprocessor, and then
    consumed by trying each ``match_*`` method in turn until the end of
    the text is reached.

    :return: ``self.template``.
    :raises exceptions.SyntaxException: if a tag or control line is
     still open at the end of the text.
    :raises exceptions.CompileException: if no matcher accepts the
     text at the current position.
    """
    # NOTE(review): the ``def`` line of this method was not visible in
    # the reviewed source; the name and the end-of-text check are
    # restored - confirm against callers.
    self.encoding, self.text = self.decode_raw_stream(
        self.text,
        not self.disable_unicode,
        self.encoding,
        self.filename)

    for preproc in self.preprocessor:
        self.text = preproc(self.text)

    # push the match marker past the
    # encoding comment.
    self.match_reg(self._coding_re)

    self.textlength = len(self.text)

    while True:
        if self.match_position > self.textlength:
            break

        if self.match_end():
            break
        if self.match_expression():
            continue
        if self.match_control_line():
            continue
        if self.match_comment():
            continue
        if self.match_tag_start():
            continue
        if self.match_tag_end():
            continue
        if self.match_python_block():
            continue
        if self.match_text():
            continue

        if self.match_position > self.textlength:
            break
        raise exceptions.CompileException("assertion failed")

    if self.tag:
        raise exceptions.SyntaxException("Unclosed tag: <%%%s>" %
                                         self.tag[-1].keyword,
                                         **self.exception_kwargs)
    if self.control_line:
        raise exceptions.SyntaxException(
            "Unterminated control keyword: '%s'" %
            self.control_line[-1].keyword,
            self.text,
            self.control_line[-1].lineno,
            self.control_line[-1].pos, self.filename)
    return self.template
def match_tag_start(self):
    """Match an opening ``<%keyword attr="value" ...>`` tag.

    On a match a ``parsetree.Tag`` node is appended with its attributes
    collected into a dict.  A self-closing tag is popped from the tag
    stack immediately.  For a ``text`` tag the body up to ``</%text>``
    is consumed verbatim as a ``parsetree.Text`` node.

    :return: ``True`` if a tag was consumed, else ``False``.
    :raises exceptions.SyntaxException: if a ``text`` tag is not closed.
    """
    match = self.match(r'''
        \<%     # opening tag

        ([\w\.\:]+)   # keyword

        ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*)  # attrname, = sign, string expression

        \s*     # more whitespace

        (/)?>   # closing

        ''',

                       re.I | re.S | re.X)

    if not match:
        return False

    keyword, attr, isend = match.groups()
    self.keyword = keyword
    attributes = {}
    if attr:
        for att in re.findall(
                r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr):
            key, val1, val2 = att
            # exactly one of the two quoting alternatives captured
            text = val1 or val2
            text = text.replace('\r\n', '\n')
            attributes[key] = text
    self.append_node(parsetree.Tag, keyword, attributes)

    if isend:
        self.tag.pop()
    elif keyword == 'text':
        match = self.match(r'(.*?)(?=\</%text>)', re.S)
        if not match:
            raise exceptions.SyntaxException(
                "Unclosed tag: <%%%s>" %
                self.tag[-1].keyword,
                **self.exception_kwargs)
        self.append_node(parsetree.Text, match.group(1))
        return self.match_tag_end()
    return True
def match_tag_end(self):
    """Match a closing ``</%keyword>`` tag and pop it from the tag stack.

    :return: ``True`` if a closing tag was consumed, else ``False``.
    :raises exceptions.SyntaxException: if no tag is open, or if the
     closing keyword differs from the innermost open tag.
    """
    match = self.match(r'\</%[\t ]*(.+?)[\t ]*>')
    if not match:
        return False

    if not self.tag:
        raise exceptions.SyntaxException(
            "Closing tag without opening tag: </%%%s>" %
            match.group(1),
            **self.exception_kwargs)
    elif self.tag[-1].keyword != match.group(1):
        raise exceptions.SyntaxException(
            "Closing tag </%%%s> does not match tag: <%%%s>" %
            (match.group(1), self.tag[-1].keyword),
            **self.exception_kwargs)
    self.tag.pop()
    return True
def match_end(self):
    """Report whether the match position is at the end of the text.

    :return: a true value when ``\\Z`` matches at the current position,
     else ``False``.
    """
    # NOTE(review): the ``def`` line of this method was not visible in
    # the reviewed source; the name is restored - confirm against callers.
    match = self.match(r'\Z', re.S)
    if match:
        string = match.group()
        # an end-of-string match is empty, so fall back to True
        return string or True
    return False
def match_text(self):
    """Match a run of plain template text.

    Text is consumed up to (but not including) the next control or
    comment line, expression, or tag start/end; an escaped newline
    terminating the run is consumed and discarded.  Non-empty text is
    appended as a ``parsetree.Text`` node.

    :return: ``True`` if the pattern matched, else ``False``.
    """
    match = self.match(r"""
            (.*?)         # anything, followed by:
            (
             (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                         # comment preceded by a
                                         # consumed newline and whitespace
             |
             (?=\${)      # an expression
             |
             (?=</?[%&])  # a substitution or block or call start or end
                          # - don't consume
             |
             (\\\r?\n)    # an escaped newline  - throw away
             |
             \Z           # end of string
            )""", re.X | re.S)

    if not match:
        return False

    text = match.group(1)
    if text:
        self.append_node(parsetree.Text, text)
    return True
def match_python_block(self):
    """Match a ``<% ... %>`` or ``<%! ... %>`` python block.

    The block body is read up to the closing ``%>``, passed through
    ``adjust_whitespace`` and appended as a code node; the node's second
    argument is true when the ``<%!`` form was used.

    :return: ``True`` if a block was consumed, else ``False``.
    """
    match = self.match(r"<%(!)?")
    if not match:
        return False

    # remember where the block started; parse_until_text() moves on
    line, pos = self.matched_lineno, self.matched_charpos
    text, end = self.parse_until_text(r'%>')
    # the trailing newline helps
    # compiler.parse() not complain about indentation
    text = adjust_whitespace(text) + "\n"
    self.append_node(
        parsetree.Code,
        text,
        match.group(1) == '!', lineno=line, pos=pos)
    return True
def match_expression(self):
    """Match a ``${expression | escapes}`` substitution.

    The expression text is read up to ``|`` or ``}``; when a ``|`` is
    found, the remainder up to ``}`` is taken as the escapes.  A
    ``parsetree.Expression`` node is appended.

    :return: ``True`` if an expression was consumed, else ``False``.
    """
    match = self.match(r"\${")
    if not match:
        return False

    # remember where the expression started; parse_until_text() moves on
    line, pos = self.matched_lineno, self.matched_charpos
    text, end = self.parse_until_text(r'\|', r'}')
    if end == '|':
        escapes, end = self.parse_until_text(r'}')
    else:
        escapes = ""
    text = text.replace('\r\n', '\n')
    self.append_node(
        parsetree.Expression,
        text, escapes.strip(),
        lineno=line, pos=pos)
    return True
def match_control_line(self):
    """Match a ``% keyword ...`` control line or a ``## ...`` comment line.

    A ``%`` line is split into an optional ``end`` prefix and a keyword
    and appended as a ``parsetree.ControlLine``; a ``##`` line is
    appended as a ``parsetree.Comment``.

    :return: ``True`` if a line was consumed, else ``False``.
    :raises exceptions.SyntaxException: if the control line is
     malformed, or if an ``end`` keyword has no matching open control
     line.
    """
    match = self.match(
        r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\r?\n)|[^\r\n])*)"
        r"(?:\r?\n|\Z)", re.M)
    if not match:
        return False

    operator = match.group(1)
    text = match.group(2)
    if operator == '%':
        m2 = re.match(r'(end)?(\w+)\s*(.*)', text)
        if not m2:
            raise exceptions.SyntaxException(
                "Invalid control line: '%s'" %
                text,
                **self.exception_kwargs)
        isend, keyword = m2.group(1, 2)
        isend = (isend is not None)

        if isend:
            # an "end<keyword>" must close the innermost open control line
            if not self.control_line:
                raise exceptions.SyntaxException(
                    "No starting keyword '%s' for '%s'" %
                    (keyword, text),
                    **self.exception_kwargs)
            elif self.control_line[-1].keyword != keyword:
                raise exceptions.SyntaxException(
                    "Keyword '%s' doesn't match keyword '%s'" %
                    (text, self.control_line[-1].keyword),
                    **self.exception_kwargs)
        self.append_node(parsetree.ControlLine, keyword, isend, text)
    else:
        self.append_node(parsetree.Comment, text)
    return True
def match_comment(self):
    """matches the multiline version of a comment

    The text between ``<%doc>`` and ``</%doc>`` is appended as a
    ``parsetree.Comment`` node.

    :return: ``True`` if a comment was consumed, else ``False``.
    """
    match = self.match(r"<%doc>(.*?)</%doc>", re.S)
    if not match:
        return False

    self.append_node(parsetree.Comment, match.group(1))
    return True