misc: string.join has been removed in python3
[gem5.git] / src / arch / micro_asm.py
1 # Copyright (c) 2003-2005 The Regents of The University of Michigan
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met: redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer;
8 # redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution;
11 # neither the name of the copyright holders nor the names of its
12 # contributors may be used to endorse or promote products derived from
13 # this software without specific prior written permission.
14 #
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 from __future__ import print_function
28
29 import os
30 import sys
31 import re
32 import traceback
33 # get type names
34 from types import *
35
36 from ply import lex
37 from ply import yacc
38
39 ##########################################################################
40 #
41 # Base classes for use outside of the assembler
42 #
43 ##########################################################################
44
class Micro_Container(object):
    """Named, ordered collection of microops.

    Besides the microop list, every instance carries ``directives``,
    ``micro_classes`` and ``labels`` dictionaries, all initially empty.
    """

    def __init__(self, name):
        self.name = name
        self.microops = []
        self.labels = {}
        self.directives = {}
        self.micro_classes = {}

    def add_microop(self, mnemonic, microop):
        """Append ``microop`` to the list; ``mnemonic`` is not used here."""
        self.microops.append(microop)

    def __str__(self):
        """Return the name line followed by one line per microop."""
        pieces = ["%s:\n" % self.name]
        pieces.extend(" %s\n" % op for op in self.microops)
        return "".join(pieces)
61
class Combinational_Macroop(Micro_Container):
    """Micro_Container subclass that adds no behavior of its own."""
64
class Rom_Macroop(object):
    """Pairs a macroop name with a target."""

    def __init__(self, name, target):
        self.name, self.target = name, target

    def __str__(self):
        """Return ``"<name>: <target>"`` terminated by a newline."""
        fields = (self.name, self.target)
        return "%s: %s\n" % fields
72
class Rom(Micro_Container):
    """Micro_Container that additionally keeps an ``externs`` dictionary."""

    def __init__(self, name):
        super(Rom, self).__init__(name)
        # Starts empty, like the dictionaries set up by the base class.
        self.externs = dict()
77
78 ##########################################################################
79 #
80 # Support classes
81 #
82 ##########################################################################
83
class Label(object):
    """A label attached to a microop.

    The grammar actions that build labels and the code that consumes them in
    this file use the ``text`` and ``is_extern`` attributes, so both are
    initialised here.  ``name`` and ``extern`` are the attributes this class
    originally set and are kept so existing users keep working.
    """

    def __init__(self):
        # Attributes read and written by the parser and handle_statement.
        self.is_extern = False
        self.text = ""
        # Original attribute names, retained for backward compatibility.
        self.extern = False
        self.name = ""
88
class Block(object):
    """Holder for a list of statements, initially empty."""

    def __init__(self):
        self.statements = list()
92
class Statement(object):
    """Base statement: neither a microop nor a directive, with empty params."""

    def __init__(self):
        self.params = ""
        # Kind flags; the subclasses switch on the one that applies to them.
        self.is_microop = self.is_directive = False
98
class Microop(Statement):
    """Statement flagged as a microop, with a mnemonic and a label list."""

    def __init__(self):
        super(Microop, self).__init__()
        # Set after the base initialiser, which clears the flag.
        self.is_microop = True
        self.mnemonic = ""
        self.labels = list()
105
class Directive(Statement):
    """Statement flagged as a directive, identified by ``name``."""

    def __init__(self):
        super(Directive, self).__init__()
        # Set after the base initialiser, which clears the flag.
        self.is_directive = True
        self.name = ""
111
112 ##########################################################################
113 #
114 # Functions that handle common tasks
115 #
116 ##########################################################################
117
def print_error(message):
    """Print ``message`` prefixed with ``*** `` between two blank lines."""
    # One call produces the same three output lines: blank, message, blank.
    print("\n*** %s\n" % message)
122
def handle_statement(parser, container, statement):
    """Apply one parsed statement to ``container``.

    A microop statement is instantiated by evaluating its parameter text as
    the argument list of the class registered for its mnemonic in
    ``parser.microops``.  The resulting object is stored in
    ``container.labels`` under each of the statement's labels (and in
    ``container.externs`` for extern labels) and then handed to
    ``container.add_microop``.

    A directive statement calls the function registered under its name in
    ``container.directives``, again with the parameter text evaluated as the
    argument list.

    In both cases ``parser.symbols`` is the local namespace of the
    evaluation.

    Raises:
        Exception: the mnemonic or directive name is not registered, or the
            statement is neither a microop nor a directive.
        Any exception raised while evaluating the parameters or recording
        the microop is reported through print_error and re-raised.
    """
    if statement.is_microop:
        mnemonic = statement.mnemonic
        if mnemonic not in parser.microops:
            raise Exception("Unrecognized mnemonic: %s" % mnemonic)
        parser.symbols["__microopClassFromInsideTheAssembler"] = \
            parser.microops[mnemonic]
        try:
            # NOTE: eval executes the parameter text as Python code, so the
            # assembly source must come from a trusted place.
            microop = eval('__microopClassFromInsideTheAssembler(%s)' %
                           statement.params, {}, parser.symbols)
        except:
            # Deliberately broad: only adds context, then re-raises.
            print_error("Error creating microop object with mnemonic %s." %
                        mnemonic)
            raise
        try:
            for label in statement.labels:
                container.labels[label.text] = microop
                if label.is_extern:
                    container.externs[label.text] = microop
            container.add_microop(mnemonic, microop)
        except:
            # Deliberately broad: only adds context, then re-raises.
            print_error("Error adding microop.")
            raise
    elif statement.is_directive:
        if statement.name not in container.directives:
            raise Exception("Unrecognized directive: %s" % statement.name)
        parser.symbols["__directiveFunctionFromInsideTheAssembler"] = \
            container.directives[statement.name]
        try:
            # NOTE: same trust requirement as for microop parameters.
            eval('__directiveFunctionFromInsideTheAssembler(%s)' %
                 statement.params, {}, parser.symbols)
        except:
            # Deliberately broad: only adds context, then re-raises.
            print_error("Error executing directive.")
            print(container.directives)
            raise
    else:
        # The statement goes into the message; the old three-argument raise
        # passed it in the slot reserved for a traceback.
        raise Exception("Didn't recognize the type of statement: %r" %
                        (statement,))
159
160 ##########################################################################
161 #
162 # Lexer specification
163 #
164 ##########################################################################
165
166 # Error handler. Just call exit. Output formatted to work under
167 # Emacs compile-mode. Optional 'print_traceback' arg, if set to True,
168 # prints a Python stack backtrace too (can be handy when trying to
169 # debug the parser itself).
def error(lineno, string, print_traceback = False):
    """Exit via sys.exit with ``string``, prefixed by ``lineno`` when non-zero.

    When ``print_traceback`` is true, traceback.print_exc() is called before
    exiting.
    """
    if print_traceback:
        traceback.print_exc()
    # A line number of 0 means "no line information": the prefix is empty.
    line_str = "%d:" % lineno if lineno != 0 else ""
    sys.exit("%s %s" % (line_str, string))
179
# Keywords that get a token type of their own.
reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

tokens = reserved + (
    'ID',                       # identifier
    'PARAMS',                   # arguments for microops and directives
    'LPAREN', 'RPAREN',
    'LBRACE', 'RBRACE',
    'COLON', 'SEMI', 'DOT',
    'NEWLINE',
)

# New lines are ignored at the top level, but they end statements in the
# assembler
states = (
    ('asm', 'exclusive'),
    ('params', 'exclusive'),
)

# Lower-case keyword spelling -> token type.
reserved_map = dict((keyword.lower(), keyword) for keyword in reserved)
204
205 # Ignore comments
def t_ANY_COMMENT(t):
    r'\#[^\n]*'
    # Matches from '#' up to, but not including, the end of the line.  The
    # pattern does not require a newline to follow, so a comment on a final
    # line without a trailing newline is accepted too.  Nothing is returned,
    # so no token is produced for the comment.
208
def t_ANY_MULTILINECOMMENT(t):
    r'/\*([^/]|((?<!\*)/))*\*/'
    # The comment text is dropped (nothing is returned), but the line breaks
    # it spans still have to be counted or every later line number reported
    # by the lexer would be too small.
    t.lexer.lineno += t.value.count('\n')
211
212 # A colon marks the end of a label. It should follow an ID which will
213 # put the lexer in the "params" state. Seeing the colon will put it back
214 # in the "asm" state since it knows it saw a label and not a mnemonic.
def t_params_COLON(t):
    r':'
    # The colon shows the preceding ID was a label, so stop looking for
    # parameters and go back to the "asm" state.
    lexer = t.lexer
    lexer.begin('asm')
    return t
219
# Parameters are a string of text which doesn't contain an unescaped
# statement terminator, i.e. a newline or semicolon.
def t_params_PARAMS(t):
    r'([^\n;\\]|(\\[\n;\\]))+'
    # Escaped newlines are part of the match, so count them on the lexer;
    # updating only the token's own lineno would leave the lexer's running
    # line counter untouched.
    t.lexer.lineno += t.value.count('\n')
    # Strip the backslash from every escaped newline, semicolon or backslash.
    t.value = re.sub(r'\\([\n;\\])', r'\1', t.value)
    # The parameters are complete; resume normal assembly lexing.
    t.lexer.begin('asm')
    return t
232
233 # An "ID" in the micro assembler is either a label, directive, or mnemonic
234 # If it's either a directive or a mnemonic, it will be optionally followed by
235 # parameters. If it's a label, the following colon will make the lexer stop
236 # looking for parameters.
def t_asm_ID(t):
    r'[A-Za-z_]\w*'
    token_type = reserved_map.get(t.value, 'ID')
    t.type = token_type
    # "extern" only introduces the label that follows, so parameters are
    # not expected yet; any other identifier may be followed by them.
    if token_type != 'EXTERN':
        t.lexer.begin('params')
    return t
245
246 # If there is a label and you're -not- in the assembler (which would be caught
247 # above), don't start looking for parameters.
def t_ANY_ID(t):
    r'[A-Za-z_]\w*'
    # Keywords get their own token type, everything else is a plain ID.
    # The lexer state is left unchanged.
    kind = reserved_map.get(t.value, 'ID')
    t.type = kind
    return t
252
253 # Braces enter and exit micro assembly
def t_INITIAL_LBRACE(t):
    r'\{'
    # An opening brace at the top level starts micro assembly.
    lexer = t.lexer
    lexer.begin('asm')
    return t
258
def t_asm_RBRACE(t):
    r'\}'
    # A closing brace ends micro assembly and returns to the top level.
    lexer = t.lexer
    lexer.begin('INITIAL')
    return t
263
264 # At the top level, keep track of newlines only for line counting.
def t_INITIAL_NEWLINE(t):
    r'\n+'
    # No token is returned, so the count has to be kept on the lexer;
    # a change to the discarded token's lineno would be lost.
    t.lexer.lineno += t.value.count('\n')
268
269 # In the micro assembler, do line counting but also return a token. The
270 # token is needed by the parser to detect the end of a statement.
def t_asm_NEWLINE(t):
    r'\n+'
    # Advance the lexer's line counter so following tokens get the right
    # line number.  The returned token keeps the line it started on.
    t.lexer.lineno += t.value.count('\n')
    return t
275
276 # A newline or semi colon when looking for params signals that the statement
277 # is over and the lexer should go back to looking for regular assembly.
def t_params_NEWLINE(t):
    r'\n+'
    # Advance the lexer's line counter so following tokens get the right
    # line number.  The returned token keeps the line it started on.
    t.lexer.lineno += t.value.count('\n')
    # The statement is over; go back to looking for regular assembly.
    t.lexer.begin('asm')
    return t
283
def t_params_SEMI(t):
    r';'
    # A semicolon ends the statement; resume regular assembly lexing.
    lexer = t.lexer
    lexer.begin('asm')
    return t
288
289 # Basic regular expressions to pick out simple tokens
t_ANY_LPAREN = r'\('    # opening parenthesis
t_ANY_RPAREN = r'\)'    # closing parenthesis
t_ANY_SEMI = r';'       # statement terminator
t_ANY_DOT = r'\.'       # directive prefix

# Characters skipped in every state: space, tab and form feed.
t_ANY_ignore = ' \t\x0c'
296
def t_ANY_error(t):
    # Report the first character that no rule could match.
    error(t.lineno, "illegal character '%s'" % t.value[0])
    # Skipping is done through the lexer object.  This line is only reached
    # if error() returns instead of exiting.
    t.lexer.skip(1)
300
301 ##########################################################################
302 #
303 # Parser specification
304 #
305 ##########################################################################
306
307 # Start symbol for a file which may have more than one macroop or rom
308 # specification.
def p_file(t):
    'file : opt_rom_or_macros'
    # Start symbol; the rule has no action and produces no value.
    return None
311
def p_opt_rom_or_macros_0(t):
    'opt_rom_or_macros : '
    # Empty alternative; the rule has no action.
    return None
314
def p_opt_rom_or_macros_1(t):
    'opt_rom_or_macros : rom_or_macros'
    # Non-empty alternative; the rule has no action.
    return None
317
def p_rom_or_macros_0(t):
    'rom_or_macros : rom_or_macro'
    # Single-item case of the list; the rule has no action.
    return None
320
def p_rom_or_macros_1(t):
    'rom_or_macros : rom_or_macros rom_or_macro'
    # Left-recursive case of the list; the rule has no action.
    return None
323
def p_rom_or_macro_0(t):
    '''rom_or_macro : rom_block
                    | macroop_def'''
    # Either alternative is accepted; the rule has no action.
    return None
327
328 # Defines a section of microcode that should go in the current ROM
def p_rom_block(t):
    'rom_block : DEF ROM block SEMI'
    # Every statement of the block is applied to the parser's rom object,
    # which also becomes the value of the rule.
    rom = t.parser.rom
    if not rom:
        print_error("Rom block found, but no Rom object specified.")
        # Call-style raise: the comma form is a syntax error on Python 3.
        raise TypeError("Rom block found, but no Rom object was specified.")
    for statement in t[3].statements:
        handle_statement(t.parser, rom, statement)
    t[0] = rom
337
338 # Defines a macroop that jumps to an external label in the ROM
def p_macroop_def_0(t):
    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
    # Build a ROM based macroop from the macroop name (t[3]) and the
    # identifier between the parentheses (t[5]), and register it by name.
    if not t.parser.rom_macroop_type:
        print_error("ROM based macroop found, but no ROM macroop class was specified.")
        # Call-style raise: the comma form is a syntax error on Python 3.
        raise TypeError("ROM based macroop found, but no ROM macroop class was specified.")
    macroop = t.parser.rom_macroop_type(t[3], t[5])
    t.parser.macroops[t[3]] = macroop
346
347
348 # Defines a macroop that is combinationally generated
def p_macroop_def_1(t):
    'macroop_def : DEF MACROOP ID block SEMI'
    # Create the macroop from its name, apply every statement of the block
    # to it, then register it under that name.
    name = t[3]
    try:
        macroop = t.parser.macro_type(name)
    except TypeError:
        print_error("Error creating macroop object.")
        raise
    for stmt in t[4].statements:
        handle_statement(t.parser, macroop, stmt)
    t.parser.macroops[name] = macroop
359
360 # A block of statements
def p_block(t):
    'block : LBRACE statements RBRACE'
    # Wrap the statement list found between the braces in a Block.
    wrapped = Block()
    wrapped.statements = t[2]
    t[0] = wrapped
366
def p_statements_0(t):
    'statements : statement'
    # Start the list; an empty statement contributes nothing.
    t[0] = [t[1]] if t[1] else []
373
def p_statements_1(t):
    'statements : statements statement'
    # Extend the existing list in place; empty statements are skipped.
    collected, latest = t[1], t[2]
    if latest:
        collected.append(latest)
    t[0] = collected
379
def p_statement(t):
    'statement : content_of_statement end_of_statement'
    # The terminator carries no information; pass the content through.
    content = t[1]
    t[0] = content
383
384 # A statement can be a microop or an assembler directive
def p_content_of_statement_0(t):
    '''content_of_statement : microop
                            | directive'''
    # Whichever alternative matched is passed through unchanged.
    matched = t[1]
    t[0] = matched
389
390 # Ignore empty statements
def p_content_of_statement_1(t):
    'content_of_statement : '
    # Empty statement: nothing is assigned to t[0].
    return None
394
395 # Statements are ended by newlines or a semi colon
def p_end_of_statement(t):
    '''end_of_statement : NEWLINE
                        | SEMI'''
    # Terminators produce no value: nothing is assigned to t[0].
    return None
400
401 # Different flavors of microop to avoid shift/reduce errors
def p_microop_0(t):
    'microop : labels ID'
    # Labelled microop without parameters.
    op = Microop()
    op.mnemonic = t[2]
    op.labels = t[1]
    t[0] = op
408
def p_microop_1(t):
    'microop : ID'
    # Bare microop: no labels, no parameters.
    op = Microop()
    op.mnemonic = t[1]
    t[0] = op
414
def p_microop_2(t):
    'microop : labels ID PARAMS'
    # Labelled microop with parameters.
    op = Microop()
    op.mnemonic = t[2]
    op.params = t[3]
    op.labels = t[1]
    t[0] = op
422
def p_microop_3(t):
    'microop : ID PARAMS'
    # Unlabelled microop with parameters.
    op = Microop()
    op.mnemonic = t[1]
    op.params = t[2]
    t[0] = op
429
430 # Labels in the microcode
def p_labels_0(t):
    'labels : label'
    # Start a new label list with the single label.
    first = t[1]
    t[0] = [first]
434
def p_labels_1(t):
    'labels : labels label'
    # Grow the existing list in place and pass it on.
    so_far = t[1]
    so_far.append(t[2])
    t[0] = so_far
439
440 # labels on lines by themselves are attached to the following instruction.
def p_labels_2(t):
    'labels : labels NEWLINE'
    # The newline is dropped; the label list is passed on unchanged.
    so_far = t[1]
    t[0] = so_far
444
def p_label_0(t):
    'label : ID COLON'
    # Plain label: records the identifier and marks it as not extern.
    lbl = Label()
    lbl.text = t[1]
    lbl.is_extern = False
    t[0] = lbl
451
def p_label_1(t):
    'label : EXTERN ID COLON'
    # Extern label: the identifier follows the "extern" keyword.
    lbl = Label()
    lbl.text = t[2]
    lbl.is_extern = True
    t[0] = lbl
458
459 # Directives for the macroop
def p_directive_0(t):
    'directive : DOT ID'
    # Directive without parameters; the name is the ID after the dot.
    result = Directive()
    result.name = t[2]
    t[0] = result
465
def p_directive_1(t):
    'directive : DOT ID PARAMS'
    # Directive with parameters; the name is the ID after the dot.
    result = Directive()
    result.params = t[3]
    result.name = t[2]
    t[0] = result
472
473 # Parse error handler. Note that the argument here is the offending
474 # *token*, not a grammar symbol (hence the need to use t.value)
def p_error(t):
    # Without a token there is no line or text to report, so ask error()
    # for a traceback instead.
    if not t:
        error(0, "unknown syntax error", True)
    else:
        error(t.lineno, "syntax error at '%s'" % t.value)
480
class MicroAssembler(object):
    """Bundles a PLY lexer and parser and exposes ``assemble``.

    The constructor arguments are stored as attributes on the parser object,
    together with an empty ``macroops`` dictionary and an empty ``symbols``
    dictionary.  The latter is also available as ``self.symbols``.
    """

    def __init__(self, macro_type, microops,
                 rom = None, rom_macroop_type = None):
        self.lexer = lex.lex()
        parser = yacc.yacc()
        parser.macro_type = macro_type
        parser.macroops = {}
        parser.microops = microops
        parser.rom = rom
        parser.rom_macroop_type = rom_macroop_type
        parser.symbols = {}
        self.parser = parser
        # Same dictionary object as the parser's, not a copy.
        self.symbols = parser.symbols

    def assemble(self, asm):
        """Parse ``asm`` and return the macroops collected during the parse.

        The parser's ``macroops`` dictionary is replaced by a fresh empty one
        afterwards.
        """
        parser = self.parser
        parser.parse(asm, lexer=self.lexer)
        assembled, parser.macroops = parser.macroops, {}
        return assembled
498 self.parser.macroops = {}
499 return macroops