X86: Make the microassembler accept lines which are just labels.
[gem5.git] / src / arch / micro_asm.py
1 # Copyright (c) 2003-2005 The Regents of The University of Michigan
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met: redistributions of source code must retain the above copyright
7 # notice, this list of conditions and the following disclaimer;
8 # redistributions in binary form must reproduce the above copyright
9 # notice, this list of conditions and the following disclaimer in the
10 # documentation and/or other materials provided with the distribution;
11 # neither the name of the copyright holders nor the names of its
12 # contributors may be used to endorse or promote products derived from
13 # this software without specific prior written permission.
14 #
15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #
27 # Authors: Gabe Black
28
29 import os
30 import sys
31 import re
32 import string
33 import traceback
34 # get type names
35 from types import *
36
37 # Prepend the directory where the PLY lex & yacc modules are found
38 # to the search path.
39 sys.path[0:0] = [os.environ['M5_PLY']]
40
41 from ply import lex
42 from ply import yacc
43
44 ##########################################################################
45 #
46 # Base classes for use outside of the assembler
47 #
48 ##########################################################################
49
class Micro_Container(object):
    """Named, ordered collection of microops plus per-container tables.

    Attributes set up here:
        microops: list of microop objects, in the order they were added.
        name: the name passed to the constructor.
        directives: dict of directive name -> callable.
        micro_classes: dict, empty on construction.
        labels: dict of label text -> microop.
    """

    def __init__(self, name):
        self.name = name
        self.microops = []
        self.labels = {}
        self.directives = {}
        self.micro_classes = {}

    def add_microop(self, microop):
        """Append ``microop`` to the end of this container."""
        self.microops.append(microop)

    def __str__(self):
        # One header line with the name, then one line per microop.
        header = "%s:\n" % self.name
        body = [" %s\n" % op for op in self.microops]
        return header + "".join(body)
66
class Combinational_Macroop(Micro_Container):
    """Macroop container that adds nothing to Micro_Container."""
69
class Rom_Macroop(object):
    """Macroop described only by its name and a target.

    Attributes:
        name: the macroop's name.
        target: whatever was given as the target when it was defined.
    """

    def __init__(self, name, target):
        self.target = target
        self.name = name

    def __str__(self):
        # Rendered as "<name>: <target>" followed by a newline.
        fields = (self.name, self.target)
        return "%s: %s\n" % fields
77
class Rom(Micro_Container):
    """Micro_Container that additionally keeps a table of extern labels.

    Attributes:
        externs: dict of label text -> microop, empty on construction.
    """

    def __init__(self, name):
        # Let the base class create the common tables first.
        super(Rom, self).__init__(name)
        self.externs = dict()
82
83 ##########################################################################
84 #
85 # Support classes
86 #
87 ##########################################################################
88
class Label(object):
    """A label parsed from the microcode source.

    The grammar rules in this file fill in ``text`` and ``is_extern``,
    while this class originally declared only ``extern`` and ``name``.
    All four attributes get defaults here so every Label carries them
    regardless of which code created it.

    Attributes:
        extern: True when the label was declared extern.
        is_extern: same meaning as ``extern``; the name the grammar rules
            assign to.
        name: unused placeholder kept for backward compatibility.
        text: the label's identifier text.
    """

    def __init__(self):
        self.extern = False
        self.is_extern = False
        self.name = ""
        self.text = ""
93
class Block(object):
    """Holder for the list of statements found between a pair of braces."""

    def __init__(self):
        # Starts empty; the parser replaces this with the parsed list.
        self.statements = list()
97
class Statement(object):
    """Common base for the statements a block can contain.

    Attributes:
        is_microop: False here; a subclass may override it.
        is_directive: False here; a subclass may override it.
        params: parameter text of the statement, empty by default.
    """

    def __init__(self):
        self.params = ""
        self.is_directive = False
        self.is_microop = False
103
class Microop(Statement):
    """Statement that names a microop.

    Attributes (in addition to Statement's):
        mnemonic: the microop's mnemonic, empty by default.
        labels: list of labels attached to this microop.
    """

    def __init__(self):
        super(Microop, self).__init__()
        # Override the base-class flag to mark this as a microop.
        self.is_microop = True
        self.labels = []
        self.mnemonic = ""
110
class Directive(Statement):
    """Statement that names an assembler directive.

    Attributes (in addition to Statement's):
        name: the directive's name, empty by default.
    """

    def __init__(self):
        super(Directive, self).__init__()
        # Override the base-class flag to mark this as a directive.
        self.is_directive = True
        self.name = ""
116
117 ##########################################################################
118 #
119 # Functions that handle common tasks
120 #
121 ##########################################################################
122
def print_error(message):
    """Print ``message`` prefixed with "*** " between two blank lines."""
    # Each print is given exactly one parenthesised value, so it behaves
    # the same as a print statement and as a print() call.
    for line in ("", "*** %s" % message, ""):
        print(line)
127
def handle_statement(parser, container, statement):
    """Apply one parsed statement to ``container``.

    For a microop statement, the class registered under the statement's
    mnemonic in ``parser.microops`` is called with the statement's
    parameter text as its argument list. The resulting object is recorded
    in ``container.labels`` under each attached label (and in
    ``container.externs`` for extern labels) and then added to the
    container with ``add_microop``.

    For a directive statement, the callable registered under the
    statement's name in ``container.directives`` is called with the
    parameter text as its argument list.

    In both cases the parameter text is evaluated as Python with
    ``parser.symbols`` as the local namespace.

    Raises:
        Exception: if the mnemonic or directive name is not registered, or
            if the statement is neither a microop nor a directive.
        Any exception raised while creating or adding the microop, or
        while running the directive, is reported through print_error and
        then re-raised unchanged.
    """
    if statement.is_microop:
        if statement.mnemonic not in parser.microops:
            raise Exception("Unrecognized mnemonic: %s" % statement.mnemonic)
        # Expose the class under a fixed name so the eval'ed call can
        # reach it through the symbol table.
        parser.symbols["__microopClassFromInsideTheAssembler"] = \
            parser.microops[statement.mnemonic]
        try:
            # NOTE: the parameter text is evaluated as Python code, so
            # only trusted microcode sources should be assembled.
            microop = eval('__microopClassFromInsideTheAssembler(%s)' %
                    statement.params, {}, parser.symbols)
        except:
            print_error("Error creating microop object with mnemonic %s." % \
                    statement.mnemonic)
            raise
        try:
            for label in statement.labels:
                container.labels[label.text] = microop
                # The grammar rules mark extern labels with "is_extern"
                # while Label itself declares "extern"; honour either so
                # extern labels are not silently dropped.
                if (getattr(label, "extern", False) or
                        getattr(label, "is_extern", False)):
                    container.externs[label.text] = microop
            container.add_microop(microop)
        except:
            print_error("Error adding microop.")
            raise
    elif statement.is_directive:
        if statement.name not in container.directives:
            raise Exception("Unrecognized directive: %s" % statement.name)
        parser.symbols["__directiveFunctionFromInsideTheAssembler"] = \
            container.directives[statement.name]
        try:
            eval('__directiveFunctionFromInsideTheAssembler(%s)' %
                    statement.params, {}, parser.symbols)
        except:
            print_error("Error executing directive.")
            print(container.directives)
            raise
    else:
        # The statement goes into the message; passing it as a third
        # argument to raise would be taken as a traceback and fail.
        raise Exception("Didn't recognize the type of statement: %s" %
                (statement,))
164
165 ##########################################################################
166 #
167 # Lexer specification
168 #
169 ##########################################################################
170
171 # Error handler. Just call exit. Output formatted to work under
172 # Emacs compile-mode. Optional 'print_traceback' arg, if set to True,
173 # prints a Python stack backtrace too (can be handy when trying to
174 # debug the parser itself).
def error(lineno, string, print_traceback = False):
    """Terminate via sys.exit with a "<lineno>: <message>" string.

    Args:
        lineno: source line to report; 0 means no line prefix is printed.
        string: the message text.
        print_traceback: when true, print the current exception's
            traceback before exiting.
    """
    if print_traceback:
        traceback.print_exc()
    # A line number of zero leaves the prefix empty.
    line_str = ""
    if lineno != 0:
        line_str = "%d:" % lineno
    sys.exit("%s %s" % (line_str, string))
184
# Keywords of the language; each one is also a token type.
reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

# Every token type the lexer can produce.
tokens = reserved + (
    # identifier
    'ID',
    # arguments for microops and directives
    'PARAMS',
    # punctuation
    'LPAREN', 'RPAREN', 'LBRACE', 'RBRACE',
    'COLON', 'SEMI', 'DOT',
    # statement terminator inside the assembler
    'NEWLINE',
)

# New lines are ignored at the top level, but they end statements in the
# assembler
states = (('asm', 'exclusive'), ('params', 'exclusive'))

# Maps the lower-case spelling of each keyword to its token type.
reserved_map = dict((word.lower(), word) for word in reserved)
209
210 # Ignore comments
def t_ANY_COMMENT(t):
    r'\#[^\n]*(?=\n)'
    # The string above is the token's regular expression, not
    # documentation: a '#' and everything up to, but not including, the
    # newline that must follow it. Nothing is returned, so no token is
    # produced for the comment.
213
def t_ANY_MULTILINECOMMENT(t):
    r'/\*([^/]|((?<!\*)/))*\*/'
    # The string above is the token's regular expression: a /* ... */
    # comment. Nothing is returned, so no token is produced for it.
    # The comment can span lines, so count the newlines it contains the
    # same way the other rules in this file do; otherwise line numbers
    # reported after a multi-line comment would be too small.
    t.lineno += t.value.count('\n')
216
217 # A colon marks the end of a label. It should follow an ID which will
218 # put the lexer in the "params" state. Seeing the colon will put it back
219 # in the "asm" state since it knows it saw a label and not a mnemonic.
def t_params_COLON(t):
    r':'
    # The identifier before this colon was a label, so stop looking for
    # parameters and switch the lexer back to the "asm" state.
    lexer = t.lexer
    lexer.begin('asm')
    return t
224
# Parameters are a string of text which doesn't contain an unescaped
# statement terminator, i.e. a newline or semicolon.
def t_params_PARAMS(t):
    r'([^\n;\\]|(\\[\n;\\]))+'
    # The string above is the token's regular expression: any run of
    # characters other than newline, ';' and backslash, plus
    # backslash-escaped newline, ';' or backslash.
    raw = t.value
    # Escaped newlines are part of the match, so keep the line count right.
    t.lineno += raw.count('\n')
    # Strip the backslash from every escape, keeping the escaped character.
    t.value = re.sub(r'(\\[\n;\\])', lambda mo: mo.group(0)[1], raw)
    # The parameters are complete; resume scanning ordinary assembly.
    t.lexer.begin('asm')
    return t
237
238 # An "ID" in the micro assembler is either a label, directive, or mnemonic
239 # If it's either a directive or a mnemonic, it will be optionally followed by
240 # parameters. If it's a label, the following colon will make the lexer stop
241 # looking for parameters.
def t_asm_ID(t):
    r'[A-Za-z_]\w*'
    # Keywords get their own token type; anything else is a plain ID.
    token_type = reserved_map.get(t.value, 'ID')
    t.type = token_type
    # Inside the assembler an identifier may be followed by parameters,
    # so switch to the "params" state.
    t.lexer.begin('params')
    return t
247
248 # If there is a label and you're -not- in the assembler (which would be caught
249 # above), don't start looking for parameters.
def t_ANY_ID(t):
    r'[A-Za-z_]\w*'
    # Keywords get their own token type; anything else is a plain ID.
    # Unlike t_asm_ID, the lexer state is left unchanged.
    token_type = reserved_map.get(t.value, 'ID')
    t.type = token_type
    return t
254
255 # Braces enter and exit micro assembly
def t_INITIAL_LBRACE(t):
    r'\{'
    # An opening brace at the top level enters the "asm" state.
    lexer = t.lexer
    lexer.begin('asm')
    return t
260
def t_asm_RBRACE(t):
    r'\}'
    # A closing brace inside the assembler returns to the top-level state.
    lexer = t.lexer
    lexer.begin('INITIAL')
    return t
265
266 # At the top level, keep track of newlines only for line counting.
def t_INITIAL_NEWLINE(t):
    r'\n+'
    # Count the lines; returning nothing means no token is produced.
    newline_count = t.value.count('\n')
    t.lineno = t.lineno + newline_count
270
271 # In the micro assembler, do line counting but also return a token. The
272 # token is needed by the parser to detect the end of a statement.
def t_asm_NEWLINE(t):
    r'\n+'
    # Count the lines and hand the token on to the parser.
    newline_count = t.value.count('\n')
    t.lineno = t.lineno + newline_count
    return t
277
278 # A newline or semi colon when looking for params signals that the statement
279 # is over and the lexer should go back to looking for regular assembly.
def t_params_NEWLINE(t):
    r'\n+'
    # Count the lines, then leave the "params" state: the statement ended
    # without any parameter text.
    newline_count = t.value.count('\n')
    t.lineno = t.lineno + newline_count
    t.lexer.begin('asm')
    return t
285
def t_params_SEMI(t):
    r';'
    # A semicolon ends the statement, so go back to the "asm" state.
    lexer = t.lexer
    lexer.begin('asm')
    return t
290
291 # Basic regular expressions to pick out simple tokens
# Single-character tokens; the ANY prefix makes each pattern apply in
# every lexer state.
t_ANY_LPAREN = r'\('
t_ANY_RPAREN = r'\)'
t_ANY_SEMI = r';'
t_ANY_DOT = r'\.'

# Characters skipped between tokens in every state: space, tab and
# form feed (\x0c).
t_ANY_ignore = ' \t\x0c'
298
def t_ANY_error(t):
    # Report the first character the lexer could not match, in any state.
    bad_char = t.value[0]
    error(t.lineno, "illegal character '%s'" % bad_char)
    # Only reached if error() returns; as written in this file it exits
    # through sys.exit.
    t.skip(1)
302
303 ##########################################################################
304 #
305 # Parser specification
306 #
307 ##########################################################################
308
309 # Start symbol for a file which may have more than one macroop or rom
310 # specification.
def p_file(t):
    'file : opt_rom_or_macros'
    # The string above is the grammar production, not documentation.
    # A file is an optional sequence of rom blocks and macroop
    # definitions; no semantic action is needed.
313
def p_opt_rom_or_macros_0(t):
    'opt_rom_or_macros : '
    # Empty production: a file may contain no definitions at all.
316
def p_opt_rom_or_macros_1(t):
    'opt_rom_or_macros : rom_or_macros'
    # Non-empty case: one or more definitions; no semantic action.
319
def p_rom_or_macros_0(t):
    'rom_or_macros : rom_or_macro'
    # Base case of the list: a single definition; no semantic action.
322
def p_rom_or_macros_1(t):
    'rom_or_macros : rom_or_macros rom_or_macro'
    # Recursive case: a list of definitions followed by one more; no
    # semantic action.
325
def p_rom_or_macro_0(t):
    '''rom_or_macro : rom_block
                    | macroop_def'''
    # A definition is either a rom block or a macroop definition; the
    # work is done by the rules for those two symbols.
329
330 # Defines a section of microcode that should go in the current ROM
def p_rom_block(t):
    'rom_block : DEF ROM block SEMI'
    # The statements of the block are applied to the parser's Rom object,
    # which must have been supplied when the assembler was created.
    if not t.parser.rom:
        print_error("Rom block found, but no Rom object specified.")
        raise TypeError("Rom block found, but no Rom object was specified.")
    block = t[3]
    for statement in block.statements:
        handle_statement(t.parser, t.parser.rom, statement)
    # The rule's value is the Rom object itself.
    t[0] = t.parser.rom
339
340 # Defines a macroop that jumps to an external label in the ROM
def p_macroop_def_0(t):
    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
    # t[3] is the macroop's name and t[5] the identifier in parentheses;
    # both go to the ROM macroop class supplied to the assembler.
    make_macroop = t.parser.rom_macroop_type
    if not make_macroop:
        print_error("ROM based macroop found, but no ROM macroop class was specified.")
        raise TypeError("ROM based macroop found, but no ROM macroop class was specified.")
    name, target = t[3], t[5]
    t.parser.macroops[name] = make_macroop(name, target)
348
349
350 # Defines a macroop that is combinationally generated
def p_macroop_def_1(t):
    'macroop_def : DEF MACROOP ID block SEMI'
    # t[3] is the macroop's name and t[4] the block of statements.
    name = t[3]
    try:
        curop = t.parser.macro_type(name)
    except TypeError:
        print_error("Error creating macroop object.")
        raise
    # Apply every statement of the block to the new macroop, then
    # register it under its name.
    body = t[4]
    for statement in body.statements:
        handle_statement(t.parser, curop, statement)
    t.parser.macroops[name] = curop
361
362 # A block of statements
def p_block(t):
    'block : LBRACE statements RBRACE'
    # Wrap the statement list found between the braces in a Block.
    wrapped = Block()
    wrapped.statements = t[2]
    t[0] = wrapped
368
def p_statements_0(t):
    'statements : statement'
    # Start the list; an empty statement has no value and is left out.
    collected = []
    first = t[1]
    if first:
        collected.append(first)
    t[0] = collected
375
def p_statements_1(t):
    'statements : statements statement'
    # Extend the existing list in place; empty statements are skipped.
    collected = t[1]
    latest = t[2]
    if latest:
        collected.append(latest)
    t[0] = collected
381
def p_statement(t):
    'statement : content_of_statement end_of_statement'
    # The terminator carries no information; pass the content through.
    content = t[1]
    t[0] = content
385
386 # A statement can be a microop or an assembler directive
def p_content_of_statement_0(t):
    '''content_of_statement : microop
                            | directive'''
    # Whichever alternative matched, its value becomes the content.
    matched = t[1]
    t[0] = matched
391
392 # Ignore empty statements
def p_content_of_statement_1(t):
    'content_of_statement : '
    # Empty production: a statement may have no content. t[0] is not
    # assigned here, and the statements rules skip values that are false.
    pass
396
397 # Statements are ended by newlines or a semi colon
def p_end_of_statement(t):
    '''end_of_statement : NEWLINE
                        | SEMI'''
    # Either terminator ends a statement; it produces no value.
    pass
402
403 # Different flavors of microop to avoid shift/reduce errors
def p_microop_0(t):
    'microop : labels ID'
    # Labelled microop without parameter text.
    op = Microop()
    op.labels, op.mnemonic = t[1], t[2]
    t[0] = op
410
def p_microop_1(t):
    'microop : ID'
    # Bare microop: no labels and no parameter text.
    op = Microop()
    op.mnemonic = t[1]
    t[0] = op
416
def p_microop_2(t):
    'microop : labels ID PARAMS'
    # Labelled microop with parameter text.
    op = Microop()
    op.labels, op.mnemonic, op.params = t[1], t[2], t[3]
    t[0] = op
424
def p_microop_3(t):
    'microop : ID PARAMS'
    # Unlabelled microop with parameter text.
    op = Microop()
    op.mnemonic, op.params = t[1], t[2]
    t[0] = op
431
432 # Labels in the microcode
def p_labels_0(t):
    'labels : label'
    # Start a new list holding the single label.
    first = t[1]
    t[0] = [first]
436
def p_labels_1(t):
    'labels : labels label'
    # Extend the existing list in place with the next label.
    collected = t[1]
    collected.append(t[2])
    t[0] = collected
441
442 # labels on lines by themselves are attached to the following instruction.
def p_labels_2(t):
    'labels : labels NEWLINE'
    # A newline after labels is absorbed, so the labels stay pending for
    # the instruction that follows.
    pending = t[1]
    t[0] = pending
446
def p_label_0(t):
    'label : ID COLON'
    # Ordinary (non-extern) label named by the identifier.
    parsed = Label()
    parsed.text = t[1]
    parsed.is_extern = False
    t[0] = parsed
453
def p_label_1(t):
    'label : EXTERN ID COLON'
    # Extern label named by the identifier after the keyword.
    label = Label()
    label.is_extern = True
    # handle_statement tests "extern", the attribute Label itself
    # declares. Set it as well as "is_extern", otherwise the label would
    # never be recorded as extern.
    label.extern = True
    label.text = t[2]
    t[0] = label
460
461 # Directives for the macroop
def p_directive_0(t):
    'directive : DOT ID'
    # Directive without parameter text; t[2] is its name.
    parsed = Directive()
    parsed.name = t[2]
    t[0] = parsed
467
def p_directive_1(t):
    'directive : DOT ID PARAMS'
    # Directive with parameter text; t[2] is its name, t[3] the text.
    parsed = Directive()
    parsed.name, parsed.params = t[2], t[3]
    t[0] = parsed
474
475 # Parse error handler. Note that the argument here is the offending
476 # *token*, not a grammar symbol (hence the need to use t.value)
def p_error(t):
    # Without an offending token there is no line or text to report, so
    # ask error() for a traceback instead.
    if not t:
        error(0, "unknown syntax error", True)
    else:
        error(t.lineno, "syntax error at '%s'" % t.value)
482
class MicroAssembler(object):
    """Front end that ties the lexer and parser of this module together.

    Attributes:
        lexer: the object returned by lex.lex().
        parser: the object returned by yacc.yacc(), carrying the
            configuration passed to the constructor.
        symbols: the same dict as ``parser.symbols``; it is the namespace
            in which statement parameters are evaluated.
    """

    def __init__(self, macro_type, microops,
            rom = None, rom_macroop_type = None):
        # Build the lexer first and the parser second, both directly from
        # this method.
        self.lexer = lex.lex()
        parser = yacc.yacc()
        # The grammar actions read their configuration from the parser.
        parser.macro_type = macro_type
        parser.macroops = {}
        parser.microops = microops
        parser.rom = rom
        parser.rom_macroop_type = rom_macroop_type
        parser.symbols = {}
        self.parser = parser
        self.symbols = parser.symbols

    def assemble(self, asm):
        """Parse ``asm`` and return the macroops it defined.

        The parser's macroop table is replaced with an empty dict
        afterwards, so each call returns only its own macroops.
        """
        parser = self.parser
        parser.parse(asm, lexer=self.lexer)
        assembled, parser.macroops = parser.macroops, {}
        return assembled