misc: string.join has been removed in python3

[gem5.git] / src / arch / micro_asm.py
diff --git a/src/arch/micro_asm.py b/src/arch/micro_asm.py

index 3d9e83648fbbc53224e64a633a186cdb25d1d011..536f70994a9fc7775257c3ccf843cb1cb40f595e 100644 (file)
--- a/src/arch/micro_asm.py
+++ b/src/arch/micro_asm.py
@@ -23,21 +23,16 @@
  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# Authors: Gabe Black
+
+from __future__ import print_function
  
  import os
  import sys
  import re
-import string
  import traceback
  # get type names
  from types import *
  
-# Prepend the directory where the PLY lex & yacc modules are found
-# to the search path.
-sys.path[0:0] = [os.environ['M5_PLY']]
-
  from ply import lex
  from ply import yacc
  
@@ -55,7 +50,7 @@ class Micro_Container(object):
          self.micro_classes = {}
          self.labels = {}
  
-    def add_microop(self, microop):
+    def add_microop(self, mnemonic, microop):
          self.microops.append(microop)
  
      def __str__(self):
@@ -64,9 +59,17 @@ class Micro_Container(object):
              string += "  %s\n" % microop
          return string
  
-class Macroop(Micro_Container):
+class Combinational_Macroop(Micro_Container):
      pass
  
+class Rom_Macroop(object):
+    def __init__(self, name, target):
+        self.name = name
+        self.target = target
+
+    def __str__(self):
+        return "%s: %s\n" % (self.name, self.target)
+
  class Rom(Micro_Container):
      def __init__(self, name):
          super(Rom, self).__init__(name)
@@ -91,6 +94,7 @@ class Statement(object):
      def __init__(self):
          self.is_microop = False
          self.is_directive = False
+        self.params = ""
  
  class Microop(Statement):
      def __init__(self):
@@ -98,7 +102,6 @@ class Microop(Statement):
          self.mnemonic = ""
          self.labels = []
          self.is_microop = True
-        self.params = ""
  
  class Directive(Statement):
      def __init__(self):
@@ -113,33 +116,43 @@ class Directive(Statement):
  ##########################################################################
  
  def print_error(message):
-    print
-    print "*** %s" % message
-    print
+    print()
+    print("*** %s" % message)
+    print()
  
  def handle_statement(parser, container, statement):
      if statement.is_microop:
+        if statement.mnemonic not in parser.microops.keys():
+            raise Exception, "Unrecognized mnemonic: %s" % statement.mnemonic
+        parser.symbols["__microopClassFromInsideTheAssembler"] = \
+            parser.microops[statement.mnemonic]
          try:
-            microop = eval('parser.microops[statement.mnemonic](%s)' %
-                    statement.params)
+            microop = eval('__microopClassFromInsideTheAssembler(%s)' %
+                    statement.params, {}, parser.symbols)
          except:
-            print_error("Error creating microop object.")
+            print_error("Error creating microop object with mnemonic %s." % \
+                    statement.mnemonic)
              raise
          try:
              for label in statement.labels:
-                container.labels[label.name] = microop
-                if label.extern:
-                    container.externs[label.name] = microop
-            container.add_microop(microop)
+                container.labels[label.text] = microop
+                if label.is_extern:
+                    container.externs[label.text] = microop
+            container.add_microop(statement.mnemonic, microop)
          except:
              print_error("Error adding microop.")
              raise
      elif statement.is_directive:
+        if statement.name not in container.directives.keys():
+            raise Exception, "Unrecognized directive: %s" % statement.name
+        parser.symbols["__directiveFunctionFromInsideTheAssembler"] = \
+            container.directives[statement.name]
          try:
-            eval('container.%s()' % statement.name)
+            eval('__directiveFunctionFromInsideTheAssembler(%s)' %
+                    statement.params, {}, parser.symbols)
          except:
              print_error("Error executing directive.")
-            print container.directives
+            print(container.directives)
              raise
      else:
          raise Exception, "Didn't recognize the type of statement", statement
@@ -174,7 +187,6 @@ tokens = reserved + (
  
          'LPAREN', 'RPAREN',
          'LBRACE', 'RBRACE',
-        #'COMMA',
          'COLON', 'SEMI', 'DOT',
          'NEWLINE'
          )
@@ -190,28 +202,55 @@ reserved_map = { }
  for r in reserved:
      reserved_map[r.lower()] = r
  
+# Ignore comments
+def t_ANY_COMMENT(t):
+    r'\#[^\n]*(?=\n)'
+
+def t_ANY_MULTILINECOMMENT(t):
+    r'/\*([^/]|((?<!\*)/))*\*/'
+
+# A colon marks the end of a label. It should follow an ID which will
+# put the lexer in the "params" state. Seeing the colon will put it back
+# in the "asm" state since it knows it saw a label and not a mnemonic.
  def t_params_COLON(t):
      r':'
      t.lexer.begin('asm')
      return t
  
+# Parameters are a string of text which doesn't contain an unescaped
+# statement terminator, i.e. a newline or semicolon.
+def t_params_PARAMS(t):
+    r'([^\n;\\]|(\\[\n;\\]))+'
+    t.lineno += t.value.count('\n')
+    unescapeParamsRE = re.compile(r'(\\[\n;\\])')
+    def unescapeParams(mo):
+        val = mo.group(0)
+        return val[1]
+    t.value = unescapeParamsRE.sub(unescapeParams, t.value)
+    t.lexer.begin('asm')
+    return t
+
+# An "ID" in the micro assembler is either a label, directive, or mnemonic.
+# If it's either a directive or a mnemonic, it will be optionally followed by
+# parameters. If it's a label, the following colon will make the lexer stop
+# looking for parameters.
  def t_asm_ID(t):
      r'[A-Za-z_]\w*'
      t.type = reserved_map.get(t.value, 'ID')
-    t.lexer.begin('params')
+    # If the ID is really "extern", we shouldn't start looking for parameters
+    # yet. The real ID, the label itself, is coming up.
+    if t.type != 'EXTERN':
+        t.lexer.begin('params')
      return t
  
+# If there is a label and you're -not- in the assembler (which would be caught
+# above), don't start looking for parameters.
  def t_ANY_ID(t):
      r'[A-Za-z_]\w*'
      t.type = reserved_map.get(t.value, 'ID')
      return t
  
-def t_params_PARAMS(t):
-    r'([^\n;]|((?<=\\)[\n;]))+'
-    t.lineno += t.value.count('\n')
-    t.lexer.begin('asm')
-    return t
-
+# Braces enter and exit micro assembly
  def t_INITIAL_LBRACE(t):
      r'\{'
      t.lexer.begin('asm')
@@ -222,15 +261,20 @@ def t_asm_RBRACE(t):
      t.lexer.begin('INITIAL')
      return t
  
+# At the top level, keep track of newlines only for line counting.
  def t_INITIAL_NEWLINE(t):
      r'\n+'
      t.lineno += t.value.count('\n')
  
+# In the micro assembler, do line counting but also return a token. The
+# token is needed by the parser to detect the end of a statement.
  def t_asm_NEWLINE(t):
      r'\n+'
      t.lineno += t.value.count('\n')
      return t
  
+# A newline or semi colon when looking for params signals that the statement
+# is over and the lexer should go back to looking for regular assembly.
  def t_params_NEWLINE(t):
      r'\n+'
      t.lineno += t.value.count('\n')
@@ -245,7 +289,6 @@ def t_params_SEMI(t):
  # Basic regular expressions to pick out simple tokens
  t_ANY_LPAREN = r'\('
  t_ANY_RPAREN = r'\)'
-#t_COMMA  = r','
  t_ANY_SEMI   = r';'
  t_ANY_DOT    = r'\.'
  
@@ -279,29 +322,28 @@ def p_rom_or_macros_1(t):
      'rom_or_macros : rom_or_macros rom_or_macro'
  
  def p_rom_or_macro_0(t):
-    '''rom_or_macro : rom_block'''
-
-def p_rom_or_macro_1(t):
-    '''rom_or_macro : macroop_def'''
-
-# A block of statements
-def p_block(t):
-    'block : LBRACE statements RBRACE'
-    block = Block()
-    block.statements = t[2]
-    t[0] = block
+    '''rom_or_macro : rom_block
+                    | macroop_def'''
  
  # Defines a section of microcode that should go in the current ROM
  def p_rom_block(t):
      'rom_block : DEF ROM block SEMI'
+    if not t.parser.rom:
+        print_error("Rom block found, but no Rom object specified.")
+        raise TypeError, "Rom block found, but no Rom object was specified."
      for statement in t[3].statements:
          handle_statement(t.parser, t.parser.rom, statement)
      t[0] = t.parser.rom
  
  # Defines a macroop that jumps to an external label in the ROM
  def p_macroop_def_0(t):
-    'macroop_def : DEF MACROOP LPAREN ID RPAREN SEMI'
-    t[0] = t[4]
+    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
+    if not t.parser.rom_macroop_type:
+        print_error("ROM based macroop found, but no ROM macroop class was specified.")
+        raise TypeError, "ROM based macroop found, but no ROM macroop class was specified."
+    macroop = t.parser.rom_macroop_type(t[3], t[5])
+    t.parser.macroops[t[3]] = macroop
+
  
  # Defines a macroop that is combinationally generated
  def p_macroop_def_1(t):
@@ -313,7 +355,14 @@ def p_macroop_def_1(t):
          raise
      for statement in t[4].statements:
          handle_statement(t.parser, curop, statement)
-    t.parser.macroops.append(curop)
+    t.parser.macroops[t[3]] = curop
+
+# A block of statements
+def p_block(t):
+    'block : LBRACE statements RBRACE'
+    block = Block()
+    block.statements = t[2]
+    t[0] = block
  
  def p_statements_0(t):
      'statements : statement'
@@ -338,6 +387,7 @@ def p_content_of_statement_0(t):
                              | directive'''
      t[0] = t[1]
  
+# Ignore empty statements
  def p_content_of_statement_1(t):
      'content_of_statement : '
      pass
@@ -348,6 +398,7 @@ def p_end_of_statement(t):
                          | SEMI'''
      pass
  
+# Different flavors of microop to avoid shift/reduce errors
  def p_microop_0(t):
      'microop : labels ID'
      microop = Microop()
@@ -376,6 +427,7 @@ def p_microop_3(t):
      microop.params = t[2]
      t[0] = microop
  
+# Labels in the microcode
  def p_labels_0(t):
      'labels : label'
      t[0] = [t[1]]
@@ -385,6 +437,11 @@ def p_labels_1(t):
      t[1].append(t[2])
      t[0] = t[1]
  
+# Labels on lines by themselves are attached to the following instruction.
+def p_labels_2(t):
+    'labels : labels NEWLINE'
+    t[0] = t[1]
+
  def p_label_0(t):
      'label : ID COLON'
      label = Label()
@@ -399,12 +456,20 @@ def p_label_1(t):
      label.text = t[2]
      t[0] = label
  
-def p_directive(t):
+# Directives for the macroop
+def p_directive_0(t):
      'directive : DOT ID'
      directive = Directive()
      directive.name = t[2]
      t[0] = directive
  
+def p_directive_1(t):
+    'directive : DOT ID PARAMS'
+    directive = Directive()
+    directive.name = t[2]
+    directive.params = t[3]
+    t[0] = directive
+
  # Parse error handler.  Note that the argument here is the offending
  # *token*, not a grammar symbol (hence the need to use t.value)
  def p_error(t):
@@ -415,19 +480,20 @@ def p_error(t):
  
  class MicroAssembler(object):
  
-    def __init__(self, macro_type, microops, rom):
+    def __init__(self, macro_type, microops,
+            rom = None, rom_macroop_type = None):
          self.lexer = lex.lex()
          self.parser = yacc.yacc()
          self.parser.macro_type = macro_type
-        self.parser.macroops = []
+        self.parser.macroops = {}
          self.parser.microops = microops
          self.parser.rom = rom
+        self.parser.rom_macroop_type = rom_macroop_type
+        self.parser.symbols = {}
+        self.symbols = self.parser.symbols
  
      def assemble(self, asm):
          self.parser.parse(asm, lexer=self.lexer)
-        for macroop in self.parser.macroops:
-            print macroop
-        print self.parser.rom
          macroops = self.parser.macroops
-        self.parser.macroops = []
+        self.parser.macroops = {}
          return macroops