From 6960213a16bbad7b96fdbb3ff10ddd4d9cde3e04 Mon Sep 17 00:00:00 2001
From: Andres Noetzli <andres.noetzli@gmail.com>
Date: Thu, 10 Mar 2022 17:54:33 -0800
Subject: [PATCH] Refactor kinds parser (#8287)

This commit does a minor refactoring of the kinds parser. It moves the
Python name formatting out of parsekinds.py, adds parsekinds.py as a
dependency for the Java kinds generator, and reformats the scripts using
YAPF.

This is the first step towards generalizing the scripts for other enums.
---
 src/api/java/CMakeLists.txt   |  1 +
 src/api/java/genkinds.py.in   | 14 +++---
 src/api/parsekinds.py         | 71 +++++------------------------
 src/api/python/genkinds.py.in | 84 ++++++++++++++++++++++++++++-------
 4 files changed, 85 insertions(+), 85 deletions(-)

diff --git a/src/api/java/CMakeLists.txt b/src/api/java/CMakeLists.txt
index 2c4151be7..95c1ae4a4 100644
--- a/src/api/java/CMakeLists.txt
+++ b/src/api/java/CMakeLists.txt
@@ -35,6 +35,7 @@ add_custom_command(
     --kinds-file-prefix "${CMAKE_CURRENT_BINARY_DIR}/io/github/cvc5/api/Kind"
   DEPENDS
     "${CMAKE_CURRENT_BINARY_DIR}/genkinds.py"
+    "${PROJECT_SOURCE_DIR}/src/api/parsekinds.py"
     "${PROJECT_SOURCE_DIR}/src/api/cpp/cvc5_kind.h"
 )
 
diff --git a/src/api/java/genkinds.py.in b/src/api/java/genkinds.py.in
index 18ac5ec3c..41f02d221 100644
--- a/src/api/java/genkinds.py.in
+++ b/src/api/java/genkinds.py.in
@@ -11,7 +11,6 @@
 # directory for licensing information.
 # #############################################################################
 ##
-
 """
 This script reads cvc5/src/api/cpp/cvc5_kind.h and generates
 cvc5/Kind.java file which declares all cvc5 kinds.
@@ -23,7 +22,6 @@ import sys
 import re
 import textwrap
 
-
 # get access to cvc5/src/api/parsekinds.py
 sys.path.insert(0, os.path.abspath('${CMAKE_SOURCE_DIR}/src/api'))
 
@@ -118,10 +116,10 @@ def gen_java(parser: KindsParser, filename):
     f = open(filename, "w")
     code = KINDS_JAVA_TOP
     enum_value = -2  # initial enum value
-    for kind, name in parser.kinds.items():
-        comment = parser.get_comment(kind)
+    for kind, comment in parser.kinds_doc.items():
         comment = format_comment(comment)
-        code += "{comment}\n  {name}({enum_value}),\n".format(comment=comment, name=kind, enum_value=enum_value)
+        code += "{comment}\n  {name}({enum_value}),\n".format(
+            comment=comment, name=kind, enum_value=enum_value)
         enum_value = enum_value + 1
     code += KINDS_JAVA_BOTTOM
     f.write(code)
@@ -131,9 +129,11 @@ def gen_java(parser: KindsParser, filename):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser('Read a kinds header file and generate a '
                                      'corresponding java file')
-    parser.add_argument('--kinds-header', metavar='<KINDS_HEADER>',
+    parser.add_argument('--kinds-header',
+                        metavar='<KINDS_HEADER>',
                         help='The header file to read kinds from')
-    parser.add_argument('--kinds-file-prefix', metavar='<KIND_FILE_PREFIX>',
+    parser.add_argument('--kinds-file-prefix',
+                        metavar='<KIND_FILE_PREFIX>',
                         help='The prefix for the generated .java file',
                         default=DEFAULT_PREFIX)
 
diff --git a/src/api/parsekinds.py b/src/api/parsekinds.py
index 84d154f36..6eee76693 100644
--- a/src/api/parsekinds.py
+++ b/src/api/parsekinds.py
@@ -11,7 +11,6 @@
 # directory for licensing information.
 # #############################################################################
 ##
-
 """
 This script implements KindsParser which
 parses the header file cvc5/src/api/cpp/cvc5_kind.h
@@ -44,15 +43,6 @@ BLOCK_COMMENT_END = '*/'
 MACRO_BLOCK_BEGIN = '#if 0'
 MACRO_BLOCK_END = '#endif'
 
-# Format Kind Names
-# special cases for format_name
-_IS = '_IS'
-# replacements after some preprocessing
-replacements = {
-    'Bitvector': 'BV',
-    'Floatingpoint': 'FP'
-}
-
 
 class KindsParser:
     tokenmap = {
@@ -61,8 +51,6 @@ class KindsParser:
     }
 
     def __init__(self):
-        # dictionary from C++ Kind name to shortened name
-        self.kinds = OrderedDict()
         # dictionary from shortened name to documentation comment
         self.kinds_doc = OrderedDict()
         # the end token for the current type of block
@@ -77,46 +65,9 @@ class KindsParser:
 
     def get_comment(self, kind_name):
         '''
-        Look up a documentation comment for a Kind by name
-        Accepts both full C++ name and shortened name
-        '''
-        try:
-            return self.kinds_doc[kind_name]
-        except KeyError:
-            return self.kinds_doc[self.kinds[kind_name]]
-
-    def format_name(self, name):
+        Look up a documentation comment for a Kind by C++ name
         '''
-        In the Python API, each Kind name is reformatted for easier use
-
-        The naming scheme is:
-           1. capitalize the first letter of each word (delimited by underscores)
-           2. make the rest of the letters lowercase
-           3. replace Floatingpoint with FP
-           4. replace Bitvector with BV
-
-        There is one exception:
-           FLOATINGPOINT_IS_NAN  --> FPIsNan
-
-        For every "_IS" in the name, there's an underscore added before step 1,
-           so that the word after "Is" is capitalized
-
-        Examples:
-           BITVECTOR_ADD       -->  BVAdd
-           APPLY_SELECTOR      -->  ApplySelector
-           FLOATINGPOINT_IS_NAN -->  FPIsNan
-           SET_MINUS            -->  Setminus
-
-        See the generated .pxi file for an explicit mapping
-        '''
-        name = name.replace(_IS, _IS + US)
-        words = [w.capitalize() for w in name.lower().split(US)]
-        name = "".join(words)
-
-        for og, new in replacements.items():
-            name = name.replace(og, new)
-
-        return name
+        return self.kinds_doc[kind_name]
 
     def format_comment(self, comment):
         '''
@@ -126,10 +77,10 @@ class KindsParser:
             "Expecting to start with /* but got \"{}\"".format(comment[:2])
         assert comment[-2:] == '*/', \
             "Expecting to end with */ but got \"{}\"".format(comment[-2:])
-        comment = comment[2:-2].strip('*\n')   # /** ... */ -> ...
-        comment = textwrap.dedent(comment)     # remove indentation
-        comment = comment.replace('\n*', '\n') # remove leading "*""
-        comment = textwrap.dedent(comment)     # remove indentation
+        comment = comment[2:-2].strip('*\n')  # /** ... */ -> ...
+        comment = textwrap.dedent(comment)  # remove indentation
+        comment = comment.replace('\n*', '\n')  # remove leading "*""
+        comment = textwrap.dedent(comment)  # remove indentation
         comment = comment.replace('\\rst', '').replace('\\endrst', '')
         comment = comment.strip()  # remove leading and trailing spaces
         return comment
@@ -187,16 +138,14 @@ class KindsParser:
             elif self.in_kinds:
                 if line == OCB:
                     continue
+                name = line
                 if EQ in line:
-                    line = line[:line.find(EQ)].strip()
+                    name = line[:line.find(EQ)].strip()
                 elif C in line:
-                    line = line[:line.find(C)].strip()
-                fmt_name = self.format_name(line)
-                self.kinds[line] = fmt_name
+                    name = line[:line.find(C)].strip()
                 fmt_comment = self.format_comment(self.latest_block_comment)
-                self.kinds_doc[fmt_name] = fmt_comment
+                self.kinds_doc[name] = fmt_comment
             elif ENUM_START in line:
                 self.in_kinds = True
                 continue
         f.close()
-
diff --git a/src/api/python/genkinds.py.in b/src/api/python/genkinds.py.in
index 74f0e421e..5af6e79a5 100644
--- a/src/api/python/genkinds.py.in
+++ b/src/api/python/genkinds.py.in
@@ -11,7 +11,6 @@
 # directory for licensing information.
 # #############################################################################
 ##
-
 """
 This script reads cvc5/src/api/cpp/cvc5_kind.h and generates
 .pxd and .pxi files which declare all the cvc5 kinds and
@@ -38,7 +37,7 @@ from parsekinds import *
 DEFAULT_PREFIX = 'kinds'
 
 ################ Comments and Macro Tokens ############
-PYCOMMENT           = '#'
+PYCOMMENT = '#'
 
 KINDS_PXD_TOP = \
 r"""cdef extern from "api/cpp/cvc5_kind.h" namespace "cvc5::api":
@@ -64,47 +63,98 @@ class Kind(Enum):
 KINDS_ATTR_TEMPLATE = r'''    {name}=c_Kind.{kind}, """{doc}"""
 '''
 
+# Format Kind Names
+# special cases for format_name
+_IS = '_IS'
+# replacements after some preprocessing
+replacements = {'Bitvector': 'BV', 'Floatingpoint': 'FP'}
+
+
+def format_name(name):
+    '''
+    In the Python API, each Kind name is reformatted for easier use
+
+    The naming scheme is:
+       1. capitalize the first letter of each word (delimited by underscores)
+       2. make the rest of the letters lowercase
+       3. replace Floatingpoint with FP
+       4. replace Bitvector with BV
+
+    There is one exception:
+       FLOATINGPOINT_IS_NAN  --> FPIsNan
+
+    For every "_IS" in the name, there's an underscore added before step 1,
+       so that the word after "Is" is capitalized
+
+    Examples:
+       BITVECTOR_ADD       -->  BVAdd
+       APPLY_SELECTOR      -->  ApplySelector
+       FLOATINGPOINT_IS_NAN -->  FPIsNan
+       SET_MINUS            -->  Setminus
+
+    See the generated .pxi file for an explicit mapping
+    '''
+    name = name.replace(_IS, _IS + US)
+    words = [w.capitalize() for w in name.lower().split(US)]
+    name = "".join(words)
+
+    for og, new in replacements.items():
+        name = name.replace(og, new)
+
+    return name
+
+
 def gen_pxd(parser: KindsParser, filename):
     with open(filename, "w") as f:
         # include the format_name docstring in the generated file
         # could be helpful for users to see the formatting rules
-        for line in parser.format_name.__doc__.split(NL):
+        for line in format_name.__doc__.split(NL):
             f.write(PYCOMMENT)
             if not line.isspace():
                 f.write(line)
             f.write(NL)
         f.write(KINDS_PXD_TOP)
-        for kind in parser.kinds:
-            f.write('       {},\n'.format(kind))
+        for name, _ in parser.kinds_doc.items():
+            f.write('       {},\n'.format(name))
+
 
-def gen_pxi(parser : KindsParser, filename):
+def gen_pxi(parser: KindsParser, filename):
     with open(filename, "w") as f:
         f.write(KINDS_PXI_TOP)
-        for kind, name in parser.kinds.items():
-            doc = parser.kinds_doc.get(name, '')
-            f.write(KINDS_ATTR_TEMPLATE.format(name=name, kind=kind, doc=doc))
+        for name, doc in parser.kinds_doc.items():
+            f.write(
+                KINDS_ATTR_TEMPLATE.format(name=format_name(name),
+                                           kind=name,
+                                           doc=doc))
+
 
 comment_repls = {
     '`Solver::([a-zA-Z]+)\(([^)]*)\)[^`]*`': ':py:meth:`pycvc5.Solver.\\1`',
 }
+
+
 def reformat_comment(comment):
-    for pat,repl in comment_repls.items():
+    for pat, repl in comment_repls.items():
         comment = re.sub(pat, repl, comment)
     return comment
 
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser('Read a kinds header file and generate a '
-                         'corresponding pxd file, with simplified kind names.')
-    parser.add_argument('--kinds-header', metavar='<KINDS_HEADER>',
+    parser = argparse.ArgumentParser(
+        'Read a kinds header file and generate a '
+        'corresponding pxd file, with simplified kind names.')
+    parser.add_argument('--kinds-header',
+                        metavar='<KINDS_HEADER>',
                         help='The header file to read kinds from')
-    parser.add_argument('--kinds-file-prefix', metavar='<KIND_FILE_PREFIX>',
+    parser.add_argument('--kinds-file-prefix',
+                        metavar='<KIND_FILE_PREFIX>',
                         help='The prefix for the .pxd and .pxi files to write '
                         'the Cython declarations to.',
                         default=DEFAULT_PREFIX)
 
-    args               = parser.parse_args()
-    kinds_header       = args.kinds_header
-    kinds_file_prefix  = args.kinds_file_prefix
+    args = parser.parse_args()
+    kinds_header = args.kinds_header
+    kinds_file_prefix = args.kinds_file_prefix
 
     kp = KindsParser()
     kp.parse(kinds_header)
-- 
2.30.2