amd/registers: scripts for processing register descriptions in JSON
authorNicolai Hähnle <nicolai.haehnle@amd.com>
Mon, 6 May 2019 08:31:19 +0000 (10:31 +0200)
committerMarek Olšák <marek.olsak@amd.com>
Tue, 4 Jun 2019 00:05:20 +0000 (20:05 -0400)
We will derive both the debugging tables and (the majority of) the
register headers from descriptions in JSON, instead of deriving the
debugging tables from an awkward parsing of the register headers.

Some of the scripts are useful for maintaining the register database
itself. The scripts are designed to output reasonably readable JSON
by default.

src/amd/registers/canonicalize.py [new file with mode: 0644]
src/amd/registers/makeregheader.py [new file with mode: 0644]
src/amd/registers/mergedbs.py [new file with mode: 0644]
src/amd/registers/parseheader.py [new file with mode: 0644]
src/amd/registers/regdb.py [new file with mode: 0644]
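For orientation, the shape of the JSON register database that these scripts consume and produce can be sketched as a Python literal. The top-level keys and per-entry fields follow what RegisterDatabase.from_json/to_json in regdb.py handle; the register, field and enum names, the chip list, the address, and the 'mm' address space below are invented for illustration only.

example_db = {
    'enums': {
        'FOO_MODE': {
            'entries': [
                {'name': 'FOO_OFF', 'value': 0},
                {'name': 'FOO_ON', 'value': 1},
            ],
        },
    },
    'register_types': {
        'FOO_CONTROL': {
            'fields': [
                {'name': 'MODE', 'bits': [0, 1], 'enum_ref': 'FOO_MODE'},
                {'name': 'COUNT', 'bits': [4, 9]},
            ],
        },
    },
    'register_mappings': [
        {
            'name': 'FOO_CONTROL',
            'chips': ['gfx9'],
            'map': {'to': 'mm', 'at': 0x1234},
            'type_ref': 'FOO_CONTROL',
        },
    ],
}

A structure like this can be loaded with RegisterDatabase.from_json() and written back out with encode_json_pretty().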

diff --git a/src/amd/registers/canonicalize.py b/src/amd/registers/canonicalize.py
new file mode 100644 (file)
index 0000000..958f438
--- /dev/null
@@ -0,0 +1,92 @@
+#
+# Copyright 2019 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+# USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+"""
+Helper script that was used during the generation of the JSON data.
+
+  usage: python3 canonicalize.py FILE [CHIP]
+
+Reads the register database from FILE, performs canonicalization
+(de-duplication of enums and register types, implicitly sorting JSON by name)
+and attempts to deduce missing register types. If CHIP is given, every
+register mapping (which must not list any chips yet) is tagged with that chip.
+
+Notes about deduced register types go to stderr; the resulting JSON to stdout.
+"""
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from collections import defaultdict
+import json
+import re
+import sys
+
+from regdb import RegisterDatabase, deduplicate_enums, deduplicate_register_types
+
+RE_number = re.compile('[0-9]+')
+
+def deduce_missing_register_types(regdb):
+    """
+    This is a heuristic for filling in missing register types based on
+    sequentially named registers.
+    """
+    buckets = defaultdict(list)
+    for regmap in regdb.register_mappings():
+        buckets[RE_number.sub('0', regmap.name)].append(regmap)
+
+    for bucket in buckets.values():
+        if len(bucket) <= 1:
+            continue
+
+        regtypenames = set(
+            regmap.type_ref for regmap in bucket if hasattr(regmap, 'type_ref')
+        )
+        if len(regtypenames) == 1:
+            regtypename = regtypenames.pop()
+            for regmap in bucket:
+                if not hasattr(regmap, 'type_ref'):
+                    print('Deducing {0} -> {1}'.format(regmap.name, regtypename), file=sys.stderr)
+                regmap.type_ref = regtypename
+
+
+def main():
+    regdb_filename = sys.argv[1]
+    with open(regdb_filename, 'r') as filp:
+        regdb = RegisterDatabase.from_json(json.load(filp))
+
+    if len(sys.argv) > 2:
+        for regmap in regdb.register_mappings():
+            assert not hasattr(regmap, 'chips')
+            regmap.chips = [sys.argv[2]]
+
+    deduplicate_enums(regdb)
+    deduplicate_register_types(regdb)
+    deduce_missing_register_types(regdb)
+    regdb.garbage_collect()
+
+    print(regdb.encode_json_pretty())
+
+
+if __name__ == '__main__':
+    main()
+
+# kate: space-indent on; indent-width 4; replace-tabs on;
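
To illustrate the heuristic in deduce_missing_register_types: names that differ only in embedded digit runs land in the same bucket, and if exactly one register type is known within a bucket, it is assigned to the untyped members. A minimal standalone sketch of the bucketing step; the register names and the type name are made up for illustration:

import re
from collections import defaultdict

RE_number = re.compile('[0-9]+')

# Hypothetical sequentially named registers; only the first has a known type.
names_and_types = [('CB_COLOR0_BASE', 'CB_COLOR_BASE'),
                   ('CB_COLOR1_BASE', None),
                   ('CB_COLOR2_BASE', None)]

buckets = defaultdict(list)
for name, type_ref in names_and_types:
    # All three names collapse to the bucket key 'CB_COLOR0_BASE'.
    buckets[RE_number.sub('0', name)].append((name, type_ref))

for bucket in buckets.values():
    known_types = set(t for _, t in bucket if t is not None)
    if len(known_types) == 1:
        # Prints: would deduce CB_COLOR_BASE for ['CB_COLOR1_BASE', 'CB_COLOR2_BASE']
        print('would deduce', known_types.pop(), 'for',
              [n for n, t in bucket if t is None])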
diff --git a/src/amd/registers/makeregheader.py b/src/amd/registers/makeregheader.py
new file mode 100644 (file)
index 0000000..006fee3
--- /dev/null
@@ -0,0 +1,384 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+COPYRIGHT = '''
+/*
+ * Copyright 2015-2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+'''
+"""
+Create the (combined) register header from register JSON. Use --help for usage.
+"""
+
+import argparse
+from collections import defaultdict
+import itertools
+import json
+import re
+import sys
+
+from regdb import Object, RegisterDatabase, deduplicate_enums, deduplicate_register_types
+
+
+######### BEGIN HARDCODED CONFIGURATION
+
+# Chips are sorted chronologically
+CHIPS = [
+    Object(name='si', disambiguation='GFX6'),
+    Object(name='cik', disambiguation='GFX6'),
+    Object(name='vi', disambiguation='GFX6'),
+    Object(name='fiji', disambiguation='GFX6'),
+    Object(name='stoney', disambiguation='GFX6'),
+    Object(name='gfx9', disambiguation='GFX9'),
+]
+
+######### END HARDCODED CONFIGURATION
+
+def get_chip_index(chip):
+    """
+    Given a chip name, return its index in the global CHIPS list.
+    """
+    return next(idx for idx, obj in enumerate(CHIPS) if obj.name == chip)
+
+def get_disambiguation_suffix(chips):
+    """
+    Disambiguation suffix to be used for an enum entry or field name that
+    is supported in the given set of chips.
+    """
+    oldest_chip_index = min([get_chip_index(chip) for chip in chips])
+    return CHIPS[oldest_chip_index].disambiguation
+
+def get_chips_comment(chips, parent=None):
+    """
+    Generate a user-friendly comment describing the given set of chips.
+
+    The return value may be None, if such a comment is deemed unnecessary.
+
+    parent is an optional set of chips supporting a parent structure, e.g.
+    where chips may be the set of chips supporting a specific enum value,
+    parent would be the set of chips supporting the field containing the enum,
+    the idea being that no comment is necessary if all chips that support the
+    parent also support the child.
+    """
+    chipflags = [chip.name in chips for chip in CHIPS]
+    if all(chipflags):
+        return None
+
+    if parent is not None:
+        parentflags = [chip.name in parent for chip in CHIPS]
+        if all(childflag or not parentflag for childflag, parentflag in zip(chipflags, parentflags)):
+            return None
+
+    prefix = 0
+    for idx, chip, flag in zip(itertools.count(), CHIPS, chipflags):
+        if not flag:
+            break
+        prefix = idx + 1
+
+    suffix = len(CHIPS)
+    for idx, chip, flag in zip(itertools.count(), reversed(CHIPS), reversed(chipflags)):
+        if not flag:
+            break
+        suffix = len(CHIPS) - idx - 1
+
+    comment = []
+    if prefix > 0:
+        comment.append('<= {0}'.format(CHIPS[prefix - 1].name))
+    for chip, flag in zip(CHIPS[prefix:suffix], chipflags[prefix:suffix]):
+        if flag:
+            comment.append(chip.name)
+    if suffix < len(CHIPS):
+        comment.append('>= {0}'.format(CHIPS[suffix].name))
+
+    return ', '.join(comment)
+
+
+class HeaderWriter(object):
+    def __init__(self, regdb, guard=None):
+        self.guard = guard
+
+        # The following contain: Object(address, chips, name, regmap/field/enumentry)
+        self.register_lines = []
+        self.field_lines = []
+        self.value_lines = []
+
+        regtype_emit = defaultdict(set)
+        enum_emit = defaultdict(set)
+
+        for regmap in regdb.register_mappings():
+            type_ref = getattr(regmap, 'type_ref', None)
+            self.register_lines.append(Object(
+                address=regmap.map.at,
+                chips=set(regmap.chips),
+                name=regmap.name,
+                regmap=regmap,
+                type_refs=set([type_ref]) if type_ref else set(),
+            ))
+
+            basename = re.sub(r'[0-9]+', '', regmap.name)
+            key = '{type_ref}::{basename}'.format(**locals())
+            if type_ref is not None and regtype_emit[key].isdisjoint(regmap.chips):
+                regtype_emit[key].update(regmap.chips)
+
+                regtype = regdb.register_type(type_ref)
+                for field in regtype.fields:
+                    if field.name == 'RESERVED':
+                        continue
+
+                    enum_ref = getattr(field, 'enum_ref', None)
+                    self.field_lines.append(Object(
+                        address=regmap.map.at,
+                        chips=set(regmap.chips),
+                        name=field.name,
+                        field=field,
+                        bits=field.bits[:],
+                        type_refs=set([type_ref]) if type_ref else set(),
+                        enum_refs=set([enum_ref]) if enum_ref else set(),
+                    ))
+
+                    key = '{type_ref}::{basename}::{enum_ref}'.format(**locals())
+                    if enum_ref is not None and enum_emit[key].isdisjoint(regmap.chips):
+                        enum_emit[key].update(regmap.chips)
+
+                        enum = regdb.enum(enum_ref)
+                        for entry in enum.entries:
+                            self.value_lines.append(Object(
+                                address=regmap.map.at,
+                                chips=set(regmap.chips),
+                                name=entry.name,
+                                enumentry=entry,
+                                enum_refs=set([enum_ref]) if enum_ref else set(),
+                            ))
+
+        # Merge register lines
+        lines = self.register_lines
+        lines.sort(key=lambda line: (line.address, line.name))
+
+        self.register_lines = []
+        for line in lines:
+            prev = self.register_lines[-1] if self.register_lines else None
+            if prev and prev.address == line.address and prev.name == line.name:
+                prev.chips.update(line.chips)
+                prev.type_refs.update(line.type_refs)
+                continue
+            self.register_lines.append(line)
+
+        # Merge field lines
+        lines = self.field_lines
+        lines.sort(key=lambda line: (line.address, line.name))
+
+        self.field_lines = []
+        for line in lines:
+            merged = False
+            for prev in reversed(self.field_lines):
+                if prev.address != line.address or prev.name != line.name:
+                    break
+
+                # Can merge fields if they have the same starting bit and the
+                # range of the field as intended by the current line does not
+                # conflict with any of the regtypes covered by prev.
+                if prev.bits[0] != line.bits[0]:
+                    continue
+
+                if prev.bits[1] < line.bits[1]:
+                    # Current line's field extends beyond the range of prev.
+                    # Need to check for conflicts
+                    conflict = False
+                    for type_ref in prev.type_refs:
+                        for field in regdb.register_type(type_ref).fields:
+                            # The only possible conflict is for a prev field
+                            # that starts at a higher bit.
+                            if (field.bits[0] > line.bits[0] and
+                                field.bits[0] <= line.bits[1]):
+                                conflict = True
+                                break
+                        if conflict:
+                            break
+                    if conflict:
+                        continue
+
+                prev.bits[1] = max(prev.bits[1], line.bits[1])
+                prev.chips.update(line.chips)
+                prev.type_refs.update(line.type_refs)
+                prev.enum_refs.update(line.enum_refs)
+                merged = True
+                break
+            if not merged:
+                self.field_lines.append(line)
+
+        # Merge value lines
+        lines = self.value_lines
+        lines.sort(key=lambda line: (line.address, line.name))
+
+        self.value_lines = []
+        for line in lines:
+            for prev in reversed(self.value_lines):
+                if prev.address == line.address and prev.name == line.name and\
+                   prev.enumentry.value == line.enumentry.value:
+                    prev.chips.update(line.chips)
+                    prev.enum_refs.update(line.enum_refs)
+                    break
+            else:
+                self.value_lines.append(line)
+
+        # Disambiguate field and value lines
+        for idx, line in enumerate(self.field_lines):
+            prev = self.field_lines[idx - 1] if idx > 0 else None
+            next = self.field_lines[idx + 1] if idx + 1 < len(self.field_lines) else None
+            if (prev and prev.address == line.address and prev.field.name == line.field.name) or\
+               (next and next.address == line.address and next.field.name == line.field.name):
+                line.name += '_' + get_disambiguation_suffix(line.chips)
+
+        for idx, line in enumerate(self.value_lines):
+            prev = self.value_lines[idx - 1] if idx > 0 else None
+            next = self.value_lines[idx + 1] if idx + 1 < len(self.value_lines) else None
+            if (prev and prev.address == line.address and prev.enumentry.name == line.enumentry.name) or\
+               (next and next.address == line.address and next.enumentry.name == line.enumentry.name):
+                line.name += '_' + get_disambiguation_suffix(line.chips)
+
+    def print(self, filp, sort='address'):
+        """
+        Print out the entire register header.
+        """
+        if sort == 'address':
+            self.register_lines.sort(key=lambda line: (line.address, line.name))
+        else:
+            assert sort == 'name'
+            self.register_lines.sort(key=lambda line: (line.name, line.address))
+
+        # Collect and sort field lines by address
+        field_lines_by_address = defaultdict(list)
+        for line in self.field_lines:
+            field_lines_by_address[line.address].append(line)
+        for field_lines in field_lines_by_address.values():
+            if sort == 'address':
+                field_lines.sort(key=lambda line: (line.bits[0], line.name))
+            else:
+                field_lines.sort(key=lambda line: (line.name, line.bits[0]))
+
+        # Collect and sort value lines by address
+        value_lines_by_address = defaultdict(list)
+        for line in self.value_lines:
+            value_lines_by_address[line.address].append(line)
+        for value_lines in value_lines_by_address.values():
+            if sort == 'address':
+                value_lines.sort(key=lambda line: (line.enumentry.value, line.name))
+            else:
+                value_lines.sort(key=lambda line: (line.name, line.enumentry.value))
+
+        print('/* Automatically generated by amd/registers/makeregheader.py */\n', file=filp)
+        print(file=filp)
+        print(COPYRIGHT.strip(), file=filp)
+        print(file=filp)
+
+        if self.guard:
+            print('#ifndef {self.guard}'.format(**locals()), file=filp)
+            print('#define {self.guard}\n'.format(**locals()), file=filp)
+
+        for register_line in self.register_lines:
+            comment = get_chips_comment(register_line.chips)
+
+            address = '{0:X}'.format(register_line.address)
+            address = address.rjust(3 if register_line.regmap.map.to == 'pkt3' else 6, '0')
+
+            define_name = 'R_{address}_{register_line.name}'.format(**locals()).ljust(63)
+            comment = ' /* {0} */'.format(comment) if comment else ''
+            print('#define {define_name} 0x{address}{comment}'.format(**locals()), file=filp)
+
+            field_lines = field_lines_by_address[register_line.address]
+            field_idx = 0
+            while field_idx < len(field_lines):
+                field_line = field_lines[field_idx]
+
+                if field_line.type_refs.isdisjoint(register_line.type_refs):
+                    field_idx += 1
+                    continue
+                del field_lines[field_idx]
+
+                comment = get_chips_comment(field_line.chips, register_line.chips)
+
+                mask = (1 << (field_line.bits[1] - field_line.bits[0] + 1)) - 1
+                define_name = '_{address}_{field_line.name}(x)'.format(**locals()).ljust(58)
+                comment = ' /* {0} */'.format(comment) if comment else ''
+                print(
+                    '#define   S{define_name} (((unsigned)(x) & 0x{mask:X}) << {field_line.bits[0]}){comment}'
+                    .format(**locals()), file=filp)
+                print('#define   G{define_name} (((x) >> {field_line.bits[0]}) & 0x{mask:X})'
+                         .format(**locals()), file=filp)
+
+                complement = ((1 << 32) - 1) ^ (mask << field_line.bits[0])
+                define_name = '_{address}_{field_line.name}'.format(**locals()).ljust(58)
+                print('#define   C{define_name} 0x{complement:08X}'
+                         .format(**locals()), file=filp)
+
+                value_lines = value_lines_by_address[register_line.address]
+                value_idx = 0
+                while value_idx < len(value_lines):
+                    value_line = value_lines[value_idx]
+
+                    if value_line.enum_refs.isdisjoint(field_line.enum_refs):
+                        value_idx += 1
+                        continue
+                    del value_lines[value_idx]
+
+                    comment = get_chips_comment(value_line.chips, field_line.chips)
+
+                    define_name = 'V_{address}_{value_line.name}'.format(**locals()).ljust(55)
+                    comment = ' /* {0} */'.format(comment) if comment else ''
+                    print('#define     {define_name} {value_line.enumentry.value}{comment}'
+                          .format(**locals()), file=filp)
+
+        if self.guard:
+            print('\n#endif // {self.guard}'.format(**locals()), file=filp)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--chip', dest='chips', type=str, nargs='*',
+                        help='Chip for which to generate the header (all chips if unspecified)')
+    parser.add_argument('--sort', choices=['name', 'address'], default='address',
+                        help='Sort key for registers, fields, and enum values')
+    parser.add_argument('--guard', type=str, help='Name of the #include guard')
+    parser.add_argument('files', metavar='FILE', type=str, nargs='+',
+                        help='Register database file')
+    args = parser.parse_args()
+
+    regdb = None
+    for filename in args.files:
+        with open(filename, 'r') as filp:
+            db = RegisterDatabase.from_json(json.load(filp))
+            if regdb is None:
+                regdb = db
+            else:
+                regdb.update(db)
+
+    deduplicate_enums(regdb)
+    deduplicate_register_types(regdb)
+
+    w = HeaderWriter(regdb, guard=args.guard)
+    w.print(sys.stdout, sort=args.sort)
+
+
+if __name__ == '__main__':
+    main()
+
+# kate: space-indent on; indent-width 4; replace-tabs on;
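
As a sanity check of the chip-comment logic in get_chips_comment, here is what it would return for a few chip sets, assuming makeregheader.py is importable as a module; the chip sets themselves are arbitrary examples:

from makeregheader import CHIPS, get_chips_comment

assert get_chips_comment({chip.name for chip in CHIPS}) is None  # supported everywhere: no comment
assert get_chips_comment({'si', 'cik'}) == '<= cik'              # contiguous prefix of CHIPS
assert get_chips_comment({'gfx9'}) == '>= gfx9'                  # contiguous suffix of CHIPS
assert get_chips_comment({'vi', 'fiji'}) == 'vi, fiji'           # chips in the middle
# No comment when every chip that supports the parent also supports the child:
assert get_chips_comment({'si', 'cik'}, parent={'si', 'cik'}) is None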
diff --git a/src/amd/registers/mergedbs.py b/src/amd/registers/mergedbs.py
new file mode 100644 (file)
index 0000000..2a469e7
--- /dev/null
@@ -0,0 +1,55 @@
+#
+# Copyright 2017-2019 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+# USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+"""
+Helper script to merge register database JSON files.
+
+  usage: python3 mergedbs.py [FILES...]
+
+Will merge the given JSON files and output the result on stdout.
+"""
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from collections import defaultdict
+import json
+import re
+import sys
+
+from regdb import RegisterDatabase, deduplicate_enums, deduplicate_register_types
+
+def main():
+    regdb = RegisterDatabase()
+    for filename in sys.argv[1:]:
+        with open(filename, 'r') as filp:
+            regdb.update(RegisterDatabase.from_json(json.load(filp)))
+
+    deduplicate_enums(regdb)
+    deduplicate_register_types(regdb)
+
+    print(regdb.encode_json_pretty())
+
+
+if __name__ == '__main__':
+    main()
+
+# kate: space-indent on; indent-width 4; replace-tabs on;
diff --git a/src/amd/registers/parseheader.py b/src/amd/registers/parseheader.py
new file mode 100644 (file)
index 0000000..24ec6a9
--- /dev/null
@@ -0,0 +1,199 @@
+#
+# Copyright 2017-2019 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+# USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+"""
+Helper script that parses a register header and produces a register database
+as output. Use as:
+
+  python3 parseheader.py ADDRESS_SPACE < header.h
+
+This script is included for reference -- we should be able to remove this in
+the future.
+"""
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import json
+import math
+import re
+import sys
+
+from regdb import Object, RegisterDatabase, deduplicate_enums, deduplicate_register_types
+
+
+RE_comment = re.compile(r'(/\*(.*)\*/)$|(//(.*))$')
+RE_prefix = re.compile(r'([RSV])_([0-9a-fA-F]+)_')
+RE_set_value = re.compile(r'\(\(\(unsigned\)\(x\) & ([0-9a-fA-Fx]+)\) << ([0-9]+)\)')
+RE_set_value_no_shift = re.compile(r'\((\(unsigned\))?\(x\) & ([0-9a-fA-Fx]+)\)')
+
+class HeaderParser(object):
+    def __init__(self, address_space):
+        self.regdb = RegisterDatabase()
+        self.chips = ['si', 'cik', 'vi', 'fiji', 'stoney', 'gfx9']
+        self.address_space = address_space
+
+    def __fini_field(self):
+        if self.__field is None:
+            return
+
+        if self.__enumentries:
+            self.__field.enum_ref = self.__regmap.name + '__' + self.__field.name
+            self.regdb.add_enum(self.__field.enum_ref, Object(
+                entries=self.__enumentries
+            ))
+        self.__fields.append(self.__field)
+
+        self.__enumentries = None
+        self.__field = None
+
+    def __fini_register(self):
+        if self.__regmap is None:
+            return
+
+        if self.__fields:
+            self.regdb.add_register_type(self.__regmap.name, Object(
+                fields=self.__fields
+            ))
+            self.__regmap.type_ref = self.__regmap.name
+        self.regdb.add_register_mapping(self.__regmap)
+
+        self.__regmap = None
+        self.__fields = None
+
+    def parse_header(self, filp):
+        self.__regmap = None
+        self.__fields = None
+        self.__field = None
+        self.__enumentries = None
+
+        for line in filp:
+            if not line.startswith('#define '):
+                continue
+
+            line = line[8:].strip()
+
+            comment = None
+            m = RE_comment.search(line)
+            if m is not None:
+                comment = m.group(2) or m.group(4)
+                comment = comment.strip()
+                line = line[:m.span()[0]].strip()
+
+            split = line.split(None, 1)
+            name = split[0]
+
+            m = RE_prefix.match(name)
+            if m is None:
+                continue
+
+            prefix = m.group(1)
+            prefix_address = int(m.group(2), 16)
+            name = name[m.span()[1]:]
+
+            if prefix == 'V':
+                value = int(split[1], 0)
+
+                for entry in self.__enumentries:
+                    if name == entry.name:
+                        sys.exit('Duplicate value define: name = {0}'.format(name))
+
+                entry = Object(name=name, value=value)
+                if comment is not None:
+                    entry.comment = comment
+                self.__enumentries.append(entry)
+                continue
+
+            if prefix == 'S':
+                self.__fini_field()
+
+                if not name.endswith('(x)'):
+                    sys.exit('Missing (x) in S line: {0}'.format(line))
+                name = name[:-3]
+
+                for field in self.__fields:
+                    if name == field.name:
+                        sys.exit('Duplicate field define: {0}'.format(name))
+
+                m = RE_set_value.match(split[1])
+                if m is not None:
+                    unshifted_mask = int(m.group(1), 0)
+                    shift = int(m.group(2), 0)
+                else:
+                    m = RE_set_value_no_shift.match(split[1])
+                    if m is not None:
+                        unshifted_mask = int(m.group(2), 0)
+                        shift = 0
+                    else:
+                        sys.exit('Bad S_xxx_xxx define: {0}'.format(line))
+
+                num_bits = int(math.log2(unshifted_mask + 1))
+                if unshifted_mask != (1 << num_bits) - 1:
+                    sys.exit('Bad unshifted mask in {0}'.format(line))
+
+                self.__field = Object(
+                    name=name,
+                    bits=[shift, shift + num_bits - 1],
+                )
+                if comment is not None:
+                    self.__field.comment = comment
+                self.__enumentries = []
+
+            if prefix == 'R':
+                self.__fini_field()
+                self.__fini_register()
+
+                if self.regdb.register_mappings_by_name(name):
+                    sys.exit('Duplicate register define: {0}'.format(name))
+
+                address = int(split[1], 0)
+                if address != prefix_address:
+                    sys.exit('Inconsistent register address: {0}'.format(line))
+
+                self.__regmap = Object(
+                    name=name,
+                    chips=self.chips,
+                    map=Object(to=self.address_space, at=address),
+                )
+                self.__fields = []
+
+        self.__fini_field()
+        self.__fini_register()
+
+def main():
+    map_to = sys.argv[1]
+
+    parser = HeaderParser(map_to)
+    parser.parse_header(sys.stdin)
+
+    deduplicate_enums(parser.regdb)
+    deduplicate_register_types(parser.regdb)
+
+    print(parser.regdb.encode_json_pretty())
+
+
+if __name__ == '__main__':
+    main()
+
+# kate: space-indent on; indent-width 4; replace-tabs on;
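
To show how the S_xxx_xxx parsing above recovers a field's bit range, here is the mask/shift extraction applied to a hypothetical macro body; the regular expression is the RE_set_value defined in parseheader.py:

import math
import re

RE_set_value = re.compile(r'\(\(\(unsigned\)\(x\) & ([0-9a-fA-Fx]+)\) << ([0-9]+)\)')

# Hypothetical right-hand side of an S_xxx_xxx define.
m = RE_set_value.match('(((unsigned)(x) & 0x3F) << 4)')
unshifted_mask = int(m.group(1), 0)            # 0x3F
shift = int(m.group(2), 0)                     # 4
num_bits = int(math.log2(unshifted_mask + 1))  # the field is 6 bits wide
bits = [shift, shift + num_bits - 1]           # [4, 9], i.e. bits 9:4 of the register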
diff --git a/src/amd/registers/regdb.py b/src/amd/registers/regdb.py
new file mode 100644 (file)
index 0000000..fc9a0a2
--- /dev/null
@@ -0,0 +1,901 @@
+#
+# Copyright 2017-2019 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+# USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+"""
+Python package containing common tools for manipulating register JSON.
+"""
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import itertools
+import json
+import re
+import sys
+
+from collections import defaultdict
+from contextlib import contextmanager
+
+class UnionFind(object):
+    """
+    Simplistic implementation of a union-find data structure that also keeps
+    track of the sets that have been unified.
+
+    - add: add an element to the implied global set of elements
+    - union: unify the sets containing the two given elements
+    - find: return the representative element of the set containing the
+            given element
+    - get_set: get the set containing the given element
+    - sets: iterate over all sets (the sets form a partition of the set of all
+            elements that have ever been added)
+    """
+    def __init__(self):
+        self.d = {}
+
+    def add(self, k):
+        if k not in self.d:
+            self.d[k] = set([k])
+
+    def union(self, k1, k2):
+        k1 = self.find(k1)
+        k2 = self.find(k2)
+        if k1 == k2:
+            return
+        if len(k1) < len(k2):
+            k1, k2 = k2, k1
+        self.d[k1].update(self.d[k2])
+        self.d[k2] = (k1,)
+
+    def find(self, k):
+        e = self.d[k]
+        if isinstance(e, set):
+            return k
+        assert isinstance(e, tuple)
+        r = self.find(e[0])
+        self.d[k] = (r,)
+        return r
+
+    def get_set(self, k):
+        k = self.find(k)
+        assert isinstance(self.d[k], set)
+        return self.d[k]
+
+    def sets(self):
+        for v in self.d.values():
+            if isinstance(v, set):
+                yield v
+
+
+class Object(object):
+    """
+    Convenience helper class that essentially acts as a dictionary for convenient
+    conversion from and to JSON while allowing the use of .field notation
+    instead of subscript notation for member access.
+    """
+    def __init__(self, **kwargs):
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+
+    def update(self, **kwargs):
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+        return self
+
+    def __str__(self):
+        return 'Object(' + ', '.join(
+            '{0}={1}'.format(k, v) for k, v in self.__dict__.items()
+        ) + ')'
+
+    @staticmethod
+    def from_json(json, keys=None):
+        if isinstance(json, list):
+            return [Object.from_json(v) for v in json]
+        elif isinstance(json, dict):
+            obj = Object()
+            for k, v in json.items():
+                if keys is not None and k in keys:
+                    v = keys[k](v)
+                else:
+                    v = Object.from_json(v)
+                setattr(obj, k, v)
+            return obj
+        else:
+            return json
+
+    @staticmethod
+    def to_json(obj):
+        if isinstance(obj, Object):
+            return dict((k, Object.to_json(v)) for k, v in obj.__dict__.items())
+        elif isinstance(obj, dict):
+            return dict((k, Object.to_json(v)) for k, v in obj.items())
+        elif isinstance(obj, list):
+            return [Object.to_json(v) for v in obj]
+        else:
+            return obj
+
+class MergeError(Exception):
+    def __init__(self, msg):
+        super(MergeError, self).__init__(msg)
+
+class RegisterDatabaseError(Exception):
+    def __init__(self, msg):
+        super(RegisterDatabaseError, self).__init__(msg)
+
+@contextmanager
+def merge_scope(name):
+    """
+    Wrap a merge handling function in a "scope" whose name will be added when
+    propagating MergeErrors.
+    """
+    try:
+        yield
+    except Exception as e:
+        raise MergeError('{name}: {e}'.format(**locals()))
+
+def merge_dicts(dicts, keys=None, values=None):
+    """
+    Generic dictionary merging function.
+
+    dicts -- list of (origin, dictionary) pairs to merge
+    keys -- optional dictionary to provide a merge-strategy per key;
+            the merge strategy is a callable which will receive a list of
+            (origin, value) pairs
+    values -- optional function which provides a merge-strategy for values;
+              the merge strategy is a callable which will receive the name of
+              the key and a list of (origin, value) pairs
+
+    The default strategy is to allow merging keys if all origin dictionaries
+    that contain the key have the same value for it.
+    """
+    ks = set()
+    for _, d in dicts:
+        ks.update(d.keys())
+
+    result = {}
+    for k in ks:
+        vs = [(o, d[k]) for o, d in dicts if k in d]
+        with merge_scope('Key {k}'.format(**locals())):
+            if keys is not None and k in keys:
+                result[k] = keys[k](vs)
+            elif values is not None:
+                result[k] = values(k, vs)
+            else:
+                base_origin, base = vs[0]
+                for other_origin, other in vs[1:]:
+                    if base != other:
+                        raise MergeError('{base} (from {base_origin}) != {other} (from {other_origin})'.format(**locals()))
+                result[k] = base
+    return result
+
+def merge_objects(objects, keys=None):
+    """
+    Like merge_dicts, but applied to instances of Object.
+    """
+    return Object(**merge_dicts([(origin, obj.__dict__) for origin, obj in objects], keys=keys))
+
+class RegisterDatabase(object):
+    """
+    A register database containing:
+
+    - enums: these are lists of named values that can occur in a register field
+    - register types: description of a register type or template as a list of
+                      fields
+    - register mappings: named and typed registers mapped at locations in an
+                         address space
+    """
+    def __init__(self):
+        self.__enums = {}
+        self.__register_types = {}
+        self.__register_mappings = []
+        self.__regmap_by_addr = None
+        self.__chips = None
+
+    def __post_init(self):
+        """
+        Perform some basic canonicalization:
+        - enum entries are sorted by value
+        - register type fields are sorted by starting bit
+        - __register_mappings is sorted by name
+        - the chips field of register mappings is sorted
+
+        Lazily computes the set of all chips mentioned by register mappings.
+        """
+        if self.__regmap_by_addr is not None:
+            return
+
+        for enum in self.__enums.values():
+            enum.entries.sort(key=lambda entry: entry.value)
+
+        for regtype in self.__register_types.values():
+            regtype.fields.sort(key=lambda field: field.bits[0])
+
+        self.__regmap_by_addr = defaultdict(list)
+        self.__chips = set()
+
+        # Merge register mappings using sort order and garbage collect enums
+        # and register types.
+        old_register_mappings = self.__register_mappings
+        old_register_mappings.sort(key=lambda regmap: regmap.name)
+
+        self.__register_mappings = []
+        for regmap in old_register_mappings:
+            addr = (regmap.map.to, regmap.map.at)
+            chips = set(getattr(regmap, 'chips', ['undef']))
+            type_ref = getattr(regmap, 'type_ref', None)
+
+            self.__chips.update(chips)
+
+            merged = False
+            for other in reversed(self.__register_mappings):
+                if other.name != regmap.name:
+                    break
+
+                other_addr = (other.map.to, other.map.at)
+                other_chips = getattr(other, 'chips', ['undef'])
+                other_type_ref = getattr(other, 'type_ref', None)
+
+                if addr == other_addr and\
+                   (type_ref is None or other_type_ref is None or type_ref == other_type_ref):
+                    other.chips = sorted(list(chips.union(other_chips)))
+                    if type_ref is not None:
+                        other.type_ref = type_ref
+                    merged = True
+                    break
+
+            if merged:
+                continue
+
+            addrmappings = self.__regmap_by_addr[addr]
+
+            for other in addrmappings:
+                other_type_ref = getattr(other, 'type_ref', None)
+                other_chips = getattr(other, 'chips', ['undef'])
+                if type_ref is not None and other_type_ref is not None and \
+                   type_ref != other_type_ref and chips.intersection(other_chips):
+                    raise RegisterDatabaseError(
+                        'Registers {0} and {1} overlap and have conflicting types'.format(
+                            other.name, regmap.name))
+
+            addrmappings.append(regmap)
+            self.__register_mappings.append(regmap)
+
+    def garbage_collect(self):
+        """
+        Remove unreferenced enums and register types.
+        """
+        old_enums = self.__enums
+        old_register_types = self.__register_types
+
+        self.__enums = {}
+        self.__register_types = {}
+        for regmap in self.__register_mappings:
+            if hasattr(regmap, 'type_ref') and regmap.type_ref not in self.__register_types:
+                regtype = old_register_types[regmap.type_ref]
+                self.__register_types[regmap.type_ref] = regtype
+                for field in regtype.fields:
+                    if hasattr(field, 'enum_ref') and field.enum_ref not in self.__enums:
+                        self.__enums[field.enum_ref] = old_enums[field.enum_ref]
+
+    def __validate_register_type(self, regtype):
+        for field in regtype.fields:
+            if hasattr(field, 'enum_ref') and field.enum_ref not in self.__enums:
+                raise RegisterDatabaseError(
+                    'Register type field {0} has unknown enum_ref {1}'.format(
+                        field.name, field.enum_ref))
+
+    def __validate_register_mapping(self, regmap):
+        if hasattr(regmap, 'type_ref') and regmap.type_ref not in self.__register_types:
+            raise RegisterDatabaseError(
+                'Register mapping {0} has unknown type_ref {1}'.format(
+                    regmap.name, regmap.type_ref))
+
+    def __validate(self):
+        for regtype in self.__register_types.values():
+            self.__validate_register_type(regtype)
+        for regmap in self.__register_mappings:
+            self.__validate_register_mapping(regmap)
+
+    @staticmethod
+    def enum_key(enum):
+        """
+        Return a key that uniquely describes the signature of the given
+        enum (assuming that it has been canonicalized). Two enums with the
+        same key can be merged.
+        """
+        return ''.join(
+            ':{0}:{1}'.format(entry.name, entry.value)
+            for entry in enum.entries
+        )
+
+    def add_enum(self, name, enum):
+        if name in self.__enums:
+            raise RegisterDatabaseError('Duplicate enum ' + name)
+        self.__enums[name] = enum
+
+    @staticmethod
+    def __merge_enums(enums, union=False):
+        def merge_entries(entries_lists):
+            values = defaultdict(list)
+            for origin, enum in entries_lists:
+                for entry in enum:
+                    values[entry.value].append((origin, entry))
+
+            if not union:
+                if any(len(entries) != len(enums) for entries in values.values()):
+                    raise RegisterDatabaseError(
+                        'Attempting to merge enums with different values')
+
+            return [
+                merge_objects(entries)
+                for entries in values.values()
+            ]
+
+        return merge_objects(
+            enums,
+            keys={
+                'entries': merge_entries,
+            }
+        )
+
+    def merge_enums(self, names, newname, union=False):
+        """
+        Given a list of enum names, merge them all into one with a new name and
+        update all references.
+        """
+        if newname not in names and newname in self.__enums:
+            raise RegisterDatabaseError('Enum {0} already exists'.format(newname))
+
+        newenum = self.__merge_enums(
+            [(name, self.__enums[name]) for name in names],
+            union=union
+        )
+
+        for name in names:
+            del self.__enums[name]
+        self.__enums[newname] = newenum
+
+        for regtype in self.__register_types.values():
+            for field in regtype.fields:
+                if getattr(field, 'enum_ref', None) in names:
+                    field.enum_ref = newname
+
+        self.__regmap_by_addr = None
+
+    def add_register_type(self, name, regtype):
+        if name in self.__register_types:
+            raise RegisterDatabaseError('Duplicate register type ' + name)
+        self.__register_types[name] = regtype
+        self.__validate_register_type(regtype)
+
+    def register_type(self, name):
+        self.__post_init()
+        return self.__register_types[name]
+
+    @staticmethod
+    def __merge_register_types(regtypes, union=False, field_keys={}):
+        def merge_fields(fields_lists):
+            fields = defaultdict(list)
+            for origin, fields_list in fields_lists:
+                for field in fields_list:
+                    fields[field.bits[0]].append((origin, field))
+
+            if not union:
+                if any(len(entries) != len(regtypes) for entries in fields.values()):
+                    raise RegisterDatabaseError(
+                        'Attempting to merge register types with different fields')
+
+            return [
+                merge_objects(field, keys=field_keys)
+                for field in fields.values()
+            ]
+
+        with merge_scope('Register types {0}'.format(', '.join(name for name, _ in regtypes))):
+            return merge_objects(
+                regtypes,
+                keys={
+                    'fields': merge_fields,
+                }
+            )
+
+    def merge_register_types(self, names, newname, union=False):
+        """
+        Given a list of register type names, merge them all into one with a
+        new name and update all references.
+        """
+        if newname not in names and newname in self.__register_types:
+            raise RegisterDatabaseError('Register type {0} already exists'.format(newname))
+
+        newregtype = self.__merge_register_types(
+            [(name, self.__register_types[name]) for name in names],
+            union=union
+        )
+
+        for name in names:
+            del self.__register_types[name]
+        self.__register_types[newname] = newregtype
+
+        for regmap in self.__register_mappings:
+            if getattr(regmap, 'type_ref', None) in names:
+                regmap.type_ref = newname
+
+        self.__regmap_by_addr = None
+
+    def add_register_mapping(self, regmap):
+        self.__regmap_by_addr = None
+        self.__register_mappings.append(regmap)
+        self.__validate_register_mapping(regmap)
+
+    def remove_register_mappings(self, regmaps_to_remove):
+        self.__post_init()
+
+        regmaps_to_remove = set(regmaps_to_remove)
+
+        regmaps = self.__register_mappings
+        self.__register_mappings = []
+        for regmap in regmaps:
+            if regmap not in regmaps_to_remove:
+                self.__register_mappings.append(regmap)
+
+        self.__regmap_by_addr = None
+
+    def enum(self, name):
+        """
+        Return the enum of the given name, if any.
+        """
+        self.__post_init()
+        return self.__enums.get(name, None)
+
+    def enums(self):
+        """
+        Yields all (name, enum) pairs.
+        """
+        self.__post_init()
+        for name, enum in self.__enums.items():
+            yield (name, enum)
+
+    def fields(self):
+        """
+        Yields all (register_type, fields) pairs.
+        """
+        self.__post_init()
+        for regtype in self.__register_types.values():
+            for field in regtype.fields:
+                yield (regtype, field)
+
+    def register_types(self):
+        """
+        Yields all (name, register_type) pairs.
+        """
+        self.__post_init()
+        for name, regtype in self.__register_types.items():
+            yield (name, regtype)
+
+    def register_mappings_by_name(self, name):
+        """
+        Return a list of register mappings with the given name.
+        """
+        self.__post_init()
+
+        begin = 0
+        end = len(self.__register_mappings)
+        while begin < end:
+            middle = (begin + end) // 2
+            if self.__register_mappings[middle].name < name:
+                begin = middle + 1
+            elif name < self.__register_mappings[middle].name:
+                end = middle
+            else:
+                break
+
+        if begin >= end:
+            return []
+
+        # We now have begin <= middle < end, and the mapping at middle has the
+        # requested name. Narrow begin and end down to the run of equal names.
+        hi = middle
+        while begin < hi:
+            mid = (begin + hi) // 2
+            if self.__register_mappings[mid].name < name:
+                begin = mid + 1
+            else:
+                hi = mid
+
+        lo = middle + 1
+        while lo < end:
+            mid = (lo + end) // 2
+            if self.__register_mappings[mid].name == name:
+                lo = mid + 1
+            else:
+                end = mid
+
+        return self.__register_mappings[begin:end]
+
+    def register_mappings(self):
+        """
+        Yields all register mappings.
+        """
+        self.__post_init()
+        for regmap in self.__register_mappings:
+            yield regmap
+
+    def chips(self):
+        """
+        Yields all chips.
+        """
+        self.__post_init()
+        return iter(self.__chips)
+
+    def merge_chips(self, chips, newchip):
+        """
+        Merge register mappings of the given chips into a single chip of the
+        given name. Recursively merges register types and enums when appropriate.
+        """
+        self.__post_init()
+
+        chips = set(chips)
+
+        regtypes_merge = UnionFind()
+        enums_merge = UnionFind()
+
+        # Walk register mappings to find register types that should be merged.
+        for idx, regmap in itertools.islice(enumerate(self.__register_mappings), 1, None):
+            if not hasattr(regmap, 'type_ref'):
+                continue
+            if chips.isdisjoint(regmap.chips):
+                continue
+
+            for other in self.__register_mappings[idx-1::-1]:
+                if regmap.name != other.name:
+                    break
+                if chips.isdisjoint(other.chips):
+                    continue
+                if regmap.map.to != other.map.to or regmap.map.at != other.map.at:
+                    raise RegisterDatabaseError(
+                        'Attempting to merge chips with incompatible addresses of {0}'.format(regmap.name))
+                if not hasattr(other, 'type_ref'):
+                    continue
+
+                if regmap.type_ref != other.type_ref:
+                    regtypes_merge.add(regmap.type_ref)
+                    regtypes_merge.add(other.type_ref)
+                    regtypes_merge.union(regmap.type_ref, other.type_ref)
+
+        # Walk over regtype sets that are to be merged and find enums that
+        # should be merged.
+        for type_refs in regtypes_merge.sets():
+            fields_merge = defaultdict(set)
+            for type_ref in type_refs:
+                regtype = self.__register_types[type_ref]
+                for field in regtype.fields:
+                    if hasattr(field, 'enum_ref'):
+                        fields_merge[field.name].add(field.enum_ref)
+
+            for enum_refs in fields_merge.values():
+                if len(enum_refs) > 1:
+                    enum_refs = list(enum_refs)
+                    enums_merge.add(enum_refs[0])
+                    for enum_ref in enum_refs[1:]:
+                        enums_merge.add(enum_ref)
+                        enums_merge.union(enum_ref, enum_refs[0])
+
+        # Merge all mergeable enum sets
+        remap_enum_refs = {}
+        for enum_refs in enums_merge.sets():
+            enum_refs = sorted(enum_refs)
+            newname = enum_refs[0] + '_' + newchip
+            i = 0
+            while newname in self.__enums:
+                newname = enum_refs[0] + '_' + newchip + str(i)
+                i += 1
+
+            for enum_ref in enum_refs:
+                remap_enum_refs[enum_ref] = newname
+
+            # Don't use self.merge_enums, because we don't want to automatically
+            # update _all_ references to the merged enums (some may be from
+            # register types that aren't going to be merged).
+            self.add_enum(newname, self.__merge_enums(
+                [(enum_ref, self.__enums[enum_ref]) for enum_ref in enum_refs],
+                union=True
+            ))
+
+        # Merge all mergeable type refs
+        remap_type_refs = {}
+        for type_refs in regtypes_merge.sets():
+            type_refs = sorted(type_refs)
+            newname = type_refs[0] + '_' + newchip
+            i = 0
+            while newname in self.__register_types:
+                newname = type_refs[0] + '_' + newchip + str(i)
+                i += 1
+
+            updated_regtypes = []
+            for type_ref in type_refs:
+                remap_type_refs[type_ref] = newname
+
+                regtype = Object.from_json(Object.to_json(self.__register_types[type_ref]))
+                for field in regtype.fields:
+                    if hasattr(field, 'enum_ref'):
+                        field.enum_ref = remap_enum_refs.get(field.enum_ref, field.enum_ref)
+
+                updated_regtypes.append(regtype)
+
+            def merge_enum_refs(enum_refs):
+                enum_refs = set(
+                    remap_enum_refs.get(enum_ref, enum_ref)
+                    for origin, enum_ref in enum_refs
+                )
+                assert len(enum_refs) == 1 # should be ensured by how we determine the enums to be merged
+                return enum_refs.pop()
+
+            self.add_register_type(newname, self.__merge_register_types(
+                [(type_ref, self.__register_types[type_ref]) for type_ref in type_refs],
+                field_keys={
+                    'enum_ref': merge_enum_refs,
+                },
+                union=True
+            ))
+
+        # Merge register mappings
+        register_mappings = self.__register_mappings
+        self.__register_mappings = []
+
+        regmap_accum = None
+        for regmap in register_mappings:
+            if regmap_accum and regmap.name != regmap_accum.name:
+                regmap_accum.chips = [newchip]
+                self.__register_mappings.append(regmap_accum)
+                regmap_accum = None
+
+            joining_chips = chips.intersection(regmap.chips)
+            if not joining_chips:
+                self.__register_mappings.append(regmap)
+                continue
+            remaining_chips = set(regmap.chips).difference(chips)
+
+            type_ref = getattr(regmap, 'type_ref', None)
+            if type_ref is None:
+                regmap.chips = sorted(remaining_chips.union([newchip]))
+                self.__register_mappings.append(regmap)
+                continue
+
+            type_ref = remap_type_refs.get(type_ref, type_ref)
+            if remaining_chips:
+                regmap.chips = sorted(remaining_chips)
+                self.__register_mappings.append(regmap)
+                if not regmap_accum:
+                    regmap = Object.from_json(Object.to_json(regmap))
+                    if type_ref is not None:
+                        regmap.type_ref = type_ref
+
+            if not regmap_accum:
+                regmap_accum = regmap
+            else:
+                if not hasattr(regmap_accum, 'type_ref'):
+                    if type_ref is not None:
+                        regmap_accum.type_ref = type_ref
+                else:
+                    assert type_ref is None or type_ref == regmap_accum.type_ref
+        if regmap_accum:
+            self.__register_mappings.append(regmap_accum)
+
+    def update(self, other):
+        """
+        Add the contents of the other database to self.
+
+        Doesn't de-duplicate entries.
+        """
+        self.__post_init()
+        other.__post_init()
+
+        enum_remap = {}
+        regtype_remap = {}
+
+        for regmap in other.__register_mappings:
+            regmap = Object.from_json(Object.to_json(regmap))
+
+            type_ref = getattr(regmap, 'type_ref', None)
+            if type_ref is not None and type_ref not in regtype_remap:
+                regtype = Object.from_json(Object.to_json(other.__register_types[type_ref]))
+
+                chips = getattr(regmap, 'chips', [])
+                suffix = '_' + chips[0] if chips else ''
+
+                for field in regtype.fields:
+                    enum_ref = getattr(field, 'enum_ref', None)
+                    if enum_ref is not None and enum_ref not in enum_remap:
+                        enum = Object.from_json(Object.to_json(other.__enums[enum_ref]))
+
+                        remapped = enum_ref + suffix if enum_ref in self.__enums else enum_ref
+                        i = 0
+                        while remapped in self.__enums:
+                            remapped = enum_ref + suffix + str(i)
+                            i += 1
+                        self.add_enum(remapped, enum)
+                        enum_remap[enum_ref] = remapped
+
+                    if enum_ref is not None:
+                        field.enum_ref = enum_remap[enum_ref]
+
+                remapped = type_ref + suffix if type_ref in self.__register_types else type_ref
+                i = 0
+                while remapped in self.__register_types:
+                    remapped = type_ref + suffix + str(i)
+                    i += 1
+                self.add_register_type(remapped, regtype)
+                regtype_remap[type_ref] = remapped
+
+            if type_ref is not None:
+                regmap.type_ref = regtype_remap[type_ref]
+
+            self.add_register_mapping(regmap)
+
+    def to_json(self):
+        self.__post_init()
+        return {
+            'enums': Object.to_json(self.__enums),
+            'register_types': Object.to_json(self.__register_types),
+            'register_mappings': Object.to_json(self.__register_mappings),
+        }
+
+    def encode_json_pretty(self):
+        """
+        Use a custom JSON encoder which pretty prints, but keeps inner structures compact
+        """
+        # Since the JSON module isn't very extensible, this ends up being
+        # really hacky.
+        obj = self.to_json()
+
+        replacements = []
+        def placeholder(s):
+            placeholder = "JSON-{key}-NOSJ".format(key=len(replacements))
+            replacements.append(json.dumps(s, sort_keys=True))
+            return placeholder
+
+        # Pre-create non-indented encodings for inner objects
+        for enum in obj['enums'].values():
+            enum['entries'] = [
+                placeholder(entry)
+                for entry in enum['entries']
+            ]
+
+        for regtype in obj['register_types'].values():
+            regtype['fields'] = [
+                placeholder(field)
+                for field in regtype['fields']
+            ]
+
+        for regmap in obj['register_mappings']:
+            regmap['map'] = placeholder(regmap['map'])
+            if 'chips' in regmap:
+                regmap['chips'] = placeholder(regmap['chips'])
+
+        # Now create the 'outer' encoding with indentation and search-and-replace
+        # placeholders
+        result = json.dumps(obj, indent=1, sort_keys=True)
+
+        result = re.sub(
+            '"JSON-([0-9]+)-NOSJ"',
+            lambda m: replacements[int(m.group(1))],
+            result
+        )
+
+        return result
+
+    @staticmethod
+    def from_json(json):
+        db = RegisterDatabase()
+
+        db.__enums = dict((k, Object.from_json(v)) for k, v in json['enums'].items())
+        if 'register_types' in json:
+            db.__register_types = dict(
+                (k, Object.from_json(v))
+                for k, v in json['register_types'].items()
+            )
+        if 'register_mappings' in json:
+            db.__register_mappings = Object.from_json(json['register_mappings'])
+
+        # Old format
+        if 'registers' in json:
+            for reg in json['registers']:
+                type_ref = None
+                if 'fields' in reg and reg['fields']:
+                    type_ref = reg['names'][0]
+                    db.add_register_type(type_ref, Object(
+                        fields=Object.from_json(reg['fields'])
+                    ))
+
+                for name in reg['names']:
+                    regmap = Object(
+                        name=name,
+                        map=Object.from_json(reg['map'])
+                    )
+                    if type_ref is not None:
+                        regmap.type_ref = type_ref
+                    db.add_register_mapping(regmap)
+
+        db.__post_init()
+        return db
+
+def deduplicate_enums(regdb):
+    """
+    Find enums that have the exact same entries and merge them.
+    """
+    buckets = defaultdict(list)
+    for name, enum in regdb.enums():
+        buckets[RegisterDatabase.enum_key(enum)].append(name)
+
+    for bucket in buckets.values():
+        if len(bucket) > 1:
+            regdb.merge_enums(bucket, bucket[0])
+
+def deduplicate_register_types(regdb):
+    """
+    Find register types with the exact same fields (identified by name and
+    bit range) and merge them.
+
+    However, register types *aren't* merged if they have different enums for
+    the same field (as an exception, if one of them has an enum and the other
+    one doesn't, we assume that one is simply missing a bit of information and
+    merge the register types).
+    """
+    buckets = defaultdict(list)
+    for name, regtype in regdb.register_types():
+        key = ''.join(
+            ':{0}:{1}:{2}:'.format(
+                field.name, field.bits[0], field.bits[1],
+            )
+            for field in regtype.fields
+        )
+        buckets[key].append((name, regtype.fields))
+
+    for bucket in buckets.values():
+        # Register types in the same bucket have the same fields in the same
+        # places, but they may have different enum_refs. Allow merging when
+        # one has an enum_ref and another doesn't, but don't merge if they
+        # have enum_refs that differ.
+        bucket_enum_refs = [
+            [getattr(field, 'enum_ref', None) for field in fields]
+            for name, fields in bucket
+        ]
+        while bucket:
+            regtypes = [bucket[0][0]]
+            enum_refs = bucket_enum_refs[0]
+            del bucket[0]
+            del bucket_enum_refs[0]
+
+            idx = 0
+            while idx < len(bucket):
+                if all([
+                    not lhs or not rhs or lhs == rhs
+                    for lhs, rhs in zip(enum_refs, bucket_enum_refs[idx])
+                ]):
+                    regtypes.append(bucket[idx][0])
+                    enum_refs = [lhs or rhs for lhs, rhs in zip(enum_refs, bucket_enum_refs[idx])]
+                    del bucket[idx]
+                    del bucket_enum_refs[idx]
+                else:
+                    idx += 1
+
+            if len(regtypes) > 1:
+                regdb.merge_register_types(regtypes, regtypes[0])
+
+# kate: space-indent on; indent-width 4; replace-tabs on;
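
Finally, a minimal sketch of how the deduplication helpers behave, assuming regdb.py is importable; the enum names and entries are made up:

from regdb import Object, RegisterDatabase, deduplicate_enums

db = RegisterDatabase()
# Two enums with identical entries registered under different names.
db.add_enum('FOO_MODE', Object(entries=[Object(name='OFF', value=0),
                                        Object(name='ON', value=1)]))
db.add_enum('BAR_MODE', Object(entries=[Object(name='OFF', value=0),
                                        Object(name='ON', value=1)]))

deduplicate_enums(db)
# Only one enum name remains; the two identical enums were merged into the first.
print([name for name, _ in db.enums()])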