From a44460bf3d0226e95158e19b9e72b6f048d85bdb Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Tue, 29 Sep 2020 21:04:30 -0700 Subject: [PATCH] arch: Pull the (Sub)OperandList classes into their own file. Another step in breaking down the isa parser into more manageable parts. Change-Id: I0c5e70fe481bd17c0069b768129731e99a93ed0d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/35816 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/arch/isa_parser/isa_parser.py | 265 +------------------------ src/arch/isa_parser/operand_list.py | 288 ++++++++++++++++++++++++++++ src/arch/isa_parser/util.py | 17 ++ 3 files changed, 306 insertions(+), 264 deletions(-) create mode 100755 src/arch/isa_parser/operand_list.py diff --git a/src/arch/isa_parser/isa_parser.py b/src/arch/isa_parser/isa_parser.py index eddba0c20..999d92f40 100755 --- a/src/arch/isa_parser/isa_parser.py +++ b/src/arch/isa_parser/isa_parser.py @@ -45,6 +45,7 @@ import traceback from types import * from m5.util.grammar import Grammar +from .operand_list import * from .operand_types import * from .util import * @@ -340,270 +341,6 @@ def makeList(arg): else: return [ arg ] -class OperandList(object): - '''Find all the operands in the given code block. Returns an operand - descriptor list (instance of class OperandList).''' - def __init__(self, parser, code): - self.items = [] - self.bases = {} - # delete strings and comments so we don't match on operands inside - for regEx in (stringRE, commentRE): - code = regEx.sub('', code) - # search for operands - next_pos = 0 - while 1: - match = parser.operandsRE().search(code, next_pos) - if not match: - # no more matches: we're done - break - op = match.groups() - # regexp groups are operand full name, base, and extension - (op_full, op_base, op_ext) = op - # If is a elem operand, define or update the corresponding - # vector operand - isElem = False - if op_base in parser.elemToVector: - isElem = True - elem_op = (op_base, op_ext) - op_base = parser.elemToVector[op_base] - op_ext = '' # use the default one - # if the token following the operand is an assignment, this is - # a destination (LHS), else it's a source (RHS) - is_dest = (assignRE.match(code, match.end()) != None) - is_src = not is_dest - - # see if we've already seen this one - op_desc = self.find_base(op_base) - if op_desc: - if op_ext and op_ext != '' and op_desc.ext != op_ext: - error ('Inconsistent extensions for operand %s: %s - %s' \ - % (op_base, op_desc.ext, op_ext)) - op_desc.is_src = op_desc.is_src or is_src - op_desc.is_dest = op_desc.is_dest or is_dest - if isElem: - (elem_base, elem_ext) = elem_op - found = False - for ae in op_desc.active_elems: - (ae_base, ae_ext) = ae - if ae_base == elem_base: - if ae_ext != elem_ext: - error('Inconsistent extensions for elem' - ' operand %s' % elem_base) - else: - found = True - if not found: - op_desc.active_elems.append(elem_op) - else: - # new operand: create new descriptor - op_desc = parser.operandNameMap[op_base](parser, - op_full, op_ext, is_src, is_dest) - # if operand is a vector elem, add the corresponding vector - # operand if not already done - if isElem: - op_desc.elemExt = elem_op[1] - op_desc.active_elems = [elem_op] - self.append(op_desc) - # start next search after end of current match - next_pos = match.end() - self.sort() - # enumerate source & dest register operands... used in building - # constructor later - self.numSrcRegs = 0 - self.numDestRegs = 0 - self.numFPDestRegs = 0 - self.numIntDestRegs = 0 - self.numVecDestRegs = 0 - self.numVecPredDestRegs = 0 - self.numCCDestRegs = 0 - self.numMiscDestRegs = 0 - self.memOperand = None - - # Flags to keep track if one or more operands are to be read/written - # conditionally. - self.predRead = False - self.predWrite = False - - for op_desc in self.items: - if op_desc.isReg(): - if op_desc.is_src: - op_desc.src_reg_idx = self.numSrcRegs - self.numSrcRegs += 1 - if op_desc.is_dest: - op_desc.dest_reg_idx = self.numDestRegs - self.numDestRegs += 1 - if op_desc.isFloatReg(): - self.numFPDestRegs += 1 - elif op_desc.isIntReg(): - self.numIntDestRegs += 1 - elif op_desc.isVecReg(): - self.numVecDestRegs += 1 - elif op_desc.isVecPredReg(): - self.numVecPredDestRegs += 1 - elif op_desc.isCCReg(): - self.numCCDestRegs += 1 - elif op_desc.isControlReg(): - self.numMiscDestRegs += 1 - elif op_desc.isMem(): - if self.memOperand: - error("Code block has more than one memory operand.") - self.memOperand = op_desc - - # Check if this operand has read/write predication. If true, then - # the microop will dynamically index source/dest registers. - self.predRead = self.predRead or op_desc.hasReadPred() - self.predWrite = self.predWrite or op_desc.hasWritePred() - - if parser.maxInstSrcRegs < self.numSrcRegs: - parser.maxInstSrcRegs = self.numSrcRegs - if parser.maxInstDestRegs < self.numDestRegs: - parser.maxInstDestRegs = self.numDestRegs - if parser.maxMiscDestRegs < self.numMiscDestRegs: - parser.maxMiscDestRegs = self.numMiscDestRegs - - # now make a final pass to finalize op_desc fields that may depend - # on the register enumeration - for op_desc in self.items: - op_desc.finalize(self.predRead, self.predWrite) - - def __len__(self): - return len(self.items) - - def __getitem__(self, index): - return self.items[index] - - def append(self, op_desc): - self.items.append(op_desc) - self.bases[op_desc.base_name] = op_desc - - def find_base(self, base_name): - # like self.bases[base_name], but returns None if not found - # (rather than raising exception) - return self.bases.get(base_name) - - # internal helper function for concat[Some]Attr{Strings|Lists} - def __internalConcatAttrs(self, attr_name, filter, result): - for op_desc in self.items: - if filter(op_desc): - result += getattr(op_desc, attr_name) - return result - - # return a single string that is the concatenation of the (string) - # values of the specified attribute for all operands - def concatAttrStrings(self, attr_name): - return self.__internalConcatAttrs(attr_name, lambda x: 1, '') - - # like concatAttrStrings, but only include the values for the operands - # for which the provided filter function returns true - def concatSomeAttrStrings(self, filter, attr_name): - return self.__internalConcatAttrs(attr_name, filter, '') - - # return a single list that is the concatenation of the (list) - # values of the specified attribute for all operands - def concatAttrLists(self, attr_name): - return self.__internalConcatAttrs(attr_name, lambda x: 1, []) - - # like concatAttrLists, but only include the values for the operands - # for which the provided filter function returns true - def concatSomeAttrLists(self, filter, attr_name): - return self.__internalConcatAttrs(attr_name, filter, []) - - def sort(self): - self.items.sort(key=lambda a: a.sort_pri) - -class SubOperandList(OperandList): - '''Find all the operands in the given code block. Returns an operand - descriptor list (instance of class OperandList).''' - def __init__(self, parser, code, requestor_list): - self.items = [] - self.bases = {} - # delete strings and comments so we don't match on operands inside - for regEx in (stringRE, commentRE): - code = regEx.sub('', code) - # search for operands - next_pos = 0 - while 1: - match = parser.operandsRE().search(code, next_pos) - if not match: - # no more matches: we're done - break - op = match.groups() - # regexp groups are operand full name, base, and extension - (op_full, op_base, op_ext) = op - # If is a elem operand, define or update the corresponding - # vector operand - if op_base in parser.elemToVector: - elem_op = op_base - op_base = parser.elemToVector[elem_op] - # find this op in the requestor list - op_desc = requestor_list.find_base(op_base) - if not op_desc: - error('Found operand %s which is not in the requestor list!' - % op_base) - else: - # See if we've already found this operand - op_desc = self.find_base(op_base) - if not op_desc: - # if not, add a reference to it to this sub list - self.append(requestor_list.bases[op_base]) - - # start next search after end of current match - next_pos = match.end() - self.sort() - self.memOperand = None - # Whether the whole PC needs to be read so parts of it can be accessed - self.readPC = False - # Whether the whole PC needs to be written after parts of it were - # changed - self.setPC = False - # Whether this instruction manipulates the whole PC or parts of it. - # Mixing the two is a bad idea and flagged as an error. - self.pcPart = None - - # Flags to keep track if one or more operands are to be read/written - # conditionally. - self.predRead = False - self.predWrite = False - - for op_desc in self.items: - if op_desc.isPCPart(): - self.readPC = True - if op_desc.is_dest: - self.setPC = True - - if op_desc.isPCState(): - if self.pcPart is not None: - if self.pcPart and not op_desc.isPCPart() or \ - not self.pcPart and op_desc.isPCPart(): - error("Mixed whole and partial PC state operands.") - self.pcPart = op_desc.isPCPart() - - if op_desc.isMem(): - if self.memOperand: - error("Code block has more than one memory operand.") - self.memOperand = op_desc - - # Check if this operand has read/write predication. If true, then - # the microop will dynamically index source/dest registers. - self.predRead = self.predRead or op_desc.hasReadPred() - self.predWrite = self.predWrite or op_desc.hasWritePred() - -# Regular expression object to match C++ strings -stringRE = re.compile(r'"([^"\\]|\\.)*"') - -# Regular expression object to match C++ comments -# (used in findOperands()) -commentRE = re.compile(r'(^)?[^\S\n]*/(?:\*(.*?)\*/[^\S\n]*|/[^\n]*)($)?', - re.DOTALL | re.MULTILINE) - -# Regular expression object to match assignment statements (used in -# findOperands()). If the code immediately following the first -# appearance of the operand matches this regex, then the operand -# appears to be on the LHS of an assignment, and is thus a -# destination. basically we're looking for an '=' that's not '=='. -# The heinous tangle before that handles the case where the operand -# has an array subscript. -assignRE = re.compile(r'(\[[^\]]+\])?\s*=(?!=)', re.MULTILINE) - def makeFlagConstructor(flag_list): if len(flag_list) == 0: return '' diff --git a/src/arch/isa_parser/operand_list.py b/src/arch/isa_parser/operand_list.py new file mode 100755 index 000000000..076b77e5a --- /dev/null +++ b/src/arch/isa_parser/operand_list.py @@ -0,0 +1,288 @@ +# Copyright (c) 2014, 2016, 2018-2019 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2003-2005 The Regents of The University of Michigan +# Copyright (c) 2013,2015 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from .util import assignRE, commentRE, stringRE +from .util import error + +class OperandList(object): + '''Find all the operands in the given code block. Returns an operand + descriptor list (instance of class OperandList).''' + def __init__(self, parser, code): + self.items = [] + self.bases = {} + # delete strings and comments so we don't match on operands inside + for regEx in (stringRE, commentRE): + code = regEx.sub('', code) + # search for operands + next_pos = 0 + while 1: + match = parser.operandsRE().search(code, next_pos) + if not match: + # no more matches: we're done + break + op = match.groups() + # regexp groups are operand full name, base, and extension + (op_full, op_base, op_ext) = op + # If is a elem operand, define or update the corresponding + # vector operand + isElem = False + if op_base in parser.elemToVector: + isElem = True + elem_op = (op_base, op_ext) + op_base = parser.elemToVector[op_base] + op_ext = '' # use the default one + # if the token following the operand is an assignment, this is + # a destination (LHS), else it's a source (RHS) + is_dest = (assignRE.match(code, match.end()) != None) + is_src = not is_dest + + # see if we've already seen this one + op_desc = self.find_base(op_base) + if op_desc: + if op_ext and op_ext != '' and op_desc.ext != op_ext: + error ('Inconsistent extensions for operand %s: %s - %s' \ + % (op_base, op_desc.ext, op_ext)) + op_desc.is_src = op_desc.is_src or is_src + op_desc.is_dest = op_desc.is_dest or is_dest + if isElem: + (elem_base, elem_ext) = elem_op + found = False + for ae in op_desc.active_elems: + (ae_base, ae_ext) = ae + if ae_base == elem_base: + if ae_ext != elem_ext: + error('Inconsistent extensions for elem' + ' operand %s' % elem_base) + else: + found = True + if not found: + op_desc.active_elems.append(elem_op) + else: + # new operand: create new descriptor + op_desc = parser.operandNameMap[op_base](parser, + op_full, op_ext, is_src, is_dest) + # if operand is a vector elem, add the corresponding vector + # operand if not already done + if isElem: + op_desc.elemExt = elem_op[1] + op_desc.active_elems = [elem_op] + self.append(op_desc) + # start next search after end of current match + next_pos = match.end() + self.sort() + # enumerate source & dest register operands... used in building + # constructor later + self.numSrcRegs = 0 + self.numDestRegs = 0 + self.numFPDestRegs = 0 + self.numIntDestRegs = 0 + self.numVecDestRegs = 0 + self.numVecPredDestRegs = 0 + self.numCCDestRegs = 0 + self.numMiscDestRegs = 0 + self.memOperand = None + + # Flags to keep track if one or more operands are to be read/written + # conditionally. + self.predRead = False + self.predWrite = False + + for op_desc in self.items: + if op_desc.isReg(): + if op_desc.is_src: + op_desc.src_reg_idx = self.numSrcRegs + self.numSrcRegs += 1 + if op_desc.is_dest: + op_desc.dest_reg_idx = self.numDestRegs + self.numDestRegs += 1 + if op_desc.isFloatReg(): + self.numFPDestRegs += 1 + elif op_desc.isIntReg(): + self.numIntDestRegs += 1 + elif op_desc.isVecReg(): + self.numVecDestRegs += 1 + elif op_desc.isVecPredReg(): + self.numVecPredDestRegs += 1 + elif op_desc.isCCReg(): + self.numCCDestRegs += 1 + elif op_desc.isControlReg(): + self.numMiscDestRegs += 1 + elif op_desc.isMem(): + if self.memOperand: + error("Code block has more than one memory operand.") + self.memOperand = op_desc + + # Check if this operand has read/write predication. If true, then + # the microop will dynamically index source/dest registers. + self.predRead = self.predRead or op_desc.hasReadPred() + self.predWrite = self.predWrite or op_desc.hasWritePred() + + if parser.maxInstSrcRegs < self.numSrcRegs: + parser.maxInstSrcRegs = self.numSrcRegs + if parser.maxInstDestRegs < self.numDestRegs: + parser.maxInstDestRegs = self.numDestRegs + if parser.maxMiscDestRegs < self.numMiscDestRegs: + parser.maxMiscDestRegs = self.numMiscDestRegs + + # now make a final pass to finalize op_desc fields that may depend + # on the register enumeration + for op_desc in self.items: + op_desc.finalize(self.predRead, self.predWrite) + + def __len__(self): + return len(self.items) + + def __getitem__(self, index): + return self.items[index] + + def append(self, op_desc): + self.items.append(op_desc) + self.bases[op_desc.base_name] = op_desc + + def find_base(self, base_name): + # like self.bases[base_name], but returns None if not found + # (rather than raising exception) + return self.bases.get(base_name) + + # internal helper function for concat[Some]Attr{Strings|Lists} + def __internalConcatAttrs(self, attr_name, filter, result): + for op_desc in self.items: + if filter(op_desc): + result += getattr(op_desc, attr_name) + return result + + # return a single string that is the concatenation of the (string) + # values of the specified attribute for all operands + def concatAttrStrings(self, attr_name): + return self.__internalConcatAttrs(attr_name, lambda x: 1, '') + + # like concatAttrStrings, but only include the values for the operands + # for which the provided filter function returns true + def concatSomeAttrStrings(self, filter, attr_name): + return self.__internalConcatAttrs(attr_name, filter, '') + + # return a single list that is the concatenation of the (list) + # values of the specified attribute for all operands + def concatAttrLists(self, attr_name): + return self.__internalConcatAttrs(attr_name, lambda x: 1, []) + + # like concatAttrLists, but only include the values for the operands + # for which the provided filter function returns true + def concatSomeAttrLists(self, filter, attr_name): + return self.__internalConcatAttrs(attr_name, filter, []) + + def sort(self): + self.items.sort(key=lambda a: a.sort_pri) + +class SubOperandList(OperandList): + '''Find all the operands in the given code block. Returns an operand + descriptor list (instance of class OperandList).''' + def __init__(self, parser, code, requestor_list): + self.items = [] + self.bases = {} + # delete strings and comments so we don't match on operands inside + for regEx in (stringRE, commentRE): + code = regEx.sub('', code) + # search for operands + next_pos = 0 + while 1: + match = parser.operandsRE().search(code, next_pos) + if not match: + # no more matches: we're done + break + op = match.groups() + # regexp groups are operand full name, base, and extension + (op_full, op_base, op_ext) = op + # If is a elem operand, define or update the corresponding + # vector operand + if op_base in parser.elemToVector: + elem_op = op_base + op_base = parser.elemToVector[elem_op] + # find this op in the requestor list + op_desc = requestor_list.find_base(op_base) + if not op_desc: + error('Found operand %s which is not in the requestor list!' + % op_base) + else: + # See if we've already found this operand + op_desc = self.find_base(op_base) + if not op_desc: + # if not, add a reference to it to this sub list + self.append(requestor_list.bases[op_base]) + + # start next search after end of current match + next_pos = match.end() + self.sort() + self.memOperand = None + # Whether the whole PC needs to be read so parts of it can be accessed + self.readPC = False + # Whether the whole PC needs to be written after parts of it were + # changed + self.setPC = False + # Whether this instruction manipulates the whole PC or parts of it. + # Mixing the two is a bad idea and flagged as an error. + self.pcPart = None + + # Flags to keep track if one or more operands are to be read/written + # conditionally. + self.predRead = False + self.predWrite = False + + for op_desc in self.items: + if op_desc.isPCPart(): + self.readPC = True + if op_desc.is_dest: + self.setPC = True + + if op_desc.isPCState(): + if self.pcPart is not None: + if self.pcPart and not op_desc.isPCPart() or \ + not self.pcPart and op_desc.isPCPart(): + error("Mixed whole and partial PC state operands.") + self.pcPart = op_desc.isPCPart() + + if op_desc.isMem(): + if self.memOperand: + error("Code block has more than one memory operand.") + self.memOperand = op_desc + + # Check if this operand has read/write predication. If true, then + # the microop will dynamically index source/dest registers. + self.predRead = self.predRead or op_desc.hasReadPred() + self.predWrite = self.predWrite or op_desc.hasWritePred() diff --git a/src/arch/isa_parser/util.py b/src/arch/isa_parser/util.py index 1a1be2601..7a000e8ac 100755 --- a/src/arch/isa_parser/util.py +++ b/src/arch/isa_parser/util.py @@ -50,6 +50,23 @@ import re def indent(s): return re.sub(r'(?m)^(?!#)', ' ', s) +# Regular expression object to match C++ strings +stringRE = re.compile(r'"([^"\\]|\\.)*"') + +# Regular expression object to match C++ comments +# (used in findOperands()) +commentRE = re.compile(r'(^)?[^\S\n]*/(?:\*(.*?)\*/[^\S\n]*|/[^\n]*)($)?', + re.DOTALL | re.MULTILINE) + +# Regular expression object to match assignment statements (used in +# findOperands()). If the code immediately following the first +# appearance of the operand matches this regex, then the operand +# appears to be on the LHS of an assignment, and is thus a +# destination. basically we're looking for an '=' that's not '=='. +# The heinous tangle before that handles the case where the operand +# has an array subscript. +assignRE = re.compile(r'(\[[^\]]+\])?\s*=(?!=)', re.MULTILINE) + # # Munge a somewhat arbitrarily formatted piece of Python code # (e.g. from a format 'let' block) into something whose indentation -- 2.30.2