From: Andrew Reynolds Date: Thu, 2 Sep 2021 22:32:49 +0000 (-0500) Subject: Add LFSC node converter (#6944) X-Git-Tag: cvc5-1.0.0~1282 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bc008131991c13bb635c9351942e1ef3f7e6f49b;p=cvc5.git Add LFSC node converter (#6944) This utility converts from terms used in the internal calculus to terms that capture how they should be printed in LFSC. --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 001a8bbc7..a25d287a9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -178,6 +178,8 @@ libcvc5_add_sources( proof/lazy_proof_chain.h proof/lazy_tree_proof_generator.cpp proof/lazy_tree_proof_generator.h + proof/lfsc/lfsc_node_converter.cpp + proof/lfsc/lfsc_node_converter.h proof/lfsc/lfsc_util.cpp proof/lfsc/lfsc_util.h proof/method_id.cpp diff --git a/src/expr/nary_term_util.h b/src/expr/nary_term_util.h index aae97b2cf..31157928c 100644 --- a/src/expr/nary_term_util.h +++ b/src/expr/nary_term_util.h @@ -41,7 +41,15 @@ bool hasListVar(TNode n); */ bool getListVarContext(TNode n, std::map& context); -/** get the null terminator */ +/** + * Get the null terminator for kind k and type node tn. + * + * Examples of null terminators: + * false for (OR, bool) + * true for (AND, bool) + * (as seq.empty (Seq Int)) for (STRING_CONCAT, (Seq Int) + * #x0 for (BITVECTOR_OR, (_ BitVec 4)) + */ Node getNullTerminator(Kind k, TypeNode tn); /** diff --git a/src/proof/lfsc/lfsc_node_converter.cpp b/src/proof/lfsc/lfsc_node_converter.cpp new file mode 100644 index 000000000..12326b212 --- /dev/null +++ b/src/proof/lfsc/lfsc_node_converter.cpp @@ -0,0 +1,1019 @@ +/****************************************************************************** + * Top contributors (to current version): + * Andrew Reynolds + * + * This file is part of the cvc5 project. + * + * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS + * in the top-level source directory and their institutional affiliations. + * All rights reserved. See the file COPYING in the top-level source + * directory for licensing information. + * **************************************************************************** + * + * Implementation of LFSC node conversion + */ + +#include "proof/lfsc/lfsc_node_converter.h" + +#include + +#include "expr/array_store_all.h" +#include "expr/dtype.h" +#include "expr/dtype_cons.h" +#include "expr/nary_term_util.h" +#include "expr/node_manager_attributes.h" +#include "expr/sequence.h" +#include "expr/skolem_manager.h" +#include "printer/smt2/smt2_printer.h" +#include "theory/bv/theory_bv_utils.h" +#include "theory/strings/word.h" +#include "theory/uf/theory_uf_rewriter.h" +#include "util/bitvector.h" +#include "util/iand.h" +#include "util/rational.h" +#include "util/regexp.h" +#include "util/string.h" + +using namespace cvc5::kind; + +namespace cvc5 { +namespace proof { + +LfscNodeConverter::LfscNodeConverter() +{ + NodeManager* nm = NodeManager::currentNM(); + d_arrow = nm->mkSortConstructor("arrow", 2); + + d_sortType = nm->mkSort("sortType"); + // the embedding of arrow into Node, which is binary constructor over sorts + TypeNode anfType = nm->mkFunctionType({d_sortType, d_sortType}, d_sortType); + d_typeAsNode[d_arrow] = getSymbolInternal(FUNCTION_TYPE, anfType, "arrow"); + + TypeNode intType = nm->integerType(); + TypeNode arrType = nm->mkFunctionType({d_sortType, d_sortType}, d_sortType); + d_typeKindToNodeCons[ARRAY_TYPE] = + getSymbolInternal(FUNCTION_TYPE, arrType, "Array"); + TypeNode bvType = nm->mkFunctionType(intType, d_sortType); + d_typeKindToNodeCons[BITVECTOR_TYPE] = + getSymbolInternal(FUNCTION_TYPE, bvType, "BitVec"); + TypeNode fpType = nm->mkFunctionType({intType, intType}, d_sortType); + d_typeKindToNodeCons[FLOATINGPOINT_TYPE] = + getSymbolInternal(FUNCTION_TYPE, fpType, "FloatingPoint"); + TypeNode setType = nm->mkFunctionType(d_sortType, d_sortType); + d_typeKindToNodeCons[SET_TYPE] = + getSymbolInternal(FUNCTION_TYPE, setType, "Set"); + d_typeKindToNodeCons[BAG_TYPE] = + getSymbolInternal(FUNCTION_TYPE, setType, "Bag"); + d_typeKindToNodeCons[SEQUENCE_TYPE] = + getSymbolInternal(FUNCTION_TYPE, setType, "Seq"); +} + +Node LfscNodeConverter::postConvert(Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + Kind k = n.getKind(); + if (k == ASCRIPTION_TYPE) + { + // dummy node, return it + return n; + } + TypeNode tn = n.getType(); + Trace("lfsc-term-process-debug") + << "postConvert " << n << " " << k << std::endl; + if (k == BOUND_VARIABLE) + { + // ignore internally generated symbols + if (d_symbols.find(n) != d_symbols.end()) + { + return n; + } + // bound variable v is (bvar x T) + TypeNode intType = nm->integerType(); + Node x = nm->mkConst(Rational(getOrAssignIndexForVar(n))); + Node tc = typeAsNode(convertType(tn)); + TypeNode ftype = nm->mkFunctionType({intType, d_sortType}, tn); + Node bvarOp = getSymbolInternal(k, ftype, "bvar"); + return nm->mkNode(APPLY_UF, bvarOp, x, tc); + } + else if (k == SKOLEM || k == BOOLEAN_TERM_VARIABLE) + { + // constructors/selectors are represented by skolems, which are defined + // symbols + if (tn.isConstructor() || tn.isSelector() || tn.isTester() + || tn.isUpdater()) + { + // note these are not converted to their user named (cvc.) symbols here, + // to avoid type errors when constructing terms for postConvert + return n; + } + // skolems v print as their witness forms + // v is (skolem W) where W is the original or witness form of v + Node wi = SkolemManager::getOriginalForm(n); + if (wi == n) + { + // if it is not a purification skolem, maybe it is a witness skolem + wi = SkolemManager::getWitnessForm(n); + } + if (!wi.isNull() && wi != n) + { + Trace("lfsc-term-process-debug") << "...witness form " << wi << std::endl; + wi = convert(wi); + Trace("lfsc-term-process-debug") + << "...converted witness for " << wi << std::endl; + TypeNode ftype = nm->mkFunctionType(tn, tn); + Node skolemOp = getSymbolInternal(k, ftype, "skolem"); + return nm->mkNode(APPLY_UF, skolemOp, wi); + } + // might be a skolem function + Node ns = maybeMkSkolemFun(n); + if (!ns.isNull()) + { + return ns; + } + // Otherwise, it is an uncategorized skolem, must use a fresh variable. + // This case will only apply for terms originating from places with no + // proof support. + TypeNode intType = nm->integerType(); + TypeNode varType = nm->mkFunctionType({intType, d_sortType}, tn); + Node var = mkInternalSymbol("var", varType); + Node index = nm->mkConst(Rational(getOrAssignIndexForVar(n))); + Node tc = typeAsNode(convertType(tn)); + return nm->mkNode(APPLY_UF, var, index, tc); + } + else if (n.isVar()) + { + std::stringstream ss; + ss << n; + Node nn = mkInternalSymbol(getNameForUserName(ss.str()), tn); + return nn; + } + else if (k == APPLY_UF) + { + return convert(theory::uf::TheoryUfRewriter::getHoApplyForApplyUf(n)); + } + else if (k == APPLY_CONSTRUCTOR || k == APPLY_SELECTOR || k == APPLY_TESTER + || k == APPLY_SELECTOR_TOTAL || k == APPLY_UPDATER) + { + // must convert other kinds of apply to functions, since we convert to + // HO_APPLY + Node opc = getOperatorOfTerm(n, true); + if (n.getNumChildren() == 0) + { + return opc; + } + std::vector children; + children.push_back(opc); + children.insert(children.end(), n.begin(), n.end()); + return postConvert(nm->mkNode(APPLY_UF, children)); + } + else if (k == HO_APPLY) + { + std::vector argTypes; + argTypes.push_back(n[0].getType()); + argTypes.push_back(n[1].getType()); + TypeNode tnh = nm->mkFunctionType(argTypes, tn); + Node hconstf = getSymbolInternal(k, tnh, "apply"); + return nm->mkNode(APPLY_UF, hconstf, n[0], n[1]); + } + else if (k == CONST_RATIONAL || k == CAST_TO_REAL) + { + if (k == CAST_TO_REAL) + { + // already converted + do + { + n = n[0]; + Assert(n.getKind() == APPLY_UF || n.getKind() == CONST_RATIONAL); + } while (n.getKind() != CONST_RATIONAL); + } + TypeNode tnv = nm->mkFunctionType(tn, tn); + Node rconstf; + Node arg; + Rational r = n.getConst(); + if (tn.isInteger()) + { + rconstf = getSymbolInternal(k, tnv, "int"); + if (r.sgn() == -1) + { + // use LFSC syntax for mpz negation + Node mpzn = getSymbolInternal(k, nm->mkFunctionType(tn, tn), "~"); + arg = nm->mkNode(APPLY_UF, mpzn, nm->mkConst(r.abs())); + } + else + { + arg = n; + } + } + else + { + rconstf = getSymbolInternal(k, tnv, "real"); + // ensure rationals are printed properly here using mpq syntax + // Note that inconvieniently, LFSC uses (non-sexpr) syntax n/m for + // constant rationals, hence we must use a string + std::stringstream ss; + ss << "__LFSC_TMP" << r.getNumerator().abs() << "/" << r.getDenominator(); + arg = mkInternalSymbol(ss.str(), tn); + // negative (~ n/m) + if (r.sgn() == -1) + { + Node mpzn = getSymbolInternal(k, nm->mkFunctionType(tn, tn), "~"); + arg = nm->mkNode(APPLY_UF, mpzn, arg); + } + } + return nm->mkNode(APPLY_UF, rconstf, arg); + } + else if (k == CONST_BITVECTOR) + { + TypeNode btn = nm->booleanType(); + TypeNode tnv = nm->mkFunctionType(btn, tn); + TypeNode btnv = nm->mkFunctionType(btn, btn); + BitVector bv = n.getConst(); + size_t w = bv.getSize(); + Node ret = getSymbolInternal(k, btn, "bvn"); + Node b0 = getSymbolInternal(k, btn, "b0"); + Node b1 = getSymbolInternal(k, btn, "b1"); + Node bvc = getSymbolInternal(k, btnv, "bvc"); + for (size_t i = 0; i < w; i++) + { + Node arg = bv.isBitSet((w - 1) - i) ? b1 : b0; + ret = nm->mkNode(APPLY_UF, bvc, arg, ret); + } + Node bconstf = getSymbolInternal(k, tnv, "bv"); + return nm->mkNode(APPLY_UF, bconstf, ret); + } + else if (k == CONST_STRING) + { + //"" is emptystr + //"A" is (char 65) + //"ABC" is (str.++ (char 65) (str.++ (char 66) (str.++ (char 67) emptystr))) + std::vector charVec; + getCharVectorInternal(n, charVec); + Assert(!charVec.empty()); + if (charVec.size() == 1) + { + // handles empty string and singleton character + return charVec[0]; + } + std::reverse(charVec.begin(), charVec.end()); + Node ret = postConvert(getNullTerminator(STRING_CONCAT, tn)); + for (size_t i = 0, size = charVec.size(); i < size; i++) + { + ret = nm->mkNode(STRING_CONCAT, charVec[i], ret); + } + return ret; + } + else if (k == CONST_SEQUENCE) + { + const std::vector& charVec = n.getConst().getVec(); + TypeNode etype = nm->mkFunctionType(d_sortType, tn); + Node ret = getSymbolInternal(k, etype, "seq.empty"); + ret = nm->mkNode(APPLY_UF, ret, typeAsNode(convertType(tn))); + std::vector vecu; + for (size_t i = 0, size = charVec.size(); i < size; i++) + { + Node u = nm->mkNode(SEQ_UNIT, postConvert(charVec[size - (i + 1)])); + if (size == 1) + { + // singleton case + return u; + } + ret = nm->mkNode(STRING_CONCAT, u, ret); + } + return ret; + } + else if (k == STORE_ALL) + { + Node t = typeAsNode(convertType(tn)); + TypeNode caRetType = nm->mkFunctionType(tn.getArrayConstituentType(), tn); + TypeNode catype = nm->mkFunctionType(d_sortType, caRetType); + Node bconstf = getSymbolInternal(k, catype, "array_const"); + Node f = nm->mkNode(APPLY_UF, bconstf, t); + ArrayStoreAll storeAll = n.getConst(); + return nm->mkNode(APPLY_UF, f, convert(storeAll.getValue())); + } + else if (k == GEQ || k == GT || k == LEQ || k == LT || k == MINUS + || k == DIVISION || k == DIVISION_TOTAL || k == INTS_DIVISION + || k == INTS_DIVISION_TOTAL || k == INTS_MODULUS + || k == INTS_MODULUS_TOTAL || k == UMINUS || k == POW + || isIndexedOperatorKind(k)) + { + // must give special names to SMT-LIB operators with arithmetic subtyping + // note that MINUS is not n-ary + // get the macro-apply version of the operator + Node opc = getOperatorOfTerm(n, true); + std::vector children; + children.push_back(opc); + children.insert(children.end(), n.begin(), n.end()); + return nm->mkNode(APPLY_UF, children); + } + else if (k == EMPTYSET || k == UNIVERSE_SET || k == EMPTYBAG) + { + Node t = typeAsNode(convertType(tn)); + TypeNode etype = nm->mkFunctionType(d_sortType, tn); + Node ef = getSymbolInternal( + k, + etype, + k == EMPTYSET ? "emptyset" + : (k == UNIVERSE_SET ? "univset" : "emptybag")); + return nm->mkNode(APPLY_UF, ef, t); + } + else if (n.isClosure()) + { + // (forall ((x1 T1) ... (xn Tk)) P) is + // ((forall x1 T1) ((forall x2 T2) ... ((forall xk Tk) P))). We use + // SEXPR to do this, which avoids the need for indexed operators. + Node ret = n[1]; + Node cop = getOperatorOfClosure(n, true); + for (size_t i = 0, nchild = n[0].getNumChildren(); i < nchild; i++) + { + size_t ii = (nchild - 1) - i; + Node v = n[0][ii]; + Node vop = getOperatorOfBoundVar(cop, v); + ret = nm->mkNode(APPLY_UF, vop, ret); + } + // notice that intentionally we drop annotations here + return ret; + } + else if (k == REGEXP_LOOP) + { + // ((_ re.loop n1 n2) t) is ((re.loop n1 n2) t) + TypeNode intType = nm->integerType(); + TypeNode relType = + nm->mkFunctionType({intType, intType}, nm->mkFunctionType(tn, tn)); + Node rop = getSymbolInternal( + k, relType, printer::smt2::Smt2Printer::smtKindString(k)); + RegExpLoop op = n.getOperator().getConst(); + Node n1 = nm->mkConst(Rational(op.d_loopMinOcc)); + Node n2 = nm->mkConst(Rational(op.d_loopMaxOcc)); + return nm->mkNode(APPLY_UF, nm->mkNode(APPLY_UF, rop, n1, n2), n[0]); + } + else if (k == MATCH) + { + // currently unsupported + return n; + } + else if (k == SEP_NIL) + { + Node tnn = typeAsNode(convertType(tn)); + TypeNode ftype = nm->mkFunctionType(d_sortType, tn); + Node s = getSymbolInternal(k, ftype, "sep.nil"); + return nm->mkNode(APPLY_UF, s, tnn); + } + else if (NodeManager::isNAryKind(k) && n.getNumChildren() >= 2) + { + size_t nchild = n.getNumChildren(); + Assert(n.getMetaKind() != metakind::PARAMETERIZED); + // convert all n-ary applications to binary + std::vector children(n.begin(), n.end()); + // distinct is special case + if (k == DISTINCT) + { + // DISTINCT(x1,...,xn) ---> + // AND(DISTINCT(x1,x2), AND(,..., AND(,..,DISTINCT(x_{n-1},x_n)))) + Node ret = nm->mkNode(k, children[0], children[1]); + for (unsigned i = 0; i < nchild; i++) + for (unsigned j = i + 1; j < nchild; j++) + { + if (i != 0 && j != 1) + { + ret = nm->mkNode(AND, ret, nm->mkNode(k, children[i], children[j])); + } + } + Trace("lfsc-term-process-debug") << "n: " << n << std::endl + << "ret: " << ret << std::endl; + return ret; + } + std::reverse(children.begin(), children.end()); + // Add the null-terminator. This is done to disambiguate the number + // of children for term with n-ary operators. In particular note that + // (or A B C (or D E)) has representation: + // (or A (or B (or C (or (or D E) false)))) + // This makes the AST above distinguishable from (or A B C D E), + // which otherwise would both have representation: + // (or A (or B (or C (or D E)))) + Node nullTerm = getNullTerminator(k, tn); + // Most operators simply get binarized + Node ret; + size_t istart = 0; + if (nullTerm.isNull()) + { + ret = children[0]; + istart = 1; + } + else + { + // must convert recursively, since nullTerm may have subterms. + ret = convert(nullTerm); + } + // the kind to chain + Kind ck = k; + // check whether we are also changing the operator name, in which case + // we build a binary uninterpreted function opc + Node opc; + if (k == PLUS || k == MULT || k == NONLINEAR_MULT) + { + std::stringstream opName; + // currently allow subtyping + opName << "a."; + opName << printer::smt2::Smt2Printer::smtKindString(k); + TypeNode ftype = nm->mkFunctionType({tn, tn}, tn); + opc = getSymbolInternal(k, ftype, opName.str()); + ck = APPLY_UF; + } + // now, iterate over children and make binary conversion + for (size_t i = istart, npchild = children.size(); i < npchild; i++) + { + if (!opc.isNull()) + { + ret = nm->mkNode(ck, opc, children[i], ret); + } + else + { + ret = nm->mkNode(ck, children[i], ret); + } + } + return ret; + } + return n; +} + +TypeNode LfscNodeConverter::postConvertType(TypeNode tn) +{ + NodeManager* nm = NodeManager::currentNM(); + TypeNode cur = tn; + Node tnn; + Kind k = tn.getKind(); + Trace("lfsc-term-process-debug") + << "postConvertType " << tn << " " << tn.getNumChildren() << " " << k + << std::endl; + if (k == FUNCTION_TYPE) + { + // (-> T1 ... Tn T) is (arrow T1 .... (arrow Tn T)) + std::vector argTypes = tn.getArgTypes(); + // also make the node embedding of the type + Node arrown = d_typeAsNode[d_arrow]; + Assert(!arrown.isNull()); + cur = tn.getRangeType(); + tnn = typeAsNode(cur); + for (std::vector::reverse_iterator it = argTypes.rbegin(); + it != argTypes.rend(); + ++it) + { + std::vector aargs; + aargs.push_back(*it); + aargs.push_back(cur); + cur = nm->mkSort(d_arrow, aargs); + tnn = nm->mkNode(APPLY_UF, arrown, typeAsNode(*it), tnn); + } + } + else if (k == BITVECTOR_TYPE) + { + tnn = d_typeKindToNodeCons[k]; + Node w = nm->mkConst(Rational(tn.getBitVectorSize())); + tnn = nm->mkNode(APPLY_UF, tnn, w); + } + else if (k == FLOATINGPOINT_TYPE) + { + tnn = d_typeKindToNodeCons[k]; + Node e = nm->mkConst(Rational(tn.getFloatingPointExponentSize())); + Node s = nm->mkConst(Rational(tn.getFloatingPointSignificandSize())); + tnn = nm->mkNode(APPLY_UF, tnn, e, s); + } + else if (tn.getNumChildren() == 0) + { + // special case: tuples must be distinguished by their arity + if (tn.isTuple()) + { + const DType& dt = tn.getDType(); + unsigned int nargs = dt[0].getNumArgs(); + if (nargs > 0) + { + std::vector types; + std::vector convTypes; + std::vector targs; + for (unsigned int i = 0; i < nargs; i++) + { + // it is not converted yet, convert here + TypeNode tnc = convertType(dt[0][i].getRangeType()); + types.push_back(d_sortType); + convTypes.push_back(tnc); + targs.push_back(typeAsNode(tnc)); + } + TypeNode ftype = nm->mkFunctionType(types, d_sortType); + // must distinguish by arity + std::stringstream ss; + ss << "Tuple_" << nargs; + targs.insert(targs.begin(), getSymbolInternal(k, ftype, ss.str())); + tnn = nm->mkNode(APPLY_UF, targs); + // we are changing its name, we must make a sort constructor + cur = nm->mkSortConstructor(ss.str(), nargs); + cur = nm->mkSort(cur, convTypes); + } + } + if (tnn.isNull()) + { + std::stringstream ss; + tn.toStream(ss, Language::LANG_SMTLIB_V2_6); + if (tn.isSort() || (tn.isDatatype() && !tn.isTuple())) + { + std::stringstream sss; + sss << LfscNodeConverter::getNameForUserName(ss.str()); + tnn = getSymbolInternal(k, d_sortType, sss.str()); + cur = nm->mkSort(sss.str()); + } + else + { + tnn = getSymbolInternal(k, d_sortType, ss.str()); + } + } + } + else + { + // to build the type-as-node, must convert the component types + std::vector targs; + std::vector types; + for (const TypeNode& tnc : tn) + { + targs.push_back(typeAsNode(tnc)); + types.push_back(d_sortType); + } + Node op; + if (k == PARAMETRIC_DATATYPE) + { + // erase first child, which repeats the datatype + targs.erase(targs.begin(), targs.begin() + 1); + types.erase(types.begin(), types.begin() + 1); + TypeNode ftype = nm->mkFunctionType(types, d_sortType); + // the operator has been converted; it is no longer a datatype, thus + // we must print to get its name. + std::stringstream ss; + ss << tn[0]; + op = getSymbolInternal(k, ftype, ss.str()); + } + else if (k == SORT_TYPE) + { + TypeNode ftype = nm->mkFunctionType(types, d_sortType); + std::string name; + tn.getAttribute(expr::VarNameAttr(), name); + op = getSymbolInternal(k, ftype, name); + } + else + { + std::map::iterator it = d_typeKindToNodeCons.find(k); + if (it != d_typeKindToNodeCons.end()) + { + op = it->second; + } + } + if (!op.isNull()) + { + targs.insert(targs.begin(), op); + tnn = nm->mkNode(APPLY_UF, targs); + } + else + { + AlwaysAssert(false); + } + } + Assert(!tnn.isNull()); + Trace("lfsc-term-process-debug") << "...type as node: " << tnn << std::endl; + d_typeAsNode[cur] = tnn; + return cur; +} + +std::string LfscNodeConverter::getNameForUserName(const std::string& name) +{ + std::stringstream ss; + ss << "cvc." << name; + return ss.str(); +} + +bool LfscNodeConverter::shouldTraverse(Node n) +{ + Kind k = n.getKind(); + // don't convert bound variable list directly + if (k == BOUND_VAR_LIST) + { + return false; + } + // should not traverse internal applications + if (k == APPLY_UF) + { + if (d_symbols.find(n.getOperator()) != d_symbols.end()) + { + return false; + } + } + return true; +} + +Node LfscNodeConverter::maybeMkSkolemFun(Node k, bool macroApply) +{ + NodeManager* nm = NodeManager::currentNM(); + SkolemManager* sm = nm->getSkolemManager(); + SkolemFunId sfi = SkolemFunId::NONE; + Node cacheVal; + TypeNode tn = k.getType(); + if (sm->isSkolemFunction(k, sfi, cacheVal)) + { + if (sfi == SkolemFunId::SHARED_SELECTOR) + { + // a skolem corresponding to shared selector should print in + // LFSC as (sel T n) where T is the type and n is the index of the + // shared selector. + TypeNode fselt = nm->mkFunctionType(tn.getSelectorDomainType(), + tn.getSelectorRangeType()); + TypeNode intType = nm->integerType(); + TypeNode selt = nm->mkFunctionType({d_sortType, intType}, fselt); + Node sel = getSymbolInternal(k.getKind(), selt, "sel"); + Node kn = typeAsNode(convertType(tn.getSelectorRangeType())); + Assert(!cacheVal.isNull() && cacheVal.getKind() == CONST_RATIONAL); + return nm->mkNode(APPLY_UF, sel, kn, cacheVal); + } + else if (sfi == SkolemFunId::RE_UNFOLD_POS_COMPONENT) + { + // a skolem corresponding to a regular expression unfolding component + // should print as (skolem_re_unfold_pos t R n) where the skolem is the + // n^th component for the unfolding of (str.in_re t R). + TypeNode strType = nm->stringType(); + TypeNode reType = nm->regExpType(); + TypeNode intType = nm->integerType(); + TypeNode reut = nm->mkFunctionType({strType, reType, intType}, strType); + Node sk = getSymbolInternal(k.getKind(), reut, "skolem_re_unfold_pos"); + Assert(!cacheVal.isNull() && cacheVal.getKind() == SEXPR + && cacheVal.getNumChildren() == 3); + // third value is mpz, which is not converted + return nm->mkNode( + APPLY_UF, + {sk, convert(cacheVal[0]), convert(cacheVal[1]), cacheVal[2]}); + } + } + return Node::null(); +} + +Node LfscNodeConverter::typeAsNode(TypeNode tni) const +{ + // should always exist in the cache, as we always run types through + // postConvertType before calling this method. + std::map::const_iterator it = d_typeAsNode.find(tni); + AlwaysAssert(it != d_typeAsNode.end()) << "Missing typeAsNode " << tni; + return it->second; +} + +Node LfscNodeConverter::mkInternalSymbol(const std::string& name, TypeNode tn) +{ + Node sym = NodeManager::currentNM()->mkBoundVar(name, tn); + d_symbols.insert(sym); + return sym; +} + +Node LfscNodeConverter::getSymbolInternalFor(Node n, const std::string& name) +{ + return getSymbolInternal(n.getKind(), n.getType(), name); +} + +Node LfscNodeConverter::getSymbolInternal(Kind k, + TypeNode tn, + const std::string& name) +{ + std::tuple key(k, tn, name); + std::map, Node>::iterator it = + d_symbolsMap.find(key); + if (it != d_symbolsMap.end()) + { + return it->second; + } + Node sym = mkInternalSymbol(name, tn); + d_symbolToBuiltinKind[sym] = k; + d_symbolsMap[key] = sym; + return sym; +} + +void LfscNodeConverter::getCharVectorInternal(Node c, std::vector& chars) +{ + Assert(c.getKind() == CONST_STRING); + NodeManager* nm = NodeManager::currentNM(); + const std::vector& vec = c.getConst().getVec(); + if (vec.size() == 0) + { + Node ec = getSymbolInternalFor(c, "emptystr"); + chars.push_back(ec); + return; + } + TypeNode tnc = nm->mkFunctionType(nm->integerType(), c.getType()); + Node aconstf = getSymbolInternal(CONST_STRING, tnc, "char"); + for (unsigned i = 0, size = vec.size(); i < size; i++) + { + Node cc = nm->mkNode(APPLY_UF, aconstf, nm->mkConst(Rational(vec[i]))); + chars.push_back(cc); + } +} + +bool LfscNodeConverter::isIndexedOperatorKind(Kind k) +{ + return k == BITVECTOR_EXTRACT || k == BITVECTOR_REPEAT + || k == BITVECTOR_ZERO_EXTEND || k == BITVECTOR_SIGN_EXTEND + || k == BITVECTOR_ROTATE_LEFT || k == BITVECTOR_ROTATE_RIGHT + || k == INT_TO_BITVECTOR || k == IAND || k == APPLY_UPDATER + || k == APPLY_TESTER; +} + +std::vector LfscNodeConverter::getOperatorIndices(Kind k, Node n) +{ + NodeManager* nm = NodeManager::currentNM(); + std::vector indices; + switch (k) + { + case BITVECTOR_EXTRACT: + { + BitVectorExtract p = n.getConst(); + indices.push_back(nm->mkConst(Rational(p.d_high))); + indices.push_back(nm->mkConst(Rational(p.d_low))); + break; + } + case BITVECTOR_REPEAT: + indices.push_back( + nm->mkConst(Rational(n.getConst().d_repeatAmount))); + break; + case BITVECTOR_ZERO_EXTEND: + indices.push_back(nm->mkConst( + Rational(n.getConst().d_zeroExtendAmount))); + break; + case BITVECTOR_SIGN_EXTEND: + indices.push_back(nm->mkConst( + Rational(n.getConst().d_signExtendAmount))); + break; + case BITVECTOR_ROTATE_LEFT: + indices.push_back(nm->mkConst( + Rational(n.getConst().d_rotateLeftAmount))); + break; + case BITVECTOR_ROTATE_RIGHT: + indices.push_back(nm->mkConst( + Rational(n.getConst().d_rotateRightAmount))); + break; + case INT_TO_BITVECTOR: + indices.push_back( + nm->mkConst(Rational(n.getConst().d_size))); + break; + case IAND: + indices.push_back(nm->mkConst(Rational(n.getConst().d_size))); + break; + case APPLY_TESTER: + { + unsigned index = DType::indexOf(n); + const DType& dt = DType::datatypeOf(n); + indices.push_back(dt[index].getConstructor()); + } + break; + case APPLY_UPDATER: + { + unsigned index = DType::indexOf(n); + const DType& dt = DType::datatypeOf(n); + unsigned cindex = DType::cindexOf(n); + indices.push_back(dt[cindex][index].getSelector()); + } + break; + default: Assert(false); break; + } + return indices; +} + +Node LfscNodeConverter::getNullTerminator(Kind k, TypeNode tn) +{ + NodeManager* nm = NodeManager::currentNM(); + Node nullTerm; + switch (k) + { + case REGEXP_CONCAT: + // the language containing only the empty string, which has special + // syntax in LFSC + nullTerm = getSymbolInternal(k, tn, "re.empty"); + break; + case BITVECTOR_CONCAT: + { + // the null terminator of bitvector concat is a dummy variable of + // bit-vector type with zero width, regardless of the type of the overall + // concat. + TypeNode bvz = nm->mkBitVectorType(0); + nullTerm = getSymbolInternal(k, bvz, "emptybv"); + } + break; + default: + // no special handling, or not null terminated + break; + } + if (!nullTerm.isNull()) + { + return nullTerm; + } + // otherwise, fall back to standard utility + return expr::getNullTerminator(k, tn); +} + +Kind LfscNodeConverter::getBuiltinKindForInternalSymbol(Node op) const +{ + std::map::const_iterator it = d_symbolToBuiltinKind.find(op); + if (it != d_symbolToBuiltinKind.end()) + { + return it->second; + } + return UNDEFINED_KIND; +} + +Node LfscNodeConverter::getOperatorOfTerm(Node n, bool macroApply) +{ + Assert(n.hasOperator()); + Assert(!n.isClosure()); + NodeManager* nm = NodeManager::currentNM(); + Kind k = n.getKind(); + std::stringstream opName; + Trace("lfsc-term-process-debug2") + << "getOperatorOfTerm " << n << " " << k << " " + << (n.getMetaKind() == metakind::PARAMETERIZED) << " " + << isIndexedOperatorKind(k) << std::endl; + if (n.getMetaKind() == metakind::PARAMETERIZED) + { + Node op = n.getOperator(); + std::vector indices; + if (isIndexedOperatorKind(k)) + { + indices = getOperatorIndices(k, n.getOperator()); + // we must convert the name of indices on updaters and testers + if (k == APPLY_UPDATER || k == APPLY_TESTER) + { + Assert(indices.size() == 1); + // must convert to user name + std::stringstream sss; + sss << indices[0]; + TypeNode intType = nm->integerType(); + indices[0] = + getSymbolInternal(k, intType, getNameForUserName(sss.str())); + } + } + else if (op.getType().isFunction()) + { + return op; + } + // note other kinds of functions (e.g. selectors and testers) + std::vector argTypes; + for (const Node& nc : n) + { + argTypes.push_back(nc.getType()); + } + TypeNode ftype = n.getType(); + if (!argTypes.empty()) + { + ftype = nm->mkFunctionType(argTypes, ftype); + } + Node ret; + if (isIndexedOperatorKind(k)) + { + std::vector itypes; + for (const Node& i : indices) + { + itypes.push_back(i.getType()); + } + if (!itypes.empty()) + { + ftype = nm->mkFunctionType(itypes, ftype); + } + if (!macroApply) + { + if (k != APPLY_UPDATER && k != APPLY_TESTER) + { + opName << "f_"; + } + } + opName << printer::smt2::Smt2Printer::smtKindString(k); + } + else if (k == APPLY_CONSTRUCTOR) + { + unsigned index = DType::indexOf(op); + const DType& dt = DType::datatypeOf(op); + std::stringstream ssc; + ssc << dt[index].getConstructor(); + opName << getNameForUserName(ssc.str()); + } + else if (k == APPLY_SELECTOR) + { + unsigned index = DType::indexOf(op); + const DType& dt = DType::datatypeOf(op); + unsigned cindex = DType::cindexOf(op); + std::stringstream sss; + sss << dt[cindex][index].getSelector(); + opName << getNameForUserName(sss.str()); + } + else if (k == APPLY_SELECTOR_TOTAL) + { + ret = maybeMkSkolemFun(op, macroApply); + Assert(!ret.isNull()); + } + else if (k == SINGLETON || k == MK_BAG) + { + if (!macroApply) + { + opName << "f_"; + } + opName << printer::smt2::Smt2Printer::smtKindString(k); + } + else + { + opName << op; + } + if (ret.isNull()) + { + ret = getSymbolInternal(k, ftype, opName.str()); + } + // if indexed, apply to index + if (!indices.empty()) + { + std::vector ichildren; + ichildren.push_back(ret); + ichildren.insert(ichildren.end(), indices.begin(), indices.end()); + ret = nm->mkNode(APPLY_UF, ichildren); + } + Trace("lfsc-term-process-debug2") << "...return " << ret << std::endl; + return ret; + } + std::vector argTypes; + for (const Node& nc : n) + { + argTypes.push_back(nc.getType()); + } + // we only use binary operators + if (NodeManager::isNAryKind(k)) + { + argTypes.resize(2); + } + TypeNode tn = n.getType(); + TypeNode ftype = nm->mkFunctionType(argTypes, tn); + // most functions are called f_X where X is the SMT-LIB name, if we are + // getting the macroApply variant, then we don't prefix with `f_`. + if (!macroApply) + { + opName << "f_"; + } + // all arithmetic kinds must explicitly deal with real vs int subtyping + if (k == PLUS || k == MULT || k == NONLINEAR_MULT || k == GEQ || k == GT + || k == LEQ || k == LT || k == MINUS || k == DIVISION + || k == DIVISION_TOTAL || k == INTS_DIVISION || k == INTS_DIVISION_TOTAL + || k == INTS_MODULUS || k == INTS_MODULUS_TOTAL || k == UMINUS + || k == POW) + { + // currently allow subtyping + opName << "a."; + } + if (k == UMINUS) + { + opName << "u"; + } + opName << printer::smt2::Smt2Printer::smtKindString(k); + if (k == DIVISION_TOTAL || k == INTS_DIVISION_TOTAL + || k == INTS_MODULUS_TOTAL) + { + opName << "_total"; + } + return getSymbolInternal(k, ftype, opName.str()); +} + +Node LfscNodeConverter::getOperatorOfClosure(Node q, bool macroApply) +{ + NodeManager* nm = NodeManager::currentNM(); + TypeNode bodyType = nm->mkFunctionType(q[1].getType(), q.getType()); + // We permit non-flat function types here + // intType is used here for variable indices + TypeNode intType = nm->integerType(); + TypeNode ftype = nm->mkFunctionType({intType, d_sortType}, bodyType); + Kind k = q.getKind(); + std::stringstream opName; + if (!macroApply) + { + opName << "f_"; + } + opName << printer::smt2::Smt2Printer::smtKindString(k); + return getSymbolInternal(k, ftype, opName.str()); +} + +Node LfscNodeConverter::getOperatorOfBoundVar(Node cop, Node v) +{ + NodeManager* nm = NodeManager::currentNM(); + Node x = nm->mkConst(Rational(getOrAssignIndexForVar(v))); + Node tc = typeAsNode(convertType(v.getType())); + return nm->mkNode(APPLY_UF, cop, x, tc); +} + +size_t LfscNodeConverter::getOrAssignIndexForVar(Node v) +{ + Assert(v.isVar()); + std::map::iterator it = d_varIndex.find(v); + if (it != d_varIndex.end()) + { + return it->second; + } + size_t id = d_varIndex.size(); + d_varIndex[v] = id; + return id; +} + +} // namespace proof +} // namespace cvc5 diff --git a/src/proof/lfsc/lfsc_node_converter.h b/src/proof/lfsc/lfsc_node_converter.h new file mode 100644 index 000000000..fefe24d91 --- /dev/null +++ b/src/proof/lfsc/lfsc_node_converter.h @@ -0,0 +1,152 @@ +/****************************************************************************** + * Top contributors (to current version): + * Andrew Reynolds + * + * This file is part of the cvc5 project. + * + * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS + * in the top-level source directory and their institutional affiliations. + * All rights reserved. See the file COPYING in the top-level source + * directory for licensing information. + * **************************************************************************** + * + * Implementation of LFSC node conversion + */ +#include "cvc5_private.h" + +#ifndef CVC4__PROOF__LFSC__LFSC_NODE_CONVERTER_H +#define CVC4__PROOF__LFSC__LFSC_NODE_CONVERTER_H + +#include +#include + +#include "expr/node.h" +#include "expr/node_converter.h" +#include "expr/type_node.h" + +namespace cvc5 { +namespace proof { + +/** + * This is a helper class for the LFSC printer that converts nodes into + * form that LFSC expects. It should only be used by the LFSC printer. + */ +class LfscNodeConverter : public NodeConverter +{ + public: + LfscNodeConverter(); + ~LfscNodeConverter() {} + /** convert to internal */ + Node postConvert(Node n) override; + /** convert to internal */ + TypeNode postConvertType(TypeNode tn) override; + /** + * Get the null terminator for kind k and type tn. The type tn can be + * omitted if applications of kind k do not have parametric type. + * + * The returned null terminator is *not* converted to internal form. + * + * For examples of null terminators, see nary_term_utils.h. + */ + Node getNullTerminator(Kind k, TypeNode tn = TypeNode::null()); + /** + * Return the properly named operator for n of the form (f t1 ... tn), where + * f could be interpreted or uninterpreted. This method is used for cases + * where it is important to get the term corresponding to the operator for + * a term. An example is for the base REFL step of nested CONG. + */ + Node getOperatorOfTerm(Node n, bool macroApply = false); + /** + * Recall that (forall ((x Int)) (P x)) is printed as: + * (apply (forall N Int) (apply P (bvar N Int))) + * in LFSC, where N is an integer indicating the id of the variable. + * + * Get closure operator. In the above example, this method returns the + * uninterpreted function whose name is "forall" and is used to construct + * higher-order operators for each bound variable in the closure. + */ + Node getOperatorOfClosure(Node q, bool macroApply = false); + /** + * Get closure operator, where cop is the term returned by + * getOperatorOfClosure(q), where q is the closures to which v + * belongs. For example, for FORALL closures, this method will return the + * node that prints as "(forall N Int)". + */ + Node getOperatorOfBoundVar(Node cop, Node v); + /** + * Get the variable index for variable v, or assign a fresh index if it is + * not yet assigned. + */ + size_t getOrAssignIndexForVar(Node v); + /** + * Make an internal symbol with custom name. This is a BOUND_VARIABLE that + * has a distinguished status so that it is *not* printed as (bvar ...). The + * returned variable is always fresh. + */ + Node mkInternalSymbol(const std::string& name, TypeNode tn); + /** + * Get builtin kind for internal symbol op + */ + Kind getBuiltinKindForInternalSymbol(Node op) const; + + /** get name for user name */ + static std::string getNameForUserName(const std::string& name); + + private: + /** Should we traverse n? */ + bool shouldTraverse(Node n) override; + /** + * Make skolem function, if k was constructed by a skolem function identifier + * (in SkolemManager::mkSkolemFunction) that is supported in the LFSC + * signature. + */ + Node maybeMkSkolemFun(Node k, bool macroApply = false); + /** + * Type as node, returns a node that prints in the form that LFSC will + * interpret as the type tni. This method is required since types can be + * passed as arguments to terms. This method assumes that tni has been + * converted to internal form (via the convertType method of this class). + */ + Node typeAsNode(TypeNode tni) const; + /** + * Get symbol for term, a special case of the method below for the type and + * kind of n. + */ + Node getSymbolInternalFor(Node n, const std::string& name); + /** + * Get symbol internal, (k,tn,name) are for caching, name is the name. This + * method returns a fresh symbol of the given name and type. It is frequently + * used when the type of a native operator does not match the type of the + * LFSC operator. + */ + Node getSymbolInternal(Kind k, TypeNode tn, const std::string& name); + /** + * Get character vector, add internal vector of characters for c. + */ + void getCharVectorInternal(Node c, std::vector& chars); + /** Is k a kind that is printed as an indexed operator in LFSC? */ + static bool isIndexedOperatorKind(Kind k); + /** get indices for printing the operator of n in the LFSC format */ + static std::vector getOperatorIndices(Kind k, Node n); + /** terms with different syntax than smt2 */ + std::map, Node> d_symbolsMap; + /** the set of all internally generated symbols */ + std::unordered_set d_symbols; + /** symbols to builtin kinds*/ + std::map d_symbolToBuiltinKind; + /** arrow type constructor */ + TypeNode d_arrow; + /** the type of LFSC sorts, which can appear in terms */ + TypeNode d_sortType; + /** Used for getting unique index for variable */ + std::map d_varIndex; + /** Cache for typeAsNode */ + std::map d_typeAsNode; + /** Used for interpreted builtin parametric sorts */ + std::map d_typeKindToNodeCons; +}; + +} // namespace proof +} // namespace cvc5 + +#endif