From b7aa53c0126948cae651b91555e44f8ce2f546bc Mon Sep 17 00:00:00 2001 From: Morgan Deters Date: Sat, 9 Jun 2012 14:09:39 +0000 Subject: [PATCH] Cleanup and comments for the dag-ifier. Also some unit testing for it. --- src/expr/expr_template.h | 6 +- src/printer/Makefile.am | 3 +- src/printer/ast/ast_printer.cpp | 1 + src/printer/cvc/cvc_printer.cpp | 1 + src/printer/dagification_visitor.cpp | 139 +++++++++++++++++ src/printer/dagification_visitor.h | 225 +++++++++++++-------------- src/printer/smt2/smt2_printer.cpp | 1 + src/util/options.cpp | 14 +- test/unit/expr/node_black.h | 43 +++++ 9 files changed, 309 insertions(+), 124 deletions(-) create mode 100644 src/printer/dagification_visitor.cpp diff --git a/src/expr/expr_template.h b/src/expr/expr_template.h index 6cd476a5f..5129cd73a 100644 --- a/src/expr/expr_template.h +++ b/src/expr/expr_template.h @@ -815,10 +815,10 @@ public: /** * Construct a ExprDag with the given setting (letify only common - * subexpressions that appear more than 'dag' times). dag==0 means + * subexpressions that appear more than 'dag' times). dag <= 0 means * don't dagify. */ - ExprDag(size_t dag) : d_dag(dag) {} + explicit ExprDag(int dag) : d_dag(dag < 0 ? 0 : dag) {} inline void applyDag(std::ostream& out) { // (offset by one to detect whether default has been set yet) @@ -942,7 +942,7 @@ public: ${getConst_instantiations} -#line 938 "${template}" +#line 946 "${template}" namespace expr { diff --git a/src/printer/Makefile.am b/src/printer/Makefile.am index 3f0eba12d..21b88d4be 100644 --- a/src/printer/Makefile.am +++ b/src/printer/Makefile.am @@ -6,9 +6,10 @@ AM_CXXFLAGS = -Wall -Wno-unknown-pragmas $(FLAG_VISIBILITY_HIDDEN) noinst_LTLIBRARIES = libprinter.la libprinter_la_SOURCES = \ - dagification_visitor.h \ printer.h \ printer.cpp \ + dagification_visitor.h \ + dagification_visitor.cpp \ ast/ast_printer.h \ ast/ast_printer.cpp \ smt/smt_printer.h \ diff --git a/src/printer/ast/ast_printer.cpp b/src/printer/ast/ast_printer.cpp index 5a7b2e834..479c26aaf 100644 --- a/src/printer/ast/ast_printer.cpp +++ b/src/printer/ast/ast_printer.cpp @@ -23,6 +23,7 @@ #include "expr/command.h" #include "printer/dagification_visitor.h" #include "util/node_visitor.h" +#include "theory/substitutions.h" #include #include diff --git a/src/printer/cvc/cvc_printer.cpp b/src/printer/cvc/cvc_printer.cpp index cc95d72b0..d20ba0354 100644 --- a/src/printer/cvc/cvc_printer.cpp +++ b/src/printer/cvc/cvc_printer.cpp @@ -21,6 +21,7 @@ #include "util/language.h" // for LANG_AST #include "expr/node_manager.h" // for VarNameAttr #include "expr/command.h" +#include "theory/substitutions.h" #include #include diff --git a/src/printer/dagification_visitor.cpp b/src/printer/dagification_visitor.cpp new file mode 100644 index 000000000..da651cd54 --- /dev/null +++ b/src/printer/dagification_visitor.cpp @@ -0,0 +1,139 @@ +/********************* */ +/*! \file dagification_visitor.cpp + ** \verbatim + ** Original author: mdeters + ** Major contributors: none + ** Minor contributors (to current version): none + ** This file is part of the CVC4 prototype. + ** Copyright (c) 2009, 2010, 2011, 2012 The Analysis of Computer Systems Group (ACSys) + ** Courant Institute of Mathematical Sciences + ** New York University + ** See the file COPYING in the top-level source directory for licensing + ** information.\endverbatim + ** + ** \brief Implementation of a dagifier for CVC4 expressions + ** + ** Implementation of a dagifier for CVC4 expressions. + **/ + +#include "printer/dagification_visitor.h" + +#include "context/context.h" +#include "theory/substitutions.h" + +#include + +namespace CVC4 { +namespace printer { + +DagificationVisitor::DagificationVisitor(unsigned threshold, std::string letVarPrefix) : + d_threshold(threshold), + d_letVarPrefix(letVarPrefix), + d_nodeCount(), + d_top(), + d_context(new context::Context()), + d_substitutions(new theory::SubstitutionMap(d_context)), + d_letVar(0), + d_done(false), + d_uniqueParent(), + d_substNodes() { + + // 0 doesn't make sense + CheckArgument(threshold > 0, threshold); +} + +DagificationVisitor::~DagificationVisitor() { + delete d_substitutions; + delete d_context; +} + +bool DagificationVisitor::alreadyVisited(TNode current, TNode parent) { + // don't visit variables, constants, or those exprs that we've + // already seen more than the threshold: if we've increased + // the count beyond the threshold already, we've done the same + // for all subexpressions, so it isn't useful to traverse and + // increment again (they'll be dagified anyway). + return current.getMetaKind() == kind::metakind::VARIABLE || + current.getMetaKind() == kind::metakind::CONSTANT || + ( ( current.getKind() == kind::NOT || + current.getKind() == kind::UMINUS ) && + ( current[0].getMetaKind() == kind::metakind::VARIABLE || + current[0].getMetaKind() == kind::metakind::CONSTANT ) ) || + current.getKind() == kind::SORT_TYPE || + d_nodeCount[current] > d_threshold; +} + +void DagificationVisitor::visit(TNode current, TNode parent) { + if(d_uniqueParent.find(current) != d_uniqueParent.end()) { + // we've seen this expr before + + TNode& uniqueParent = d_uniqueParent[current]; + + if(!uniqueParent.isNull() && uniqueParent != parent) { + // there is not a unique parent for this expr, mark it + uniqueParent = TNode::null(); + } + + // increase the count + const unsigned count = ++d_nodeCount[current]; + + if(count > d_threshold) { + // candidate for a let binder + d_substNodes.push_back(current); + } + } else { + // we haven't seen this expr before + Assert(d_nodeCount[current] == 0); + d_nodeCount[current] = 1; + d_uniqueParent[current] = parent; + } +} + +void DagificationVisitor::start(TNode node) { + AlwaysAssert(!d_done, "DagificationVisitor cannot be re-used"); + d_top = node; +} + +void DagificationVisitor::done(TNode node) { + AlwaysAssert(!d_done); + + d_done = true; + + // letify subexprs before parents (cascading LETs) + std::sort(d_substNodes.begin(), d_substNodes.end()); + + for(std::vector::iterator i = d_substNodes.begin(); + i != d_substNodes.end(); + ++i) { + Assert(d_nodeCount[*i] > d_threshold); + TNode parent = d_uniqueParent[*i]; + if(!parent.isNull() && d_nodeCount[parent] > d_threshold) { + // no need to letify this expr, because it only occurs in + // a single super-expression, and that one will be letified + continue; + } + + // construct the let binder + std::stringstream ss; + ss << d_letVarPrefix << d_letVar++; + Node letvar = NodeManager::currentNM()->mkVar(ss.str(), (*i).getType()); + + // apply previous substitutions to the rhs, enabling cascading LETs + Node n = d_substitutions->apply(*i); + Assert(! d_substitutions->hasSubstitution(n)); + d_substitutions->addSubstitution(n, letvar); + } +} + +const theory::SubstitutionMap& DagificationVisitor::getLets() { + AlwaysAssert(d_done, "DagificationVisitor must be used as a visitor before getting the dagified version out!"); + return *d_substitutions; +} + +Node DagificationVisitor::getDagifiedBody() { + AlwaysAssert(d_done, "DagificationVisitor must be used as a visitor before getting the dagified version out!"); + return d_substitutions->apply(d_top); +} + +}/* CVC4::printer namespace */ +}/* CVC4 namespace */ diff --git a/src/printer/dagification_visitor.h b/src/printer/dagification_visitor.h index 8e17f6027..ff0442547 100644 --- a/src/printer/dagification_visitor.h +++ b/src/printer/dagification_visitor.h @@ -10,6 +10,10 @@ ** New York University ** See the file COPYING in the top-level source directory for licensing ** information.\endverbatim + ** + ** \brief A dagifier for CVC4 expressions + ** + ** A dagifier for CVC4 expressions. **/ #include "cvc4_private.h" @@ -17,160 +21,155 @@ #ifndef __CVC4__PRINTER__DAGIFICATION_VISITOR_H #define __CVC4__PRINTER__DAGIFICATION_VISITOR_H -#include "context/context.h" -#include "theory/substitutions.h" #include "expr/node.h" #include "util/hash.h" #include #include -#include namespace CVC4 { + +namespace context { + class Context; +}/* CVC4::context namespace */ + +namespace theory { + class SubstitutionMap; +}/* CVC4::theory namespace */ + namespace printer { +/** + * This is a visitor class (intended to be used with CVC4's NodeVisitor) + * that visits an expression looking for common subexpressions that appear + * more than N times, where N is a configurable threshold. Afterward, + * let bindings can be extracted from this visitor and applied to the + * expression. + * + * This dagifier never introduces let bindings for variables, constants, + * unary-minus exprs over variables or constants, or NOT exprs over + * variables or constants. This dagifier never introduces let bindings + * for types. + */ class DagificationVisitor { - unsigned d_threshold; - std::string d_letVarPrefix; + /** + * The threshold for dagification. Subexprs occurring more than this + * number of times are dagified. + */ + const unsigned d_threshold; + + /** + * The prefix for introduced let bindings. + */ + const std::string d_letVarPrefix; + + /** + * A map of subexprs to their occurrence count. + */ std::hash_map d_nodeCount; + + /** + * The top-most node we are visiting. + */ TNode d_top; + + /** + * This class doesn't operate in a context-dependent fashion, but + * SubstitutionMap does, so we need a context. + */ context::Context* d_context; + + /** + * A map of subexprs to their newly-introduced let bindings. + */ theory::SubstitutionMap* d_substitutions; + + /** + * The current count of let bindings. Used to build unique names + * for the bindings. + */ unsigned d_letVar; + + /** + * Keep track of whether we are done yet (for assertions---this visitor + * can only be used one time). + */ bool d_done; + + /** + * If a subexpr occurs uniquely in one parent expr, this map points to + * it. An expr not occurring as a key in this map means we haven't + * seen it yet (and its occurrence count should be zero). If an expr + * points to the null expr in this map, it means we've seen more than + * one parent, so the subexpr doesn't have a unique parent. + * + * This information is kept because if a subexpr occurs more than the + * threshold, it is normally subject to dagification. But if it occurs + * only in one unique parent expression, and the parent meets the + * threshold too, then the parent will be dagified and there's no point + * in independently dagifying the child. (If it is beyond the threshold + * and occurs in more than one parent, we'll independently dagify.) + */ std::hash_map d_uniqueParent; + + /** + * A list of all nodes that meet the occurrence threshold and therefore + * *may* be subject to dagification, except for the unique-parent rule + * mentioned above. + */ std::vector d_substNodes; public: + /** Our visitor doesn't return anything. */ typedef void return_type; - DagificationVisitor(unsigned threshold, std::string letVarPrefix = "_let_") : - d_threshold(threshold), - d_letVarPrefix(letVarPrefix), - d_nodeCount(), - d_top(), - d_context(new context::Context()), - d_substitutions(new theory::SubstitutionMap(d_context)), - d_letVar(0), - d_done(false), - d_uniqueParent(), - d_substNodes() { - - // 0 doesn't make sense - CheckArgument(threshold > 0, threshold); - } - - ~DagificationVisitor() { - delete d_substitutions; - delete d_context; - } - - /** - * Returns true if current has already been dagified. - */ - bool alreadyVisited(TNode current, TNode parent) { - // don't visit variables, constants, or those exprs that we've - // already seen more than the threshold: if we've increased - // the count beyond the threshold already, we've done the same - // for all subexpressions, so it isn't useful to traverse and - // increment again (they'll be dagified anyway). - return current.getMetaKind() == kind::metakind::VARIABLE || - current.getMetaKind() == kind::metakind::CONSTANT || - ( ( current.getKind() == kind::NOT || - current.getKind() == kind::UMINUS ) && - ( current[0].getMetaKind() == kind::metakind::VARIABLE || - current[0].getMetaKind() == kind::metakind::CONSTANT ) ) || - current.getKind() == kind::SORT_TYPE || - d_nodeCount[current] > d_threshold; - } - - /** - * Dagify the "current" expression. - */ - void visit(TNode current, TNode parent) { - if(d_uniqueParent.find(current) != d_uniqueParent.end()) { - TNode& uniqueParent = d_uniqueParent[current]; - - if(!uniqueParent.isNull() && uniqueParent != parent) { - // there is not a unique parent for this expr - uniqueParent = TNode::null(); - } - - unsigned count = ++d_nodeCount[current]; - - if(count > d_threshold) { - d_substNodes.push_back(current); - } - } else { - Assert(d_nodeCount[current] == 0); - d_nodeCount[current] = 1; - d_uniqueParent[current] = parent; - } - } - /** - * Marks the node as the starting literal. + * Construct a dagification visitor with the given threshold and let + * binding prefix. + * + * @param threshold the threshold to apply for dagification (must be > 0) + * @param letVarPrefix prefix for let bindings (by default, "_let_") */ - void start(TNode node) { - Assert(!d_done, "DagificationVisitor cannot be re-used"); - d_top = node; - } + DagificationVisitor(unsigned threshold, std::string letVarPrefix = "_let_"); /** - * Called when we're done with all visitation. + * Simple destructor, clean up memory. */ - void done(TNode node) { - Assert(!d_done); - - d_done = true; + ~DagificationVisitor(); - // letify subexprs before parents (cascading LETs) - std::sort(d_substNodes.begin(), d_substNodes.end()); + /** + * Returns true if "current" has already been visited a sufficient + * number of times to make it a candidate for dagification, or if + * it cannot ever be subject to dagification. + */ + bool alreadyVisited(TNode current, TNode parent); - for(std::vector::iterator i = d_substNodes.begin(); - i != d_substNodes.end(); - ++i) { - Assert(d_nodeCount[*i] > d_threshold); - TNode parent = d_uniqueParent[*i]; - if(!parent.isNull() && d_nodeCount[parent] > d_threshold) { - // no need to letify this expr, because it only occurs in - // a single super-expression, and that one will be letified - continue; - } + /** + * Visit the expr "current", it might be a candidate for a let binder. + */ + void visit(TNode current, TNode parent); - std::stringstream ss; - ss << d_letVarPrefix << d_letVar++; - Node letvar = NodeManager::currentNM()->mkVar(ss.str(), (*i).getType()); + /** + * Marks the node as the starting literal. + */ + void start(TNode node); - Node n = d_substitutions->apply(*i); - // the three last arguments to addSubstitution are: - // invalidateCache -- the rhs of our substitution is a letvar, - // we're not going to use it on lhs so no cache problem - // backSub - no need for SubstitutionMap to do internal substitution, - // we did our own above - // forwardSub - ditto - Assert(! d_substitutions->hasSubstitution(n)); - d_substitutions->addSubstitution(n, letvar); - } - } + /** + * Called when we're done with all visitation. Does postprocessing. + */ + void done(TNode node); /** * Get the let substitutions. */ - const theory::SubstitutionMap& getLets() { - Assert(d_done, "DagificationVisitor must be used as a visitor before getting the dagified version out!"); - return *d_substitutions; - } + const theory::SubstitutionMap& getLets(); /** * Return the let-substituted expression. */ - Node getDagifiedBody() { - Assert(d_done, "DagificationVisitor must be used as a visitor before getting the dagified version out!"); - return d_substitutions->apply(d_top); - } + Node getDagifiedBody(); };/* class DagificationVisitor */ diff --git a/src/printer/smt2/smt2_printer.cpp b/src/printer/smt2/smt2_printer.cpp index d3ec376ae..72ce3804d 100644 --- a/src/printer/smt2/smt2_printer.cpp +++ b/src/printer/smt2/smt2_printer.cpp @@ -26,6 +26,7 @@ #include "util/boolean_simplification.h" #include "printer/dagification_visitor.h" #include "util/node_visitor.h" +#include "theory/substitutions.h" using namespace std; diff --git a/src/util/options.cpp b/src/util/options.cpp index a6bd9d09a..26881e052 100644 --- a/src/util/options.cpp +++ b/src/util/options.cpp @@ -842,13 +842,13 @@ throw(OptionException) { if(dag < 0) { throw OptionException("--default-dag-thresh requires a nonnegative argument."); } - Debug.getStream() << Expr::dag(size_t(dag)); - Trace.getStream() << Expr::dag(size_t(dag)); - Notice.getStream() << Expr::dag(size_t(dag)); - Chat.getStream() << Expr::dag(size_t(dag)); - Message.getStream() << Expr::dag(size_t(dag)); - Warning.getStream() << Expr::dag(size_t(dag)); - Dump.getStream() << Expr::dag(size_t(dag)); + Debug.getStream() << Expr::dag(dag); + Trace.getStream() << Expr::dag(dag); + Notice.getStream() << Expr::dag(dag); + Chat.getStream() << Expr::dag(dag); + Message.getStream() << Expr::dag(dag); + Warning.getStream() << Expr::dag(dag); + Dump.getStream() << Expr::dag(dag); } break; diff --git a/test/unit/expr/node_black.h b/test/unit/expr/node_black.h index 36a92ec2f..f617ebd5b 100644 --- a/test/unit/expr/node_black.h +++ b/test/unit/expr/node_black.h @@ -596,6 +596,49 @@ public: TS_ASSERT(sstr.str() == "(XOR (AND w (OR x y) z) (AND w (OR x y) z))"); } + void testDagifier() { + TypeNode intType = d_nodeManager->integerType(); + TypeNode fnType = d_nodeManager->mkFunctionType(intType, intType); + + Node x = d_nodeManager->mkVar("x", intType); + Node y = d_nodeManager->mkVar("y", intType); + Node f = d_nodeManager->mkVar("f", fnType); + Node g = d_nodeManager->mkVar("g", fnType); + Node fx = d_nodeManager->mkNode(APPLY_UF, f, x); + Node fy = d_nodeManager->mkNode(APPLY_UF, f, y); + Node gx = d_nodeManager->mkNode(APPLY_UF, g, x); + Node gy = d_nodeManager->mkNode(APPLY_UF, g, y); + Node fgx = d_nodeManager->mkNode(APPLY_UF, f, gx); + Node ffx = d_nodeManager->mkNode(APPLY_UF, f, fx); + Node fffx = d_nodeManager->mkNode(APPLY_UF, f, ffx); + Node fffx_eq_x = d_nodeManager->mkNode(EQUAL, fffx, x); + Node fffx_eq_y = d_nodeManager->mkNode(EQUAL, fffx, y); + Node fx_eq_gx = d_nodeManager->mkNode(EQUAL, fx, gx); + Node x_eq_y = d_nodeManager->mkNode(EQUAL, x, y); + Node fgx_eq_gy = d_nodeManager->mkNode(EQUAL, fgx, gy); + + Node n = d_nodeManager->mkNode(OR, fffx_eq_x, fffx_eq_y, fx_eq_gx, x_eq_y, fgx_eq_gy); + + stringstream sstr; + sstr << Node::setdepth(-1) << Node::setlanguage(language::output::LANG_CVC4); + sstr << Node::dag(false) << n; // never dagify + TS_ASSERT(sstr.str() == "(f(f(f(x))) = x) OR (f(f(f(x))) = y) OR (f(x) = g(x)) OR (x = y) OR (f(g(x)) = g(y))"); + + sstr.str(string()); + sstr << Node::dag(true) << n; // always dagify + TS_ASSERT(sstr.str() == "LET _let_0 = f(x), _let_1 = g(x), _let_2 = f(f(_let_0)) IN (_let_2 = x) OR (_let_2 = y) OR (_let_0 = _let_1) OR (x = y) OR (f(_let_1) = g(y))"); + + sstr.str(string()); + sstr << Node::dag(2) << n; // dagify subexprs occurring > 2 times + TS_ASSERT(sstr.str() == "LET _let_0 = f(x) IN (f(f(_let_0)) = x) OR (f(f(_let_0)) = y) OR (_let_0 = g(x)) OR (x = y) OR (f(g(x)) = g(y))"); + + Warning() << Node::setdepth(-1) << Node::setlanguage(language::output::LANG_CVC4) << Node::dag(2) << n << std::endl; + sstr.str(string()); + sstr << Node::dag(3) << n; // dagify subexprs occurring > 3 times + TS_ASSERT(sstr.str() == "(f(f(f(x))) = x) OR (f(f(f(x))) = y) OR (f(x) = g(x)) OR (x = y) OR (f(g(x)) = g(y))"); + Warning() << Node::setdepth(-1) << Node::setlanguage(language::output::LANG_CVC4) << Node::dag(2) << n << std::endl; + } + // This Test is designed to fail in a way that will cause a segfault, // so it is commented out. // This is for demonstrating what a certain type of user error looks like. -- 2.30.2