From 16e809f698060645812667925b3e0c4d403ee71a Mon Sep 17 00:00:00 2001 From: Tim King Date: Thu, 10 Nov 2016 15:22:49 -0800 Subject: [PATCH] Adding garbage collection of nodes with maxed out reference counts. --- src/expr/node_manager.cpp | 71 +++++++++++++++++++++++++++-- src/expr/node_manager.h | 61 +++++++++++++++++++------ src/expr/node_value.cpp | 18 ++++---- src/expr/node_value.h | 27 +++++++++-- test/unit/expr/expr_public.h | 2 +- test/unit/expr/node_manager_black.h | 15 ++++-- 6 files changed, 156 insertions(+), 38 deletions(-) diff --git a/src/expr/node_manager.cpp b/src/expr/node_manager.cpp index b07af0c14..1d54d0f9d 100644 --- a/src/expr/node_manager.cpp +++ b/src/expr/node_manager.cpp @@ -42,6 +42,8 @@ namespace CVC4 { CVC4_THREADLOCAL(NodeManager*) NodeManager::s_current = NULL; +namespace { + /** * This class sets it reference argument to true and ensures that it gets set * to false on destruction. This can be used to make sure a flag gets toggled @@ -82,6 +84,9 @@ struct NVReclaim { } }; +} // namespace + + NodeManager::NodeManager(ExprManager* exprManager) : d_options(new Options()), d_statisticsRegistry(new StatisticsRegistry()), @@ -169,7 +174,7 @@ NodeManager::~NodeManager() { for(unsigned i = 0; i < unsigned(kind::LAST_KIND); ++i) { d_operators[i] = Node::null(); } - + d_unique_vars.clear(); TypeNode dummy; @@ -178,7 +183,9 @@ NodeManager::~NodeManager() { d_rt_cache.d_children.clear(); d_rt_cache.d_data = dummy; - for( std::vector::iterator datatype_iter = d_ownedDatatypes.begin(), datatype_end = d_ownedDatatypes.end(); + for (std::vector::iterator + datatype_iter = d_ownedDatatypes.begin(), + datatype_end = d_ownedDatatypes.end(); datatype_iter != datatype_end; ++datatype_iter) { Datatype* datatype = *datatype_iter; delete datatype; @@ -186,8 +193,25 @@ NodeManager::~NodeManager() { d_ownedDatatypes.clear(); Assert(!d_attrManager->inGarbageCollection() ); - while(!d_zombies.empty()) { - reclaimZombies(); + + std::vector order = TopologicalSort(d_maxedOut); + d_maxedOut.clear(); + + while (!d_zombies.empty() || !order.empty()) { + if (d_zombies.empty()) { + // Delete the maxed out nodes in toplogical order once we know + // there are no additional zombies, or other nodes to worry about. + Assert(!order.empty()); + // We process these in reverse to reverse the topological order. + NodeValue* greatest_maxed_out = order.back(); + order.pop_back(); + Assert(greatest_maxed_out->HasMaximizedReferenceCount()); + Debug("gc") << "Force zombify " << greatest_maxed_out << std::endl; + greatest_maxed_out->d_rc = 0; + markForDeletion(greatest_maxed_out); + } else { + reclaimZombies(); + } } poolRemove( &expr::NodeValue::null() ); @@ -333,6 +357,45 @@ void NodeManager::reclaimZombies() { } }/* NodeManager::reclaimZombies() */ +std::vector NodeManager::TopologicalSort( + const std::vector& roots) { + std::vector order; + std::vector > stack; + NodeValueIDSet visited; + const NodeValueIDSet root_set(roots.begin(), roots.end()); + + for (size_t index = 0; index < roots.size(); index++) { + NodeValue* root = roots[index]; + if (visited.find(root) == visited.end()) { + stack.push_back(std::make_pair(false, root)); + } + while (!stack.empty()) { + NodeValue* current = stack.back().second; + const bool visited_children = stack.back().first; + Debug("gc") << "Topological sort " << current << " " << visited_children + << std::endl; + if (visited_children) { + if (root_set.find(current) != root_set.end()) { + order.push_back(current); + } + stack.pop_back(); + } else { + stack.back().first = true; + Assert(visited.count(current) == 0); + visited.insert(current); + for (int i = 0; i < current->getNumChildren(); ++i) { + expr::NodeValue* child = current->getChild(i); + if (visited.find(child) == visited.end()) { + stack.push_back(std::make_pair(false, child)); + } + } + } + } + } + Assert(order.size() == roots.size()); + return order; +} /* NodeManager::TopologicalSort() */ + TypeNode NodeManager::getType(TNode n, bool check) throw(TypeCheckingExceptionPrivate, AssertionException) { diff --git a/src/expr/node_manager.h b/src/expr/node_manager.h index aaffeb322..d85ff23d5 100644 --- a/src/expr/node_manager.h +++ b/src/expr/node_manager.h @@ -58,20 +58,24 @@ namespace expr { * to NodeManager events via NodeManager::subscribeEvents(this). */ class NodeManagerListener { -public: - virtual ~NodeManagerListener() { } - virtual void nmNotifyNewSort(TypeNode tn, uint32_t flags) { } - virtual void nmNotifyNewSortConstructor(TypeNode tn) { } - virtual void nmNotifyInstantiateSortConstructor(TypeNode ctor, TypeNode sort, uint32_t flags) { } - virtual void nmNotifyNewDatatypes(const std::vector& datatypes) { } - virtual void nmNotifyNewVar(TNode n, uint32_t flags) { } - virtual void nmNotifyNewSkolem(TNode n, const std::string& comment, uint32_t flags) { } + public: + virtual ~NodeManagerListener() {} + virtual void nmNotifyNewSort(TypeNode tn, uint32_t flags) {} + virtual void nmNotifyNewSortConstructor(TypeNode tn) {} + virtual void nmNotifyInstantiateSortConstructor(TypeNode ctor, TypeNode sort, + uint32_t flags) {} + virtual void nmNotifyNewDatatypes( + const std::vector& datatypes) {} + virtual void nmNotifyNewVar(TNode n, uint32_t flags) {} + virtual void nmNotifyNewSkolem(TNode n, const std::string& comment, + uint32_t flags) {} /** - * Notify a listener of a Node that's being GCed. If this function stores a reference + * Notify a listener of a Node that's being GCed. If this function stores a + * reference * to the Node somewhere, very bad things will happen. */ - virtual void nmNotifyDeleteNode(TNode n) { } -};/* class NodeManagerListener */ + virtual void nmNotifyDeleteNode(TNode n) {} +}; /* class NodeManagerListener */ class NodeManager { template friend class CVC4::NodeBuilder; @@ -96,7 +100,7 @@ class NodeManager { expr::NodeValuePoolEq> NodeValuePool; typedef __gnu_cxx::hash_set ZombieSet; + expr::NodeValueIDEquality> NodeValueIDSet; static CVC4_THREADLOCAL(NodeManager*) s_current; @@ -146,7 +150,13 @@ class NodeManager { * we might like to delete nodes in least-recently-used order. But * we also need to avoid processing a zombie twice. */ - ZombieSet d_zombies; + NodeValueIDSet d_zombies; + + /** + * NodeValues with maxed out reference counts. These live as long as the + * NodeManager. They have a custom deallocation procedure at the very end. + */ + std::vector d_maxedOut; /** * A set of operator singletons (w.r.t. to this NodeManager @@ -157,7 +167,7 @@ class NodeManager { * plusOperator->getConst(), you get kind::PLUS back. */ Node d_operators[kind::LAST_KIND]; - + /** unique vars per (Kind,Type) */ std::map< Kind, std::map< TypeNode, Node > > d_unique_vars; @@ -165,7 +175,7 @@ class NodeManager { * A list of subscribers for NodeManager events. */ std::vector d_listeners; - + /** A list of datatypes owned by this node manager. */ std::vector d_ownedDatatypes; @@ -277,6 +287,19 @@ class NodeManager { } } + /** + * Register a NodeValue as having a maxed out reference count. This NodeValue + * will live as long as its containing NodeManager. + */ + inline void markRefCountMaxedOut(expr::NodeValue* nv) { + Assert(nv->HasMaximizedReferenceCount()); + if(Debug.isOn("gc")) { + Debug("gc") << "marking node value " << nv + << " [" << nv->d_id << "]: as maxed out" << std::endl; + } + d_maxedOut.push_back(nv); + } + /** * Reclaim all zombies. */ @@ -287,6 +310,14 @@ class NodeManager { */ bool safeToReclaimZombies() const; + /** + * Returns a reverse topological sort of a list of NodeValues. The NodeValues + * must be valid and have ids. The NodeValues are not modified (including ref + * counts). + */ + static std::vector TopologicalSort( + const std::vector& roots); + /** * This template gives a mechanism to stack-allocate a NodeValue * with enough space for N children (where N is a compile-time diff --git a/src/expr/node_value.cpp b/src/expr/node_value.cpp index a40075ca9..40649ef2c 100644 --- a/src/expr/node_value.cpp +++ b/src/expr/node_value.cpp @@ -37,7 +37,8 @@ namespace expr { string NodeValue::toString() const { stringstream ss; - OutputLanguage outlang = (this == &null()) ? language::output::LANG_AUTO : options::outputLanguage(); + OutputLanguage outlang = (this == &null()) ? language::output::LANG_AUTO + : options::outputLanguage(); toStream(ss, -1, false, false, outlang); return ss.str(); } @@ -49,7 +50,8 @@ void NodeValue::toStream(std::ostream& out, int toDepth, bool types, size_t dag, // count, even just for printing. RefCountGuard guard(this); - Printer::getPrinter(language)->toStream(out, TNode(this), toDepth, types, dag); + Printer::getPrinter(language)->toStream(out, TNode(this), toDepth, types, + dag); } void NodeValue::printAst(std::ostream& out, int ind) const { @@ -58,14 +60,14 @@ void NodeValue::printAst(std::ostream& out, int ind) const { indent(out, ind); out << '('; out << getKind(); - if(getMetaKind() == kind::metakind::VARIABLE) { + if (getMetaKind() == kind::metakind::VARIABLE) { out << ' ' << getId(); - } else if(getMetaKind() == kind::metakind::CONSTANT) { + } else if (getMetaKind() == kind::metakind::CONSTANT) { out << ' '; kind::metakind::NodeValueConstPrinter::toStream(out, this); } else { - if(nv_begin() != nv_end()) { - for(const_nv_iterator child = nv_begin(); child != nv_end(); ++child) { + if (nv_begin() != nv_end()) { + for (const_nv_iterator child = nv_begin(); child != nv_end(); ++child) { out << std::endl; (*child)->printAst(out, ind + 1); } @@ -76,5 +78,5 @@ void NodeValue::printAst(std::ostream& out, int ind) const { out << ')'; } -}/* CVC4::expr namespace */ -}/* CVC4 namespace */ +} /* CVC4::expr namespace */ +} /* CVC4 namespace */ diff --git a/src/expr/node_value.h b/src/expr/node_value.h index b403d0c86..8f1df0fff 100644 --- a/src/expr/node_value.h +++ b/src/expr/node_value.h @@ -122,6 +122,9 @@ class NodeValue { void inc(); void dec(); + // Returns true if the reference count is maximized. + inline bool HasMaximizedReferenceCount() { return d_rc == MAX_RC; } + /** * Uninitializing constructor for NodeBuilder's use. */ @@ -294,14 +297,20 @@ public: private: + /** + * RAII guard that increases the reference count if the reference count to be > 0. + * Otherwise, this does nothing. This does not just increment the reference + * count to avoid maxing out the d_rc field. This is only for low level functions. + */ class RefCountGuard { NodeValue* d_nv; + bool d_increased; public: RefCountGuard(const NodeValue* nv) : d_nv(const_cast(nv)) { - // inc() - if(__builtin_expect( ( d_nv->d_rc < MAX_RC ), true )) { - ++d_nv->d_rc; + d_increased = (d_nv->d_rc == 0); + if(d_increased) { + d_nv->d_rc = 1; } } ~RefCountGuard() { @@ -310,7 +319,7 @@ private: // E.g., this can happen when debugging code calls the print // routines below. As RefCountGuards are scoped on the stack, // this should be fine---but not in multithreaded contexts! - if(__builtin_expect( ( d_nv->d_rc < MAX_RC ), true )) { + if(d_increased) { --d_nv->d_rc; } } @@ -415,6 +424,13 @@ inline void NodeValue::inc() { // FIXME multithreading if(__builtin_expect( ( d_rc < MAX_RC ), true )) { ++d_rc; + if(__builtin_expect( ( d_rc == MAX_RC ), false )) { + Assert(NodeManager::currentNM() != NULL, + "No current NodeManager on incrementing of NodeValue: " + "maybe a public CVC4 interface function is missing a " + "NodeManagerScope ?"); + NodeManager::currentNM()->markRefCountMaxedOut(this); + } } } @@ -425,7 +441,8 @@ inline void NodeValue::dec() { if(__builtin_expect( ( d_rc == 0 ), false )) { Assert(NodeManager::currentNM() != NULL, "No current NodeManager on destruction of NodeValue: " - "maybe a public CVC4 interface function is missing a NodeManagerScope ?"); + "maybe a public CVC4 interface function is missing a " + "NodeManagerScope ?"); NodeManager::currentNM()->markForDeletion(this); } } diff --git a/test/unit/expr/expr_public.h b/test/unit/expr/expr_public.h index ed772a471..cfdee8d37 100644 --- a/test/unit/expr/expr_public.h +++ b/test/unit/expr/expr_public.h @@ -277,7 +277,7 @@ public: TS_ASSERT(a_bool->getType(true) == d_em->booleanType()); TS_ASSERT(b_bool->getType(false) == d_em->booleanType()); TS_ASSERT(b_bool->getType(true) == d_em->booleanType()); - TS_ASSERT_THROWS(d_em->mkExpr(MULT,*a_bool,*b_bool).getType(true), + TS_ASSERT_THROWS(d_em->mkExpr(MULT,*a_bool,*b_bool).getType(true), TypeCheckingException); // These need better support for operators // TS_ASSERT(and_op->getType().isNull()); diff --git a/test/unit/expr/node_manager_black.h b/test/unit/expr/node_manager_black.h index 3958e0f2c..13d8084fa 100644 --- a/test/unit/expr/node_manager_black.h +++ b/test/unit/expr/node_manager_black.h @@ -19,6 +19,7 @@ #include #include +#include "base/output.h" #include "expr/node_manager.h" #include "expr/node_manager_attributes.h" #include "util/integer.h" @@ -308,12 +309,16 @@ public: std::vector vars; const unsigned int max = metakind::getUpperBoundForKind(AND); TypeNode boolType = d_nodeManager->booleanType(); - Node skolem = d_nodeManager->mkSkolem("i", boolType); - for( unsigned int i = 0; i <= max; ++i ) { - vars.push_back( skolem ); + Node skolem_i = d_nodeManager->mkSkolem("i", boolType); + Node skolem_j = d_nodeManager->mkSkolem("j", boolType); + Node andNode = skolem_i.andNode(skolem_j); + Node orNode = skolem_i.orNode(skolem_j); + while (vars.size() <= max) { + vars.push_back(andNode); + vars.push_back(skolem_j); + vars.push_back(orNode); } - TS_ASSERT_THROWS( d_nodeManager->mkNode(AND, vars), AssertionException ); + TS_ASSERT_THROWS(d_nodeManager->mkNode(AND, vars), AssertionException); #endif } - }; -- 2.30.2