std::vector<SubCircuit::Solver::MineResult> results;
mySolver.mine(results, minNodes, maxNodes, minMatches);
-The mine() method has an optional fifth parameter that limits the number
-of matches counted in one graph. This can be useful when mining for circuits
-that are found in at least a number of graphs. E.g. the following call
-would find all subcircuits with 5 nodes that are found in at least 7 of
-the registered graphs:
+The mine() method has an optional fifth parameter that limits the number of
+matches counted in one graph. This can be useful when mining for circuits that
+are found in at least a number of graphs. E.g. the following call would find
+all subcircuits with 5 nodes that are found in at least 7 of the registered
+graphs:
mySolver.mine(results, 5, 5, 7, 1);
Note that this miner is not very efficient and therefore its use is not
-recommended for large circuits.
+recommended for large circuits. Also note that the miner is working under the
+assumption that subgraph isomorphism is bidirectional. This is not the case in
+circuits with gates with shorted pins. This can result in undetected frequent
+subcircuits in some corner cases.
Debugging
verbose = backupVerbose;
}
- int testForMining(std::vector<Solver::MineResult> &results, std::set<NodeSet> &usedSets, std::vector<std::set<NodeSet>> &nextPool, NodeSet &testSet,
+ int testForMining(std::vector<Solver::MineResult> &results, std::set<NodeSet> &usedSets, std::set<NodeSet> &nextPool, NodeSet &testSet,
const std::string &graphId, const Graph &graph, int minNodes, int minMatches, int limitMatchesPerGraph)
{
// printf("test: %s\n", testSet.to_string().c_str());
// printf("match: %s%s\n", resultSet.to_string().c_str(), usedSets.count(resultSet) > 0 ? " (dup)" : "");
+#if 0
if (usedSets.count(resultSet) > 0) {
- // FIXME: assert(thisNodeSetSet.count(resultSet) > 0);
+ // Because of shorted pins isomorphisim is not always bidirectional!
+ //
+ // This means that the following assert is not true in all cases and subgraph A might
+ // show up in the matches for subgraph B but not vice versa... This also means that the
+ // order in which subgraphs are processed has an impact on the results set.
+ //
+ assert(thisNodeSetSet.count(resultSet) > 0);
continue;
}
+#else
+ if (thisNodeSetSet.count(resultSet) > 0)
+ continue;
+#endif
usedSets.insert(resultSet);
thisNodeSetSet.insert(resultSet);
results.push_back(result);
}
- nextPool.push_back(thisNodeSetSet);
+ nextPool.insert(thisNodeSetSet.begin(), thisNodeSetSet.end());
return matches;
}
- void findNodePairs(std::vector<Solver::MineResult> &results, std::vector<std::set<NodeSet>> &nodePairs, int minNodes, int minMatches, int limitMatchesPerGraph)
+ void findNodePairs(std::vector<Solver::MineResult> &results, std::set<NodeSet> &nodePairs, int minNodes, int minMatches, int limitMatchesPerGraph)
{
+ int groupCounter = 0;
std::set<NodeSet> usedPairs;
+ nodePairs.clear();
if (verbose)
- printf("\nFind frequent node pairs:\n");
+ printf("\nMining for frequent node pairs:\n");
for (auto &graph_it : graphData)
for (int node1 = 0; node1 < int(graph_it.second.graph.nodes.size()); node1++)
if (verbose)
printf("Pair %s[%s,%s] -> %d%s\n", graphId.c_str(), graph.nodes[node1].nodeId.c_str(),
- graph.nodes[node2].nodeId.c_str(), matches, matches < minMatches ? " *min*" : "");
+ graph.nodes[node2].nodeId.c_str(), matches, matches < minMatches ? " *purge*" : "");
+
+ if (minMatches <= matches)
+ groupCounter++;
}
if (verbose)
- printf("found %d.\n", int(nodePairs.size()));
+ printf("Found a total of %d subgraphs in %d groups.\n", int(nodePairs.size()), groupCounter);
}
- void findNextPool(std::vector<Solver::MineResult> &results, std::vector<std::set<NodeSet>> &pool,
+ void findNextPool(std::vector<Solver::MineResult> &results, std::set<NodeSet> &pool,
int oldSetSize, int increment, int minNodes, int minMatches, int limitMatchesPerGraph)
{
- std::vector<std::set<NodeSet>> nextPool;
+ int groupCounter = 0;
std::map<std::string, std::vector<const NodeSet*>> poolPerGraph;
+ std::set<NodeSet> nextPool;
- for (auto &i1 : pool)
- for (auto &i2 : i1)
- poolPerGraph[i2.graphId].push_back(&i2);
+ for (auto &it : pool)
+ poolPerGraph[it.graphId].push_back(&it);
if (verbose)
- printf("\nFind frequent subcircuits of size %d using increment %d:\n", oldSetSize+increment, increment);
+ printf("\nMining for frequent subcircuits of size %d using increment %d:\n", oldSetSize+increment, increment);
std::set<NodeSet> usedSets;
for (auto &it : poolPerGraph)
printf("%s%s", first ? "" : ",", graph.nodes[nodeIdx].nodeId.c_str());
first = false;
}
- printf("] -> %d%s\n", matches, matches < minMatches ? " *min*" : "");
+ printf("] -> %d%s\n", matches, matches < minMatches ? " *purge*" : "");
}
+
+ if (minMatches <= matches)
+ groupCounter++;
}
- if (verbose)
- printf("found %d.\n", int(nextPool.size()));
pool.swap(nextPool);
+
+ if (verbose)
+ printf("Found a total of %d subgraphs in %d groups.\n", int(pool.size()), groupCounter);
}
- // interface to the public Solver class
+ // interface to the public solver class
protected:
SolverWorker(Solver *userSolver) : userSolver(userSolver), verbose(false)
void mine(std::vector<Solver::MineResult> &results, int minNodes, int maxNodes, int minMatches, int limitMatchesPerGraph)
{
int nodeSetSize = 2;
- std::vector<std::set<NodeSet>> pool;
+ std::set<NodeSet> pool;
findNodePairs(results, pool, minNodes, minMatches, limitMatchesPerGraph);
while ((maxNodes < 0 || nodeSetSize < maxNodes) && pool.size() > 0)