[DRAT] DRAT data structure (#2767)
authorAlex Ozdemir <aozdemir@hmc.edu>
Sun, 6 Jan 2019 18:32:42 +0000 (19:32 +0100)
committerGitHub <noreply@github.com>
Sun, 6 Jan 2019 18:32:42 +0000 (19:32 +0100)
* Copied old DRAT data-structure files.

Next step: clean up the code, and adapt them to our current usage plans.

* Polished the DRAT class.

Notably, removed the idea of lazy-parsing, this is now just a DRAT
wrapper class.

More explicit about whether methods handle binary or text.

Better constructor patterns

* Added implementation of textual DRAT output

* reordered the DratInstruction structure.
* removed the public modifier from the above struct
* removed the operator << implementation for DratInstruction

* use emplace_back

* Addressing Yoni's first review

* Extracted "write literal in DIMACS format" idea as a function
* Replaced some spurious Debug streams with `os`. (they were left over
from an earlier refactor)
* Improved some documentation

* Removed aside about std::string

* Addressed Mathias' comments

Specifically
* SCREAMING_SNAKE_CASED enum variants.
* Extracted some common logic from two branches of a conditional.
* Cleaned out some undefined behavior from bit manipulation.

* Unit tests for binary DRAT parsing

* Added text output test

* s/white/black/ derp

src/CMakeLists.txt
src/proof/drat/drat_proof.cpp [new file with mode: 0644]
src/proof/drat/drat_proof.h [new file with mode: 0644]
test/unit/CMakeLists.txt
test/unit/proof/CMakeLists.txt [new file with mode: 0644]
test/unit/proof/drat_proof_black.h [new file with mode: 0644]

index a0d191f15ce2f2ef178d9596a54152234831d8c0..502db63f46fa7adbdd21f3ed040360c9a777f9fa 100644 (file)
@@ -133,6 +133,8 @@ libcvc4_add_sources(
   proof/clause_id.h
   proof/cnf_proof.cpp
   proof/cnf_proof.h
+  proof/drat/drat_proof.cpp
+  proof/drat/drat_proof.h
   proof/lemma_proof.cpp
   proof/lemma_proof.h
   proof/lfsc_proof_printer.cpp
diff --git a/src/proof/drat/drat_proof.cpp b/src/proof/drat/drat_proof.cpp
new file mode 100644 (file)
index 0000000..8168832
--- /dev/null
@@ -0,0 +1,246 @@
+/*********************                                                        */
+/*! \file drat_proof.cpp
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Alex Ozdemir
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief DRAT Proof Format
+ **
+ ** Defines deserialization for DRAT proofs.
+ **/
+
+#include "proof/drat/drat_proof.h"
+#include <bitset>
+
+namespace CVC4 {
+namespace proof {
+namespace drat {
+
+// helper functions for parsing the binary DRAT format.
+
+/**
+ * Parses a binary literal which starts at `start` and must not go beyond `end`
+ *
+ * Leaves the iterator one past the last byte that is a part of the clause.
+ *
+ * If the literal overruns `end`, then raises a `InvalidDratProofException`.
+ */
+SatLiteral parse_binary_literal(std::string::const_iterator& start,
+                                const std::string::const_iterator& proof_end)
+{
+  // lit is encoded as uint represented by a variable-length byte sequence
+  uint64_t literal_represented_as_uint = 0;
+  for (uint8_t shift = 0; start != proof_end; ++start, shift += 7)
+  {
+    // Check whether shift is so large that we're going to lose some
+    // information
+    if (shift + 7 >= 64)
+    {
+      throw InvalidDratProofException(
+          "While parsing a DRAT proof, encountered a literal that was too "
+          "long");
+    }
+    unsigned char byte = *start;
+    // The MSB of the byte is an indicator of whether the sequence continues
+    bool continued = (byte >> 7) & 1;
+    uint64_t numeric_part = byte & 0x7f;
+    literal_represented_as_uint |= numeric_part << shift;
+    if (!continued)
+    {
+      // LSB of `literal_represented_as_uint` indicates negation.
+      bool negated = literal_represented_as_uint & 1;
+      // Rest is the literal number
+      SatVariable var_number = literal_represented_as_uint >> 1;
+      ++start;
+      // Internal clauses start at 0, external ones start at 1.
+      return SatLiteral(var_number - 1, negated);
+    }
+  }
+  throw InvalidDratProofException(
+      "Literal in DRAT proof was not done when "
+      "EOF was encountered");
+}
+
+/**
+ * Parses a binary clause which starts at `start` and must not go beyond `end`
+ *
+ * Leaves the iterator one past the last byte that is a part of the clause.
+ * That is, one past the null byte.
+ *
+ * If the clause overruns `end`, then raises a `InvalidDratProofException`.
+ */
+SatClause parse_binary_clause(std::string::const_iterator& start,
+                              const std::string::const_iterator& proof_end)
+{
+  SatClause clause;
+  // A clause is a 0-terminated sequence of literals
+  while (start != proof_end)
+  {
+    // Is the clause done?
+    if (*start == 0)
+    {
+      ++start;
+      return clause;
+    }
+    else
+    {
+      // If not, parse another literal
+      clause.emplace_back(parse_binary_literal(start, proof_end));
+    }
+  }
+  // We've overrun the end of the byte stream.
+  throw InvalidDratProofException(
+      "Clause in DRAT proof was not done when "
+      "EOF was encountered");
+}
+
+/**
+ * Writes this SAT literal in the textual DIMACS format. i.e. as a non-zero
+ * integer.
+ *
+ * Since internally +0 and -0 are valid literals, we add one to each
+ * literal's number (SAT variable) when outputtting it.
+ *
+ * @param os the stream to write to
+ * @param l the literal to write
+ */
+void outputLiteralAsDimacs(std::ostream& os, SatLiteral l)
+{
+  if (l.isNegated())
+  {
+    os << '-';
+  }
+  // add 1 to  convert between internal literals and their DIMACS
+  // representaations.
+  os << l.getSatVariable() + 1;
+}
+
+// DratInstruction implementation
+
+DratInstruction::DratInstruction(DratInstructionKind kind, SatClause clause)
+    : d_kind(kind), d_clause(clause)
+{
+  // All intialized
+}
+
+void DratInstruction::outputAsText(std::ostream& os) const
+{
+  switch (d_kind)
+  {
+    case DratInstructionKind::ADDITION:
+    {
+      for (const SatLiteral& l : d_clause)
+      {
+        outputLiteralAsDimacs(os, l);
+        os << ' ';
+      }
+      os << '0' << std::endl;
+      break;
+    }
+    case DratInstructionKind::DELETION:
+    {
+      os << "d ";
+      for (const SatLiteral& l : d_clause)
+      {
+        outputLiteralAsDimacs(os, l);
+        os << ' ';
+      }
+      os << '0' << std::endl;
+      break;
+    }
+    default: { Unreachable("Unknown DRAT instruction kind");
+    }
+  }
+}
+
+// DratProof implementation
+
+DratProof::DratProof() : d_instructions() {}
+
+// See the "binary format" section of
+// https://www.cs.utexas.edu/~marijn/drat-trim/
+DratProof DratProof::fromBinary(const std::string& s)
+{
+  DratProof proof;
+  if (Debug.isOn("pf::drat"))
+  {
+    Debug("pf::drat") << "Parsing binary DRAT proof" << std::endl;
+    Debug("pf::drat") << "proof length: " << s.length() << " bytes"
+                      << std::endl;
+    Debug("pf::drat") << "proof as bytes: ";
+    for (char i : s)
+    {
+      if (i == 'a' || i == 'd')
+      {
+        Debug("pf::drat") << std::endl << "  " << std::bitset<8>(i);
+      }
+      else
+      {
+        Debug("pf::drat") << " " << std::bitset<8>(i);
+      }
+    }
+    Debug("pf::drat") << std::endl << "parsing proof..." << std::endl;
+  }
+
+  // For each instruction
+  for (auto i = s.cbegin(), end = s.cend(); i != end;)
+  {
+    switch (*i)
+    {
+      case 'a':
+      {
+        ++i;
+        proof.d_instructions.emplace_back(ADDITION,
+                                          parse_binary_clause(i, end));
+        break;
+      }
+      case 'd':
+      {
+        ++i;
+        proof.d_instructions.emplace_back(DELETION,
+                                          parse_binary_clause(i, end));
+        break;
+      }
+      default:
+      {
+        std::ostringstream s;
+        s << "Invalid instruction in Drat proof. Instruction bits: "
+          << std::bitset<8>(*i)
+          << ". Expected 'a' (01100001) or 'd' "
+             "(01100100).";
+        throw InvalidDratProofException(s.str());
+      }
+    }
+  }
+
+  if (Debug.isOn("pf::drat"))
+  {
+    Debug("pf::drat") << "Printing out DRAT in textual format:" << std::endl;
+    proof.outputAsText(Debug("pf::drat"));
+  }
+
+  return proof;
+};
+
+const std::vector<DratInstruction>& DratProof::getInstructions() const
+{
+  return d_instructions;
+};
+
+void DratProof::outputAsText(std::ostream& os) const
+{
+  for (const DratInstruction& instruction : d_instructions)
+  {
+    instruction.outputAsText(os);
+    os << "\n";
+  }
+};
+
+}  // namespace drat
+}  // namespace proof
+}  // namespace CVC4
diff --git a/src/proof/drat/drat_proof.h b/src/proof/drat/drat_proof.h
new file mode 100644 (file)
index 0000000..4bd4f4c
--- /dev/null
@@ -0,0 +1,128 @@
+/*********************                                                        */
+/*! \file drat_proof.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Alex Ozdemir
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief DRAT Proof Format
+ **
+ ** Declares C++ types that represent a DRAT proof.
+ ** Defines serialization for these types.
+ **
+ ** You can find an introduction to DRAT in the drat-trim paper:
+ **    http://www.cs.utexas.edu/~marijn/publications/drat-trim.pdf
+ **
+ **/
+
+#ifndef __CVC4__PROOF__DRAT__DRAT_PROOF_H
+#define __CVC4__PROOF__DRAT__DRAT_PROOF_H
+
+#include "cvc4_private.h"
+#include "prop/sat_solver.h"
+#include "prop/sat_solver_types.h"
+
+namespace CVC4 {
+namespace proof {
+namespace drat {
+
+using CVC4::prop::SatClause;
+using CVC4::prop::SatLiteral;
+using CVC4::prop::SatVariable;
+
+class CVC4_PUBLIC InvalidDratProofException : public CVC4::Exception
+{
+ public:
+  InvalidDratProofException() : Exception("Invalid DRAT Proof") {}
+
+  InvalidDratProofException(const std::string& msg) : Exception(msg) {}
+
+  InvalidDratProofException(const char* msg) : Exception(msg) {}
+}; /* class InvalidDratProofException */
+
+enum DratInstructionKind
+{
+  ADDITION,
+  DELETION
+};
+
+struct DratInstruction
+{
+  DratInstruction(DratInstructionKind kind, SatClause clause);
+
+  /**
+   * Write the DRAT instruction in textual format.
+   * The format is described in:
+   *    http://www.cs.utexas.edu/~marijn/publications/drat-trim.pdf
+   *
+   * @param os the stream to write to
+   */
+  void outputAsText(std::ostream& os) const;
+
+  DratInstructionKind d_kind;
+  SatClause d_clause;
+};
+
+class DratProof
+{
+ public:
+  DratProof(const DratProof&) = default;
+
+  DratProof(DratProof&&) = default;
+
+  ~DratProof() = default;
+
+  /**
+   * Parses a DRAT proof from the **binary format**.
+   * The format is described at:
+   *    https://www.cs.utexas.edu/~marijn/drat-trim/#contact
+   *
+   * What do the standard authors mean by the format being "binary"?
+   * They just mean that proofs in this format should be understood as
+   * sequences of bytes, not sequences of ASCII/Unicode/your favorite character
+   * set characters.
+   *
+   * @param binaryProof a string containing the bytes of the binary proof.
+   *        Even though the proof isn't text, it's safe to store it in a string
+   *        because C++ strings don't make any gaurantees about the encoding of
+   *        their contents. This makes them (effectively) just byte sequences.
+   *
+   * @return the parsed proof
+   */
+  static DratProof fromBinary(const std::string& binaryProof);
+
+  /**
+   * @return The instructions in this proof
+   */
+  const std::vector<DratInstruction>& getInstructions() const;
+
+  /**
+   * Write the DRAT proof in textual format.
+   * The format is described in:
+   *    http://www.cs.utexas.edu/~marijn/publications/drat-trim.pdf
+   *
+   * @param os the stream to write to
+   */
+  void outputAsText(std::ostream& os) const;
+
+ private:
+  /**
+   * Create an DRAT proof with no instructions.
+   */
+  DratProof();
+
+  /**
+   * The instructions of the DRAT proof.
+   */
+  std::vector<DratInstruction> d_instructions;
+};
+
+}  // namespace drat
+}  // namespace proof
+}  // namespace CVC4
+
+#endif  // __CVC4__PROOF__DRAT__DRAT_PROOF_H
index 4ba7205a5c2448bbf6df7de4c70dbef519c938d7..4659f2647ea7b8bcc3501c41ea22bd1b0b588b9d 100644 (file)
@@ -89,5 +89,6 @@ add_subdirectory(expr)
 add_subdirectory(main)
 add_subdirectory(parser)
 add_subdirectory(prop)
+add_subdirectory(proof)
 add_subdirectory(theory)
 add_subdirectory(util)
diff --git a/test/unit/proof/CMakeLists.txt b/test/unit/proof/CMakeLists.txt
new file mode 100644 (file)
index 0000000..9f462f7
--- /dev/null
@@ -0,0 +1,4 @@
+#-----------------------------------------------------------------------------#
+# Add unit tests
+
+cvc4_add_unit_test_black(drat_proof_black proof)
diff --git a/test/unit/proof/drat_proof_black.h b/test/unit/proof/drat_proof_black.h
new file mode 100644 (file)
index 0000000..3d8a1b5
--- /dev/null
@@ -0,0 +1,148 @@
+/*********************                                                        */
+/*! \file drat_proof_black.h
+ ** \verbatim
+ ** Top contributors (to current version):
+ **   Alex Ozdemir
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS
+ ** in the top-level source directory) and their institutional affiliations.
+ ** All rights reserved.  See the file COPYING in the top-level source
+ ** directory for licensing information.\endverbatim
+ **
+ ** \brief Black box testing of the DRAT proof class
+ **
+ ** In particular, tests DRAT binary parsing.
+ **/
+
+
+#include <cxxtest/TestSuite.h>
+
+#include "proof/drat/drat_proof.h"
+
+using namespace CVC4::proof::drat;
+
+class DratProofBlack : public CxxTest::TestSuite
+{
+ public:
+  void setUp() override {}
+  void tearDown() override {}
+
+  void testParseOneAdd();
+  void testParseOneMediumAdd();
+  void testParseOneBigAdd();
+  void testParseLiteralIsTooBig();
+  void testParseLiteralOverflow();
+  void testParseClauseOverflow();
+
+  void testParseTwo();
+
+  void testOutputTwoAsText();
+};
+
+void DratProofBlack::testParseOneAdd()
+{
+  // a 1;
+  std::string input("a\x02\x00", 3);
+  DratProof proof = DratProof::fromBinary(input);
+
+
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_kind, ADDITION);
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause.size(), 1);
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause[0], SatLiteral(0, false));
+}
+
+void DratProofBlack::testParseOneMediumAdd()
+{
+  // a -255;
+  std::string input("a\xff\x01\x00", 4);
+  DratProof proof = DratProof::fromBinary(input);
+
+
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_kind, ADDITION);
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause.size(), 1);
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause[0], SatLiteral(126, true));
+}
+
+void DratProofBlack::testParseOneBigAdd()
+{
+  // a -2199023255551;
+  std::string input("a\xff\xff\xff\xff\xff\x7f\x00", 8);
+  DratProof proof = DratProof::fromBinary(input);
+
+
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_kind, ADDITION);
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause.size(), 1);
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause[0], SatLiteral(2199023255550, true));
+}
+
+void DratProofBlack::testParseLiteralIsTooBig()
+{
+  std::string input("a\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7f\x00", 14);
+  TS_ASSERT_THROWS(DratProof::fromBinary(input), InvalidDratProofException);
+}
+
+void DratProofBlack::testParseLiteralOverflow()
+{
+  std::string input("a\x80", 2);
+  TS_ASSERT_THROWS(DratProof::fromBinary(input), InvalidDratProofException);
+}
+
+void DratProofBlack::testParseClauseOverflow()
+{
+  std::string input("a\x80\x01", 3);
+  TS_ASSERT_THROWS(DratProof::fromBinary(input), InvalidDratProofException);
+}
+
+void DratProofBlack::testParseTwo()
+{
+  // d -63 -8193
+  // 129 -8191
+  std::string input("\x64\x7f\x83\x80\x01\x00\x61\x82\x02\xff\x7f\x00", 12);
+  DratProof proof = DratProof::fromBinary(input);
+
+
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_kind, DELETION);
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause.size(), 2);
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause[0], SatLiteral(62, true));
+  TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause[1], SatLiteral(8192, true));
+
+  TS_ASSERT_EQUALS(proof.getInstructions()[1].d_kind, ADDITION);
+  TS_ASSERT_EQUALS(proof.getInstructions()[1].d_clause.size(), 2);
+  TS_ASSERT_EQUALS(proof.getInstructions()[1].d_clause[0], SatLiteral(128, false));
+  TS_ASSERT_EQUALS(proof.getInstructions()[1].d_clause[1], SatLiteral(8190, true));
+}
+
+void DratProofBlack::testOutputTwoAsText()
+{
+  // d -63 -8193
+  // 129 -8191
+  std::string input("\x64\x7f\x83\x80\x01\x00\x61\x82\x02\xff\x7f\x00", 12);
+  DratProof proof = DratProof::fromBinary(input);
+
+  std::ostringstream output;
+  proof.outputAsText(output);
+
+  std::istringstream tokens(output.str());
+  std::string token;
+
+  tokens >> token;
+  TS_ASSERT_EQUALS(token, "d");
+
+  tokens >> token;
+  TS_ASSERT_EQUALS(token, "-63");
+
+  tokens >> token;
+  TS_ASSERT_EQUALS(token, "-8193");
+
+  tokens >> token;
+  TS_ASSERT_EQUALS(token, "0");
+
+  tokens >> token;
+  TS_ASSERT_EQUALS(token, "129");
+
+  tokens >> token;
+  TS_ASSERT_EQUALS(token, "-8191");
+
+  tokens >> token;
+  TS_ASSERT_EQUALS(token, "0");
+}