From: Alex Ozdemir Date: Sun, 6 Jan 2019 18:32:42 +0000 (+0100) Subject: [DRAT] DRAT data structure (#2767) X-Git-Tag: cvc5-1.0.0~4300 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=610952322417e3758f2b62300f618721c269b2b3;p=cvc5.git [DRAT] DRAT data structure (#2767) * Copied old DRAT data-structure files. Next step: clean up the code, and adapt them to our current usage plans. * Polished the DRAT class. Notably, removed the idea of lazy-parsing, this is now just a DRAT wrapper class. More explicit about whether methods handle binary or text. Better constructor patterns * Added implementation of textual DRAT output * reordered the DratInstruction structure. * removed the public modifier from the above struct * removed the operator << implementation for DratInstruction * use emplace_back * Addressing Yoni's first review * Extracted "write literal in DIMACS format" idea as a function * Replaced some spurious Debug streams with `os`. (they were left over from an earlier refactor) * Improved some documentation * Removed aside about std::string * Addressed Mathias' comments Specifically * SCREAMING_SNAKE_CASED enum variants. * Extracted some common logic from two branches of a conditional. * Cleaned out some undefined behavior from bit manipulation. * Unit tests for binary DRAT parsing * Added text output test * s/white/black/ derp --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a0d191f15..502db63f4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -133,6 +133,8 @@ libcvc4_add_sources( proof/clause_id.h proof/cnf_proof.cpp proof/cnf_proof.h + proof/drat/drat_proof.cpp + proof/drat/drat_proof.h proof/lemma_proof.cpp proof/lemma_proof.h proof/lfsc_proof_printer.cpp diff --git a/src/proof/drat/drat_proof.cpp b/src/proof/drat/drat_proof.cpp new file mode 100644 index 000000000..81688321e --- /dev/null +++ b/src/proof/drat/drat_proof.cpp @@ -0,0 +1,246 @@ +/********************* */ +/*! \file drat_proof.cpp + ** \verbatim + ** Top contributors (to current version): + ** Alex Ozdemir + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief DRAT Proof Format + ** + ** Defines deserialization for DRAT proofs. + **/ + +#include "proof/drat/drat_proof.h" +#include + +namespace CVC4 { +namespace proof { +namespace drat { + +// helper functions for parsing the binary DRAT format. + +/** + * Parses a binary literal which starts at `start` and must not go beyond `end` + * + * Leaves the iterator one past the last byte that is a part of the clause. + * + * If the literal overruns `end`, then raises a `InvalidDratProofException`. + */ +SatLiteral parse_binary_literal(std::string::const_iterator& start, + const std::string::const_iterator& proof_end) +{ + // lit is encoded as uint represented by a variable-length byte sequence + uint64_t literal_represented_as_uint = 0; + for (uint8_t shift = 0; start != proof_end; ++start, shift += 7) + { + // Check whether shift is so large that we're going to lose some + // information + if (shift + 7 >= 64) + { + throw InvalidDratProofException( + "While parsing a DRAT proof, encountered a literal that was too " + "long"); + } + unsigned char byte = *start; + // The MSB of the byte is an indicator of whether the sequence continues + bool continued = (byte >> 7) & 1; + uint64_t numeric_part = byte & 0x7f; + literal_represented_as_uint |= numeric_part << shift; + if (!continued) + { + // LSB of `literal_represented_as_uint` indicates negation. + bool negated = literal_represented_as_uint & 1; + // Rest is the literal number + SatVariable var_number = literal_represented_as_uint >> 1; + ++start; + // Internal clauses start at 0, external ones start at 1. + return SatLiteral(var_number - 1, negated); + } + } + throw InvalidDratProofException( + "Literal in DRAT proof was not done when " + "EOF was encountered"); +} + +/** + * Parses a binary clause which starts at `start` and must not go beyond `end` + * + * Leaves the iterator one past the last byte that is a part of the clause. + * That is, one past the null byte. + * + * If the clause overruns `end`, then raises a `InvalidDratProofException`. + */ +SatClause parse_binary_clause(std::string::const_iterator& start, + const std::string::const_iterator& proof_end) +{ + SatClause clause; + // A clause is a 0-terminated sequence of literals + while (start != proof_end) + { + // Is the clause done? + if (*start == 0) + { + ++start; + return clause; + } + else + { + // If not, parse another literal + clause.emplace_back(parse_binary_literal(start, proof_end)); + } + } + // We've overrun the end of the byte stream. + throw InvalidDratProofException( + "Clause in DRAT proof was not done when " + "EOF was encountered"); +} + +/** + * Writes this SAT literal in the textual DIMACS format. i.e. as a non-zero + * integer. + * + * Since internally +0 and -0 are valid literals, we add one to each + * literal's number (SAT variable) when outputtting it. + * + * @param os the stream to write to + * @param l the literal to write + */ +void outputLiteralAsDimacs(std::ostream& os, SatLiteral l) +{ + if (l.isNegated()) + { + os << '-'; + } + // add 1 to convert between internal literals and their DIMACS + // representaations. + os << l.getSatVariable() + 1; +} + +// DratInstruction implementation + +DratInstruction::DratInstruction(DratInstructionKind kind, SatClause clause) + : d_kind(kind), d_clause(clause) +{ + // All intialized +} + +void DratInstruction::outputAsText(std::ostream& os) const +{ + switch (d_kind) + { + case DratInstructionKind::ADDITION: + { + for (const SatLiteral& l : d_clause) + { + outputLiteralAsDimacs(os, l); + os << ' '; + } + os << '0' << std::endl; + break; + } + case DratInstructionKind::DELETION: + { + os << "d "; + for (const SatLiteral& l : d_clause) + { + outputLiteralAsDimacs(os, l); + os << ' '; + } + os << '0' << std::endl; + break; + } + default: { Unreachable("Unknown DRAT instruction kind"); + } + } +} + +// DratProof implementation + +DratProof::DratProof() : d_instructions() {} + +// See the "binary format" section of +// https://www.cs.utexas.edu/~marijn/drat-trim/ +DratProof DratProof::fromBinary(const std::string& s) +{ + DratProof proof; + if (Debug.isOn("pf::drat")) + { + Debug("pf::drat") << "Parsing binary DRAT proof" << std::endl; + Debug("pf::drat") << "proof length: " << s.length() << " bytes" + << std::endl; + Debug("pf::drat") << "proof as bytes: "; + for (char i : s) + { + if (i == 'a' || i == 'd') + { + Debug("pf::drat") << std::endl << " " << std::bitset<8>(i); + } + else + { + Debug("pf::drat") << " " << std::bitset<8>(i); + } + } + Debug("pf::drat") << std::endl << "parsing proof..." << std::endl; + } + + // For each instruction + for (auto i = s.cbegin(), end = s.cend(); i != end;) + { + switch (*i) + { + case 'a': + { + ++i; + proof.d_instructions.emplace_back(ADDITION, + parse_binary_clause(i, end)); + break; + } + case 'd': + { + ++i; + proof.d_instructions.emplace_back(DELETION, + parse_binary_clause(i, end)); + break; + } + default: + { + std::ostringstream s; + s << "Invalid instruction in Drat proof. Instruction bits: " + << std::bitset<8>(*i) + << ". Expected 'a' (01100001) or 'd' " + "(01100100)."; + throw InvalidDratProofException(s.str()); + } + } + } + + if (Debug.isOn("pf::drat")) + { + Debug("pf::drat") << "Printing out DRAT in textual format:" << std::endl; + proof.outputAsText(Debug("pf::drat")); + } + + return proof; +}; + +const std::vector& DratProof::getInstructions() const +{ + return d_instructions; +}; + +void DratProof::outputAsText(std::ostream& os) const +{ + for (const DratInstruction& instruction : d_instructions) + { + instruction.outputAsText(os); + os << "\n"; + } +}; + +} // namespace drat +} // namespace proof +} // namespace CVC4 diff --git a/src/proof/drat/drat_proof.h b/src/proof/drat/drat_proof.h new file mode 100644 index 000000000..4bd4f4c04 --- /dev/null +++ b/src/proof/drat/drat_proof.h @@ -0,0 +1,128 @@ +/********************* */ +/*! \file drat_proof.h + ** \verbatim + ** Top contributors (to current version): + ** Alex Ozdemir + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief DRAT Proof Format + ** + ** Declares C++ types that represent a DRAT proof. + ** Defines serialization for these types. + ** + ** You can find an introduction to DRAT in the drat-trim paper: + ** http://www.cs.utexas.edu/~marijn/publications/drat-trim.pdf + ** + **/ + +#ifndef __CVC4__PROOF__DRAT__DRAT_PROOF_H +#define __CVC4__PROOF__DRAT__DRAT_PROOF_H + +#include "cvc4_private.h" +#include "prop/sat_solver.h" +#include "prop/sat_solver_types.h" + +namespace CVC4 { +namespace proof { +namespace drat { + +using CVC4::prop::SatClause; +using CVC4::prop::SatLiteral; +using CVC4::prop::SatVariable; + +class CVC4_PUBLIC InvalidDratProofException : public CVC4::Exception +{ + public: + InvalidDratProofException() : Exception("Invalid DRAT Proof") {} + + InvalidDratProofException(const std::string& msg) : Exception(msg) {} + + InvalidDratProofException(const char* msg) : Exception(msg) {} +}; /* class InvalidDratProofException */ + +enum DratInstructionKind +{ + ADDITION, + DELETION +}; + +struct DratInstruction +{ + DratInstruction(DratInstructionKind kind, SatClause clause); + + /** + * Write the DRAT instruction in textual format. + * The format is described in: + * http://www.cs.utexas.edu/~marijn/publications/drat-trim.pdf + * + * @param os the stream to write to + */ + void outputAsText(std::ostream& os) const; + + DratInstructionKind d_kind; + SatClause d_clause; +}; + +class DratProof +{ + public: + DratProof(const DratProof&) = default; + + DratProof(DratProof&&) = default; + + ~DratProof() = default; + + /** + * Parses a DRAT proof from the **binary format**. + * The format is described at: + * https://www.cs.utexas.edu/~marijn/drat-trim/#contact + * + * What do the standard authors mean by the format being "binary"? + * They just mean that proofs in this format should be understood as + * sequences of bytes, not sequences of ASCII/Unicode/your favorite character + * set characters. + * + * @param binaryProof a string containing the bytes of the binary proof. + * Even though the proof isn't text, it's safe to store it in a string + * because C++ strings don't make any gaurantees about the encoding of + * their contents. This makes them (effectively) just byte sequences. + * + * @return the parsed proof + */ + static DratProof fromBinary(const std::string& binaryProof); + + /** + * @return The instructions in this proof + */ + const std::vector& getInstructions() const; + + /** + * Write the DRAT proof in textual format. + * The format is described in: + * http://www.cs.utexas.edu/~marijn/publications/drat-trim.pdf + * + * @param os the stream to write to + */ + void outputAsText(std::ostream& os) const; + + private: + /** + * Create an DRAT proof with no instructions. + */ + DratProof(); + + /** + * The instructions of the DRAT proof. + */ + std::vector d_instructions; +}; + +} // namespace drat +} // namespace proof +} // namespace CVC4 + +#endif // __CVC4__PROOF__DRAT__DRAT_PROOF_H diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 4ba7205a5..4659f2647 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -89,5 +89,6 @@ add_subdirectory(expr) add_subdirectory(main) add_subdirectory(parser) add_subdirectory(prop) +add_subdirectory(proof) add_subdirectory(theory) add_subdirectory(util) diff --git a/test/unit/proof/CMakeLists.txt b/test/unit/proof/CMakeLists.txt new file mode 100644 index 000000000..9f462f756 --- /dev/null +++ b/test/unit/proof/CMakeLists.txt @@ -0,0 +1,4 @@ +#-----------------------------------------------------------------------------# +# Add unit tests + +cvc4_add_unit_test_black(drat_proof_black proof) diff --git a/test/unit/proof/drat_proof_black.h b/test/unit/proof/drat_proof_black.h new file mode 100644 index 000000000..3d8a1b5e6 --- /dev/null +++ b/test/unit/proof/drat_proof_black.h @@ -0,0 +1,148 @@ +/********************* */ +/*! \file drat_proof_black.h + ** \verbatim + ** Top contributors (to current version): + ** Alex Ozdemir + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2018 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Black box testing of the DRAT proof class + ** + ** In particular, tests DRAT binary parsing. + **/ + + +#include + +#include "proof/drat/drat_proof.h" + +using namespace CVC4::proof::drat; + +class DratProofBlack : public CxxTest::TestSuite +{ + public: + void setUp() override {} + void tearDown() override {} + + void testParseOneAdd(); + void testParseOneMediumAdd(); + void testParseOneBigAdd(); + void testParseLiteralIsTooBig(); + void testParseLiteralOverflow(); + void testParseClauseOverflow(); + + void testParseTwo(); + + void testOutputTwoAsText(); +}; + +void DratProofBlack::testParseOneAdd() +{ + // a 1; + std::string input("a\x02\x00", 3); + DratProof proof = DratProof::fromBinary(input); + + + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_kind, ADDITION); + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause.size(), 1); + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause[0], SatLiteral(0, false)); +} + +void DratProofBlack::testParseOneMediumAdd() +{ + // a -255; + std::string input("a\xff\x01\x00", 4); + DratProof proof = DratProof::fromBinary(input); + + + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_kind, ADDITION); + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause.size(), 1); + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause[0], SatLiteral(126, true)); +} + +void DratProofBlack::testParseOneBigAdd() +{ + // a -2199023255551; + std::string input("a\xff\xff\xff\xff\xff\x7f\x00", 8); + DratProof proof = DratProof::fromBinary(input); + + + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_kind, ADDITION); + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause.size(), 1); + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause[0], SatLiteral(2199023255550, true)); +} + +void DratProofBlack::testParseLiteralIsTooBig() +{ + std::string input("a\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7f\x00", 14); + TS_ASSERT_THROWS(DratProof::fromBinary(input), InvalidDratProofException); +} + +void DratProofBlack::testParseLiteralOverflow() +{ + std::string input("a\x80", 2); + TS_ASSERT_THROWS(DratProof::fromBinary(input), InvalidDratProofException); +} + +void DratProofBlack::testParseClauseOverflow() +{ + std::string input("a\x80\x01", 3); + TS_ASSERT_THROWS(DratProof::fromBinary(input), InvalidDratProofException); +} + +void DratProofBlack::testParseTwo() +{ + // d -63 -8193 + // 129 -8191 + std::string input("\x64\x7f\x83\x80\x01\x00\x61\x82\x02\xff\x7f\x00", 12); + DratProof proof = DratProof::fromBinary(input); + + + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_kind, DELETION); + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause.size(), 2); + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause[0], SatLiteral(62, true)); + TS_ASSERT_EQUALS(proof.getInstructions()[0].d_clause[1], SatLiteral(8192, true)); + + TS_ASSERT_EQUALS(proof.getInstructions()[1].d_kind, ADDITION); + TS_ASSERT_EQUALS(proof.getInstructions()[1].d_clause.size(), 2); + TS_ASSERT_EQUALS(proof.getInstructions()[1].d_clause[0], SatLiteral(128, false)); + TS_ASSERT_EQUALS(proof.getInstructions()[1].d_clause[1], SatLiteral(8190, true)); +} + +void DratProofBlack::testOutputTwoAsText() +{ + // d -63 -8193 + // 129 -8191 + std::string input("\x64\x7f\x83\x80\x01\x00\x61\x82\x02\xff\x7f\x00", 12); + DratProof proof = DratProof::fromBinary(input); + + std::ostringstream output; + proof.outputAsText(output); + + std::istringstream tokens(output.str()); + std::string token; + + tokens >> token; + TS_ASSERT_EQUALS(token, "d"); + + tokens >> token; + TS_ASSERT_EQUALS(token, "-63"); + + tokens >> token; + TS_ASSERT_EQUALS(token, "-8193"); + + tokens >> token; + TS_ASSERT_EQUALS(token, "0"); + + tokens >> token; + TS_ASSERT_EQUALS(token, "129"); + + tokens >> token; + TS_ASSERT_EQUALS(token, "-8191"); + + tokens >> token; + TS_ASSERT_EQUALS(token, "0"); +}