From 35637c2a760c35ca1c61edc276875cd91df0ab11 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Dejan=20Jovanovi=C4=87?= <dejan.jovanovic@gmail.com>
Date: Sat, 14 Jul 2012 13:07:19 +0000
Subject: [PATCH] an example that uses bitvectors to simulate sha1 computation
 and dumps an smt problem corresponding to the hash-inversion problem

---
 examples/Makefile.am         |   2 +-
 examples/hashsmt/Makefile    |   4 +
 examples/hashsmt/Makefile.am |  21 ++++
 examples/hashsmt/sha1.hpp    | 206 +++++++++++++++++++++++++++++++++++
 examples/hashsmt/sha1smt.cpp |  88 +++++++++++++++
 examples/hashsmt/word.cpp    | 143 ++++++++++++++++++++++++
 examples/hashsmt/word.h      | 107 ++++++++++++++++++
 7 files changed, 570 insertions(+), 1 deletion(-)
 create mode 100644 examples/hashsmt/Makefile
 create mode 100644 examples/hashsmt/Makefile.am
 create mode 100644 examples/hashsmt/sha1.hpp
 create mode 100644 examples/hashsmt/sha1smt.cpp
 create mode 100644 examples/hashsmt/word.cpp
 create mode 100644 examples/hashsmt/word.h
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 484097740..efa35efd5 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -1,4 +1,4 @@
-SUBDIRS = nra-translate .
+SUBDIRS = nra-translate hashsmt .
 
 AM_CPPFLAGS = \
 	-I@srcdir@/../src/include -I@srcdir@/../src -I@builddir@/../src $(ANTLR_INCLUDES)
diff --git a/examples/hashsmt/Makefile b/examples/hashsmt/Makefile
new file mode 100644
index 000000000..7bb553677
--- /dev/null
+++ b/examples/hashsmt/Makefile
@@ -0,0 +1,4 @@
+topdir = ../..
+srcdir = examples/hashsmt
+
+include $(topdir)/Makefile.subdir
diff --git a/examples/hashsmt/Makefile.am b/examples/hashsmt/Makefile.am
new file mode 100644
index 000000000..e6670c9bd
--- /dev/null
+++ b/examples/hashsmt/Makefile.am
@@ -0,0 +1,21 @@
+AM_CPPFLAGS = \
+	-I@srcdir@/../../src/include -I@srcdir@/../../src -I@builddir@/../../src $(ANTLR_INCLUDES)
+AM_CXXFLAGS = -Wall
+AM_CFLAGS = -Wall
+
+noinst_PROGRAMS = \
+	sha1smt	
+
+noinst_DATA =
+
+sha1smt_SOURCES = \
+	sha1smt.cpp \
+	word.h \
+	word.cpp \
+	sha1.hpp
+sha1smt_LDADD = \
+	@builddir@/../../src/libcvc4.la
+
+EXTRA_DIST = \
+	linkedin.small
+	
\ No newline at end of file
diff --git a/examples/hashsmt/sha1.hpp b/examples/hashsmt/sha1.hpp
new file mode 100644
index 000000000..a6ac011f1
--- /dev/null
+++ b/examples/hashsmt/sha1.hpp
@@ -0,0 +1,206 @@
+// boost/uuid/sha1.hpp header file  ----------------------------------------------//
+
+// Copyright 2007 Andy Tompkins.
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// Revision History
+//  29 May 2007 - Initial Revision
+//  25 Feb 2008 - moved to namespace boost::uuids::detail
+
+// This is a byte oriented implementation
+// Note: this implementation does not handle message longer than
+//       2^32 bytes.
+
+#pragma once 
+
+#include <boost/static_assert.hpp>
+#include <cstddef>
+
+#include "word.h"
+
+#ifdef BOOST_NO_STDC_NAMESPACE
+namespace std {
+    using ::size_t;
+} // namespace std
+#endif
+
+namespace hashsmt {
+
+BOOST_STATIC_ASSERT(sizeof(unsigned char)*8 == 8);
+BOOST_STATIC_ASSERT(sizeof(unsigned int)*8 == 32);
+
+inline cvc4_uint32 left_rotate(cvc4_uint32 x, std::size_t n)
+{
+    return (x<<n) ^ (x>> (32-n));
+}
+
+class sha1
+{
+public:
+    typedef cvc4_uint32(&digest_type)[5];
+public:
+    sha1();
+
+    void reset();
+
+    void process_byte(cvc4_uchar8 byte);
+    void process_block(void const* bytes_begin, void const* bytes_end);
+    void process_bytes(void const* buffer, std::size_t byte_count);
+
+    void get_digest(digest_type digest);
+
+private:
+    void process_block();
+
+private:
+    cvc4_uint32 h_[5];
+
+    cvc4_uchar8 block_[64];
+
+    std::size_t block_byte_index_;
+    std::size_t byte_count_;
+};
+
+inline sha1::sha1()
+{
+    reset();
+}
+
+inline void sha1::reset()
+{
+    h_[0] = 0x67452301;
+    h_[1] = 0xEFCDAB89;
+    h_[2] = 0x98BADCFE;
+    h_[3] = 0x10325476;
+    h_[4] = 0xC3D2E1F0;
+
+    block_byte_index_ = 0;
+    byte_count_ = 0;
+}
+
+inline void sha1::process_byte(cvc4_uchar8 byte)
+{
+    block_[block_byte_index_++] = byte;
+    ++byte_count_;
+    if (block_byte_index_ == 64) {
+        block_byte_index_ = 0;
+        process_block();
+    }
+}
+
+inline void sha1::process_block(void const* bytes_begin, void const* bytes_end)
+{
+    cvc4_uchar8 const* begin = static_cast<cvc4_uchar8 const*>(bytes_begin);
+    cvc4_uchar8 const* end = static_cast<cvc4_uchar8 const*>(bytes_end);
+    for(; begin != end; ++begin) {
+        process_byte(*begin);
+    }
+}
+
+inline void sha1::process_bytes(void const* buffer, std::size_t byte_count)
+{
+    cvc4_uchar8 const* b = static_cast<cvc4_uchar8 const*>(buffer);
+    process_block(b, b+byte_count);
+}
+
+inline void sha1::process_block()
+{
+    cvc4_uint32 w[80];
+    for (std::size_t i=0; i<16; ++i) {
+        w[i]  = (block_[i*4 + 0] << 24);
+        w[i] |= (block_[i*4 + 1] << 16);
+        w[i] |= (block_[i*4 + 2] << 8);
+        w[i] |= (block_[i*4 + 3]);
+    }
+    for (std::size_t i=16; i<80; ++i) {
+        w[i] = left_rotate((w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]), 1);
+    }
+
+    cvc4_uint32 a = h_[0];
+    cvc4_uint32 b = h_[1];
+    cvc4_uint32 c = h_[2];
+    cvc4_uint32 d = h_[3];
+    cvc4_uint32 e = h_[4];
+
+    for (std::size_t i=0; i<80; ++i) {
+        cvc4_uint32 f;
+        cvc4_uint32 k;
+
+        if (i<20) {
+            f = (b & c) | (~b & d);
+            k = 0x5A827999;
+        } else if (i<40) {
+            f = b ^ c ^ d;
+            k = 0x6ED9EBA1;
+        } else if (i<60) {
+            f = (b & c) | (b & d) | (c & d);
+            k = 0x8F1BBCDC;
+        } else {
+            f = b ^ c ^ d;
+            k = 0xCA62C1D6;
+        }
+
+        cvc4_uint32 temp = left_rotate(a, 5) + f + e + k + w[i];
+        e = d;
+        d = c;
+        c = left_rotate(b, 30);
+        b = a;
+        a = temp;
+    }
+
+    h_[0] += a;
+    h_[1] += b;
+    h_[2] += c;
+    h_[3] += d;
+    h_[4] += e;
+}
+
+inline void sha1::get_digest(digest_type digest)
+{
+    std::size_t bit_count = byte_count_*8;
+
+    // append the bit '1' to the message
+    process_byte(0x80);
+
+    // append k bits '0', where k is the minimum number >= 0
+    // such that the resulting message length is congruent to 56 (mod 64)
+    // check if there is enough space for padding and bit_count
+    if (block_byte_index_ > 56) {
+        // finish this block
+        while (block_byte_index_ != 0) {
+            process_byte(0);
+        }
+
+        // one more block
+        while (block_byte_index_ < 56) {
+            process_byte(0);
+        }
+    } else {
+        while (block_byte_index_ < 56) {
+            process_byte(0);
+        }
+    }
+
+    // append length of message (before pre-processing) 
+    // as a 64-bit big-endian integer
+    process_byte(0);
+    process_byte(0);
+    process_byte(0);
+    process_byte(0);
+    process_byte( static_cast<unsigned char>((bit_count>>24) & 0xFF));
+    process_byte( static_cast<unsigned char>((bit_count>>16) & 0xFF));
+    process_byte( static_cast<unsigned char>((bit_count>>8 ) & 0xFF));
+    process_byte( static_cast<unsigned char>((bit_count)     & 0xFF));
+
+    // get final digest
+    digest[0] = h_[0];
+    digest[1] = h_[1];
+    digest[2] = h_[2];
+    digest[3] = h_[3];
+    digest[4] = h_[4];
+}
+
+} // namespace hashsmt
+
diff --git a/examples/hashsmt/sha1smt.cpp b/examples/hashsmt/sha1smt.cpp
new file mode 100644
index 000000000..3a33a4bc2
--- /dev/null
+++ b/examples/hashsmt/sha1smt.cpp
@@ -0,0 +1,88 @@
+/*
+ * sha1smt.cpp
+ *
+ *  Created on: Jul 13, 2012
+ *      Author: dejan
+ */
+
+#include <string>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+#include "word.h"
+#include "sha1.hpp"
+#include "expr/command.h"
+
+#include <boost/uuid/sha1.hpp>
+
+using namespace std;
+using namespace CVC4;
+
+int main(int argc, char* argv[]) {
+
+  try {
+
+    // Check the arguments
+    if (argc != 3) {
+      cerr << "usage: sha1smt message output-file" << std::endl;
+      return 1;
+    }
+
+    // Get the message to encode and the output file
+    string msg = argv[1];
+    unsigned msgSize = msg.size();
+    ofstream output(argv[2]);
+    output << expr::ExprSetDepth(-1) << expr::ExprSetLanguage(language::output::LANG_SMTLIB_V2);
+    output << SetBenchmarkLogicCommand("QF_BV") << endl;
+    output << SetBenchmarkStatusCommand(SMT_SATISFIABLE) << endl;
+
+    // Make the variables the size of the string    
+    hashsmt::cvc4_uchar8 cvc4input[msgSize];
+    for (unsigned i = 0; i < msgSize; ++ i) {
+      stringstream ss;
+      ss << "x" << i; 
+      cvc4input[i] = hashsmt::cvc4_uchar8(ss.str());
+      output << DeclareFunctionCommand(ss.str(), cvc4input[i].getExpr(), cvc4input[i].getExpr().getType()) << endl;
+
+      // Ouput the solution also
+      BoolExpr solution = (cvc4input[i] == hashsmt::cvc4_uchar8(msg.c_str()[i]));
+      output << "; " << AssertCommand(solution) << endl;
+    }
+
+    // Do the cvc4 encoding
+    hashsmt::sha1 cvc4encoder;
+    cvc4encoder.process_bytes(cvc4input, msgSize);
+    
+    // Get the digest as bitvectors
+    hashsmt::cvc4_uint32 cvc4digest[5];
+    cvc4encoder.get_digest(cvc4digest);
+                
+    // Do the actual sha1 encoding
+    boost::uuids::detail::sha1 sha1encoder;
+    sha1encoder.process_bytes(msg.c_str(), msgSize);
+    unsigned sha1digest[5];
+    sha1encoder.get_digest(sha1digest);
+                
+    // Create the assertion
+    BoolExpr assertion;
+    for (unsigned i = 0; i < 5; ++ i) {
+      BoolExpr conjunct = (cvc4digest[i] == hashsmt::cvc4_uint32(sha1digest[i]));
+      if (i > 0) {
+        assertion = assertion.andExpr(conjunct);
+      } else {
+        assertion = conjunct;
+      }
+    }
+    output << AssertCommand(assertion) << endl;
+    
+    // Checksat command
+    output << CheckSatCommand() << endl;
+                
+  } catch (CVC4::Exception& e) {
+    cerr << e << endl;
+  }
+}
+
+
+
diff --git a/examples/hashsmt/word.cpp b/examples/hashsmt/word.cpp
new file mode 100644
index 000000000..46687db14
--- /dev/null
+++ b/examples/hashsmt/word.cpp
@@ -0,0 +1,143 @@
+/*
+ * word.cpp
+ *
+ *  Created on: Jul 13, 2012
+ *      Author: dejan
+ */
+
+#include "word.h"
+
+using namespace std;
+using namespace hashsmt;
+using namespace CVC4;
+
+Expr Word::extendToSize(unsigned newSize) const {
+  if (newSize <= size()) {
+    return d_expr;
+  } else {
+    // 0-extend to size
+    Expr extendOp = em()->mkConst(BitVectorZeroExtend(newSize - size()));
+    return em()->mkExpr(extendOp, d_expr);    
+  }
+}
+
+ExprManager* Word::s_manager = 0;
+
+ExprManager* Word::em() {
+  if (s_manager == 0) {
+    CVC4::Options options;
+    options.inputLanguage = language::input::LANG_SMTLIB_V2;
+    options.outputLanguage = language::output::LANG_SMTLIB_V2;
+    s_manager = new CVC4::ExprManager(options);
+  }
+  return s_manager;
+}
+
+BoolExpr Word::operator == (const Word& b) const {
+  return em()->mkExpr(kind::EQUAL, d_expr, b.getExpr());
+}
+
+void Word::print(ostream& out) const {
+  out << CVC4::Expr::setdepth(-1) << d_expr;
+}
+
+Word::Word(unsigned newSize, unsigned value) {
+  d_expr = em()->mkConst(BitVector(newSize, value));
+};
+
+Word::Word(unsigned newSize, string name) {
+  d_expr = em()->mkVar(name, em()->mkBitVectorType(newSize));
+};
+
+Word& Word::operator = (const Word& b) {
+  d_expr = b.d_expr;
+  return *this;
+}
+
+Word Word::operator + (const Word& b) const {
+  unsigned newSize = std::max(size(), b.size());
+  Expr lhs = extendToSize(newSize);
+  Expr rhs = b.extendToSize(newSize);
+  return em()->mkExpr(kind::BITVECTOR_PLUS, lhs, rhs);
+}
+
+Word& Word::operator += (const Word& b) {
+  (*this) = (*this) + b;
+  return (*this);
+}
+
+Word Word::operator ~ () const {
+  return em()->mkExpr(kind::BITVECTOR_NOT, d_expr);
+}
+
+Word Word::operator & (const Word& b) const {
+  unsigned newSize = std::max(size(), b.size());
+  Expr lhs = extendToSize(newSize);
+  Expr rhs = b.extendToSize(newSize);
+  return em()->mkExpr(kind::BITVECTOR_AND, lhs, rhs);
+}
+
+Word Word::operator | (const Word& b) const {
+  unsigned newSize = std::max(size(), b.size());
+  Expr lhs = extendToSize(newSize);
+  Expr rhs = b.extendToSize(newSize);
+  return em()->mkExpr(kind::BITVECTOR_OR, lhs, rhs);
+}
+
+Word& Word::operator |= (const Word& b) {
+  (*this) = (*this) | b;
+  return (*this);
+}
+
+Word Word::operator ^ (const Word& b) const {
+  unsigned newSize = std::max(size(), b.size());
+  Expr lhs = extendToSize(newSize);
+  Expr rhs = b.extendToSize(newSize);
+  return em()->mkExpr(kind::BITVECTOR_XOR, lhs, rhs);
+}
+
+Word Word::operator << (unsigned amount) const {
+  // Instead of shifting we just add zeroes, to ensure that ((char)x << 24) return 32 bits
+  Word padding(amount, 0);
+  return em()->mkExpr(kind::BITVECTOR_CONCAT, d_expr, padding.d_expr);
+}
+
+Word Word::operator >> (unsigned amount) const {
+  Word shiftAmount(size(), amount);
+  return em()->mkExpr(kind::BITVECTOR_LSHR, d_expr, shiftAmount.d_expr);
+}
+
+unsigned Word::size() const {
+  BitVectorType type = d_expr.getType();
+  return type.getSize();
+}
+
+cvc4_uint32::cvc4_uint32(const Word& b) {
+  if (b.size() > 32) {
+    // Extract the first 32 bits
+    Expr extractOp = em()->mkConst(BitVectorExtract(31, 0));
+    d_expr = em()->mkExpr(extractOp, b.getExpr());
+  } else if (b.size() < 32) {
+    // 0-extend to 32 bits
+    Expr extendOp = em()->mkConst(BitVectorZeroExtend(32 - b.size()));
+    d_expr = em()->mkExpr(extendOp, b.getExpr());    
+  } else {
+    d_expr = b.getExpr();
+  }
+}
+
+cvc4_uchar8::cvc4_uchar8(const Word& b) {
+  if (b.size() > 8) {
+    // Extract the first 8 bits
+    Expr extractOp = em()->mkConst(BitVectorExtract(7, 0));
+    d_expr = em()->mkExpr(extractOp, b.getExpr());
+  } else if (b.size() < 8) {
+    // 0-extend to 8 bits
+    Expr extendOp = em()->mkConst(BitVectorZeroExtend(8 - b.size()));
+    d_expr = em()->mkExpr(extendOp, b.getExpr());    
+  } else {
+    d_expr = b.getExpr();
+  }
+}
+
+
diff --git a/examples/hashsmt/word.h b/examples/hashsmt/word.h
new file mode 100644
index 000000000..6195d0699
--- /dev/null
+++ b/examples/hashsmt/word.h
@@ -0,0 +1,107 @@
+/*
+ * word.h
+ *
+ *  Created on: Jul 13, 2012
+ *      Author: dejan
+ */
+
+#ifndef WORD_H_
+#define WORD_H_
+
+#include <string>
+#include <iostream>
+
+#include "expr/expr.h"
+#include "expr/expr_manager.h"
+#include "util/options.h"
+
+namespace hashsmt {
+
+class Word {
+
+  /** Expression managaer we're using for all word expressions */
+  static CVC4::ExprManager* s_manager;
+
+protected:
+
+  /** The expression of this word */
+  CVC4::Expr d_expr;
+
+  /** Get the expression manager words are using */
+  static CVC4::ExprManager* em();
+
+  Word(CVC4::Expr expr = CVC4::Expr())
+  : d_expr(expr) {}
+
+  /** Extend the representing expression to the given size >= size() */
+  CVC4::Expr extendToSize(unsigned size) const;
+
+public:
+
+  Word(unsigned size, unsigned value = 0);
+  Word(unsigned size, std::string name);
+
+  Word& operator =  (const Word& b);
+  Word  operator +  (const Word& b) const;
+  Word& operator += (const Word& b);
+  Word  operator ~  () const;
+  Word  operator &  (const Word& b) const;
+  Word  operator |  (const Word& b) const;
+  Word& operator |= (const Word& b);
+  Word  operator ^  (const Word& b) const;
+  Word  operator << (unsigned amount) const;
+  Word  operator >> (unsigned amount) const;
+
+  unsigned size() const;
+
+  void print(std::ostream& out) const;
+
+  CVC4::Expr getExpr() const {
+    return d_expr;
+  }
+
+  /** Returns the comparison expression */  
+  CVC4::BoolExpr operator == (const Word& b) const;
+};
+
+inline std::ostream& operator << (std::ostream& out, const Word& word) {
+  word.print(out);
+  return out;
+}
+
+/** Symbolic 32-bit unsigned integer as a CVC4 bitvector expression */
+class cvc4_uint32 : public Word {
+public:
+
+  /** Construction from constants of the right size */
+  cvc4_uint32(unsigned value = 0)
+  : Word(32, value) {}
+
+  /** Construction of variables of the right size */
+  cvc4_uint32(std::string name)
+  : Word(32, name) {}
+
+  /** Automatic extend/cut to uint32 */
+  cvc4_uint32(const Word& word);
+};
+
+/** Symbolic 8-bit unsigned char as a CVC4 bitvector expression */
+class cvc4_uchar8 : public Word {
+public:
+
+  /** Construction from constants of the right size */
+  cvc4_uchar8(unsigned value = 0)
+  : Word(8, value) {}
+
+  /** Construction of variables of the right size */
+  cvc4_uchar8(std::string name)
+  : Word(8, name) {}
+
+  /** Automatic extend/cut to uchar8 */
+  cvc4_uchar8(const Word& word);
+};
+
+
+}
+
+#endif /* WORD_H_ */
-- 
2.30.2