Adding listeners to Options.

[cvc5.git] / src / parser / antlr_input.cpp
diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp

index fbf2b86507d975f0dfdd2af3334be2430d705cf1..6428017f5c4f3078a8ebc660ce203b8fdd423bce 100644 (file)
--- a/src/parser/antlr_input.cpp
+++ b/src/parser/antlr_input.cpp
@@ -1,11 +1,11 @@
  /*********************                                                        */
  /*! \file antlr_input.cpp
   ** \verbatim
- ** Original author: cconway
- ** Major contributors: mdeters
- ** Minor contributors (to current version): bobot
- ** This file is part of the CVC4 prototype.
- ** Copyright (c) 2009-2012  New York University and The University of Iowa
+ ** Original author: Christopher L. Conway
+ ** Major contributors: Morgan Deters, Kshitij Bansal
+ ** Minor contributors (to current version): Francois Bobot
+ ** This file is part of the CVC4 project.
+ ** Copyright (c) 2009-2014  New York University and The University of Iowa
   ** See the file COPYING in the top-level source directory for licensing
   ** information.\endverbatim
   **
@@ -14,26 +14,30 @@
   ** A super-class for ANTLR-generated input language parsers
   **/
  
+#include "parser/antlr_input.h"
+// We rely on the inclusion of #include <antlr3.h> in "parser/antlr_input.h".
+// This is avoid having to undefine the symbols in <antlr3.h>.
+// See the documentation in "parser/antlr_undefines.h" for more
+// details.
+
  #include <limits.h>
-#include <antlr3.h>
  #include <stdint.h>
  
-#include "parser/antlr_input.h"
-#include "parser/input.h"
+#include "base/output.h"
+#include "expr/type.h"
+#include "parser/antlr_line_buffered_input.h"
  #include "parser/bounded_token_buffer.h"
  #include "parser/bounded_token_factory.h"
-#include "parser/antlr_line_buffered_input.h"
+#include "parser/cvc/cvc_input.h"
+#include "parser/input.h"
  #include "parser/memory_mapped_input_buffer.h"
-#include "parser/parser_exception.h"
  #include "parser/parser.h"
-
-#include "expr/command.h"
-#include "expr/type.h"
-#include "parser/cvc/cvc_input.h"
+#include "parser/parser_exception.h"
  #include "parser/smt1/smt1_input.h"
  #include "parser/smt2/smt2_input.h"
+#include "parser/smt2/sygus_input.h"
  #include "parser/tptp/tptp_input.h"
-#include "util/output.h"
+#include "smt_util/command.h"
  
  using namespace std;
  using namespace CVC4;
@@ -43,22 +47,91 @@ using namespace CVC4::kind;
  namespace CVC4 {
  namespace parser {
  
-AntlrInputStream::AntlrInputStream(std::string name, 
+// These functions exactly wrap the antlr3 source inconsistencies.
+// These are the only location CVC4_ANTLR3_OLD_INPUT_STREAM ifdefs appear.
+// No other sanity checking happens;
+pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input, const std::string& name);
+pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name);
+pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
+                                            uint32_t size,
+                                            const std::string& name);
+
+pANTLR3_INPUT_STREAM
+newAntlr3BufferedStream(std::istream& input, const std::string& name){
+  pANTLR3_INPUT_STREAM inputStream = NULL;
+  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
+
+#ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
+  inputStream =
+    antlr3LineBufferedStreamNew(input, 0, name_duplicate);
+#else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
+  inputStream =
+    antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT, name_duplicate);
+#endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
+
+  free(name_duplicate);
+  return inputStream;
+}
+
+pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name){
+  pANTLR3_INPUT_STREAM input = NULL;
+  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
+
+  // libantlr3c v3.2 isn't source-compatible with v3.4
+#ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
+  input = antlr3AsciiFileStreamNew(name_duplicate);
+#else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
+  input = antlr3FileStreamNew(name_duplicate, ANTLR3_ENC_8BIT);
+#endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
+
+  free(name_duplicate);
+  return input;
+}
+
+
+pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep,
+                                            uint32_t size,
+                                            const std::string& name){
+  pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str());
+  pANTLR3_INPUT_STREAM inputStream = NULL;
+  /* Create an ANTLR input backed by the buffer. */
+#ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
+  inputStream =
+    antlr3NewAsciiStringInPlaceStream(basep, size, name_duplicate);
+#else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
+  inputStream =
+    antlr3StringStreamNew(basep, ANTLR3_ENC_8BIT, size,
+                          name_duplicate);
+#endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
+  free(name_duplicate);
+  return inputStream;
+}
+
+AntlrInputStream::AntlrInputStream(std::string name,
                                     pANTLR3_INPUT_STREAM input,
-                                   bool fileIsTemporary) :
-  InputStream(name,fileIsTemporary),
-  d_input(input) {
+                                   bool fileIsTemporary,
+                                   pANTLR3_UINT8 inputString) :
+  InputStream(name, fileIsTemporary),
+  d_input(input),
+  d_inputString(inputString)
+{
    assert( input != NULL );
+  input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str());
  }
  
  AntlrInputStream::~AntlrInputStream() {
    d_input->free(d_input);
+  if(d_inputString != NULL){
+    free(d_inputString);
+  }
  }
  
  pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const {
    return d_input;
  }
  
+
+
  AntlrInputStream*
  AntlrInputStream::newFileInputStream(const std::string& name,
                                       bool useMmap)
@@ -72,19 +145,15 @@ AntlrInputStream::newFileInputStream(const std::string& name,
    if(useMmap) {
      input = MemoryMappedInputBufferNew(name);
    } else {
-    // libantlr3c v3.2 isn't source-compatible with v3.4
-#ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
-    input = antlr3AsciiFileStreamNew((pANTLR3_UINT8) name.c_str());
-#else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
-    input = antlr3FileStreamNew((pANTLR3_UINT8) name.c_str(), ANTLR3_ENC_8BIT);
-#endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
+    input = newAntlr3FileStream(name);
    }
    if(input == NULL) {
      throw InputStreamException("Couldn't open file: " + name);
    }
-  return new AntlrInputStream( name, input );
+  return new AntlrInputStream( name, input, false, NULL );
  }
  
+
  AntlrInputStream*
  AntlrInputStream::newStreamInputStream(std::istream& input,
                                         const std::string& name,
@@ -92,19 +161,10 @@ AntlrInputStream::newStreamInputStream(std::istream& input,
    throw (InputStreamException) {
  
    pANTLR3_INPUT_STREAM inputStream = NULL;
+  pANTLR3_UINT8 inputStringCopy = NULL;
  
    if(lineBuffered) {
-#ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
-    inputStream =
-      antlr3LineBufferedStreamNew(input,
-                                  0,
-                                  (pANTLR3_UINT8) strdup(name.c_str()));
-#else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
-    inputStream =
-      antlr3LineBufferedStreamNew(input,
-                                  ANTLR3_ENC_8BIT,
-                                  (pANTLR3_UINT8) strdup(name.c_str()));
-#endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
+    inputStream = newAntlr3BufferedStream(input, name);
    } else {
  
      // Since these are all NULL on entry, realloc will be called
@@ -136,28 +196,18 @@ AntlrInputStream::newStreamInputStream(std::istream& input,
      if( !input.eof() ) {
        throw InputStreamException("Stream input failed: " + name);
      }
-
-    /* Create an ANTLR input backed by the buffer. */
-#ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
-    inputStream =
-      antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8) basep,
-                                        cp - basep,
-                                        (pANTLR3_UINT8) strdup(name.c_str()));
-#else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
-    inputStream =
-      antlr3StringStreamNew((pANTLR3_UINT8) basep,
-                            ANTLR3_ENC_8BIT,
-                            cp - basep,
-                            (pANTLR3_UINT8) strdup(name.c_str()));
-#endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
-
+    ptrdiff_t offset = cp - basep;
+    assert(offset >= 0);
+    assert(offset <= std::numeric_limits<uint32_t>::max());
+    inputStringCopy = (pANTLR3_UINT8)basep;
+    inputStream = newAntrl3InPlaceStream(inputStringCopy, (uint32_t) offset, name);
    }
  
    if( inputStream == NULL ) {
      throw InputStreamException("Couldn't initialize input: " + name);
    }
  
-  return new AntlrInputStream( name, inputStream );
+  return new AntlrInputStream( name, inputStream, false, inputStringCopy );
  }
  
  
@@ -165,25 +215,22 @@ AntlrInputStream*
  AntlrInputStream::newStringInputStream(const std::string& input,
                                         const std::string& name)
    throw (InputStreamException) {
-  char* inputStr = strdup(input.c_str());
-  char* nameStr = strdup(name.c_str());
-  assert( inputStr!=NULL && nameStr!=NULL );
-#ifdef CVC4_ANTLR3_OLD_INPUT_STREAM
-  pANTLR3_INPUT_STREAM inputStream =
-      antlr3NewAsciiStringInPlaceStream((pANTLR3_UINT8) inputStr,
-                                        input.size(),
-                                        (pANTLR3_UINT8) nameStr);
-#else /* CVC4_ANTLR3_OLD_INPUT_STREAM */
-  pANTLR3_INPUT_STREAM inputStream =
-      antlr3StringStreamNew((pANTLR3_UINT8) inputStr,
-                            ANTLR3_ENC_8BIT,
-                            input.size(),
-                            (pANTLR3_UINT8) nameStr);
-#endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */
+
+  size_t input_size = input.size();
+  assert(input_size <= std::numeric_limits<uint32_t>::max());
+
+  // Ownership of input_duplicate  is transferred to the AntlrInputStream.
+  pANTLR3_UINT8 input_duplicate = (pANTLR3_UINT8) strdup(input.c_str());
+
+  if( input_duplicate == NULL ) {
+    throw InputStreamException("Couldn't initialize string input: '" + input + "'");
+  }
+
+  pANTLR3_INPUT_STREAM inputStream = newAntrl3InPlaceStream(input_duplicate, (uint32_t)input_size, name);
    if( inputStream==NULL ) {
      throw InputStreamException("Couldn't initialize string input: '" + input + "'");
    }
-  return new AntlrInputStream( name, inputStream );
+  return new AntlrInputStream( name, inputStream, false, input_duplicate );
  }
  
  AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) {
@@ -200,8 +247,13 @@ AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStre
      input = new Smt1Input(inputStream);
      break;
  
-  case LANG_SMTLIB_V2:
-    input = new Smt2Input(inputStream);
+  case LANG_SMTLIB_V2_0:
+  case LANG_SMTLIB_V2_5:
+    input = new Smt2Input(inputStream, lang);
+    break;
+
+  case LANG_SYGUS:
+    input = new SygusInput(inputStream);
      break;
  
    case LANG_TPTP:
@@ -283,16 +335,193 @@ void AntlrInput::warning(const std::string& message) {
    Warning() << getInputStream()->getName() << ':' << d_lexer->getLine(d_lexer) << '.' << d_lexer->getCharPositionInLine(d_lexer) << ": " << message << endl;
  }
  
-void AntlrInput::parseError(const std::string& message)
+
+
+/**
+ * characters considered part of a simple symbol in SMTLIB.
+ *
+ * TODO: Ideally this code shouldn't be smtlib specific (should work
+ * with CVC language too), but this per-language specialization has
+ * been left for a later point.
+ */
+inline bool isSimpleChar(char ch) {
+  return isalnum(ch) || (strchr("~!@$%^&*_-+=<>.?/", ch) != NULL);
+}
+
+size_t wholeWordMatch(string input, string pattern, bool (*isWordChar)(char)) {
+  size_t st = 0;
+  size_t N = input.size();
+  while(st < N) {
+    while( st < N && (*isWordChar)(input[st])  == false ) st++;
+    size_t en = st;
+    while(en + 1 < N && (*isWordChar)(input[en + 1]) == true) en++;
+    if(en - st + 1 == pattern.size()) {
+      bool match = true;
+      for(size_t i = 0; match && i < pattern.size(); ++i) {
+        match &= (pattern[i] == input[st+i]);
+      }
+      if(match == true) {
+        return st;
+      }
+    }
+    st = en + 1;
+  }
+  return string::npos;
+}
+
+/**
+ * Gets part of original input and tries to visually hint where the
+ * error might be.
+ *
+ * Something like:
+ *
+ *   ...nd (= alpha beta) (= beta delta))
+ *                                ^
+ *
+ * Implementation (as on 2014/04/24):
+ *
+ * > if suggested pointer by lexer is under a "simple char", move to
+ *   start of the word and print pointer there.
+ *
+ * > in the other case, it tries to find the nearest word in the error
+ *   message passed along. if it can't find it, we don't add this
+ *   visual hint, as experimentally position suggested by lexer was
+ *   found to be totally unhelpful. (TODO: fix this upstream to
+ *   improve)
+ */
+std::string parseErrorHelper(const char* lineStart, int charPositionInLine, const std::string& message)
+{
+  // Is it a multi-line message
+  bool multilineMessage = (message.find('\n') != string::npos);
+  // Useful only if it is a multi-line message
+  int firstLineEnd = message.find('\n');
+
+  std::ostringstream ss, slicess;
+
+  // Keep first line of message
+  if(multilineMessage) {
+    ss << message.substr(0, firstLineEnd) << endl << endl;
+  } else {
+    ss << message << endl << endl;
+  }
+
+  int posSliceStart = (charPositionInLine - 50 <= 0) ? 0 : charPositionInLine - 50 + 5;
+  int posSliceEnd = posSliceStart + 70;
+  int caretPos = 0;
+  int caretPosExtra = 0; // for inital intendation, epilipses etc.
+
+  ss << "  "; caretPosExtra += 2;
+  if(posSliceStart > 0) {
+    ss << "..."; caretPosExtra += 3;
+  }
+
+  for(int i = posSliceStart; lineStart[i] != '\n'; ++i) {
+    if(i == posSliceEnd) {
+      ss << "...";
+      break;
+    }
+    if(i < charPositionInLine) { caretPos++; }
+
+    if(!isprint(lineStart[i])) {
+      // non-printable character, something wrong, bail out
+      return message;
+    }
+
+    ss << (lineStart[i]);
+    slicess << (lineStart[i]);
+  }
+
+  // adjust position of caret, based on slice and message
+  {
+    int caretPosOrig = caretPos;
+    string slice = slicess.str();
+    if(isSimpleChar(slice[caretPos])) {
+      // if alphanumeric, try to go to beginning of word/number
+      while(caretPos > 0 && isSimpleChar(slice[caretPos - 1])) { --caretPos; }
+      if(caretPos == 0 && posSliceStart > 0) {
+        // reached start and this is not really the start? bail out
+        return message;
+      } else {
+        // likely it is also in original message? if so, very likely
+        // we found the right place
+        string word = slice.substr(caretPos, (caretPosOrig - caretPos + 1));
+        size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
+        Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
+        if( matchLoc != string::npos ) {
+          Debug("friendlyparser") << "[friendlyparser] Feeling good." << std::endl;
+        }
+      }
+    } else {
+      bool foundCaretPos = false;
+
+      for(int tries = 0; tries < 2 && caretPos > 0 && !foundCaretPos; ++tries) {
+        // go to nearest alphanumeric string (before current position),
+        // see if that word can be found in original message. If so,
+        // point to that, else keep pointer where it was.
+        int nearestWordEn = caretPos - 1;
+        while(nearestWordEn > 0 && !isSimpleChar(slice[nearestWordEn])) {
+          --nearestWordEn;
+        }
+        if(isSimpleChar(slice[nearestWordEn])) {
+          int nearestWordSt = nearestWordEn;
+          while(nearestWordSt > 0 && isSimpleChar(slice[nearestWordSt - 1])) {
+            --nearestWordSt;
+          }
+          string word = slice.substr(nearestWordSt, (nearestWordEn - nearestWordSt + 1));
+          size_t matchLoc = wholeWordMatch(message, word, isSimpleChar);
+          Debug("friendlyparser") << "[friendlyparser] nearest word = " << word << std::endl;
+          Debug("friendlyparser") << "[friendlyparser] matchLoc = " << matchLoc << endl;
+          if( matchLoc != string::npos ) {
+            Debug("friendlyparser") << "[friendlyparser] strong evidence that caret should be at "
+                                    << nearestWordSt << std::endl;
+            foundCaretPos = true;
+          }
+          caretPos = nearestWordSt;
+        }
+      }
+      if( !foundCaretPos) {
+        // this doesn't look good. caret generally getting printed
+        // at unhelpful positions. improve upstream?
+        return message;
+      }
+    }
+    caretPos += caretPosExtra;
+  }// end of caret position computation/heuristics
+
+  ss << std::endl;
+  while( caretPos-- > 0 ) {
+    ss << ' ';
+  }
+  ss << '^' << endl;
+  if(multilineMessage) {
+     ss << message.substr(firstLineEnd, message.size() - firstLineEnd);;
+  }
+  return ss.str();
+}
+
+void AntlrInput::parseError(const std::string& message, bool eofException)
    throw (ParserException) {
+
+  string updatedMessage = parseErrorHelper((const char*)d_antlr3InputStream->getLineBuf(d_antlr3InputStream),
+                                           d_lexer->getCharPositionInLine(d_lexer),
+                                           message);
+
    Debug("parser") << "Throwing exception: "
-      << getInputStream()->getName() << ":"
+      << (const char*)d_lexer->rec->state->tokSource->fileName->chars << ":"
        << d_lexer->getLine(d_lexer) << "."
        << d_lexer->getCharPositionInLine(d_lexer) << ": "
-      << message << endl;
-  throw ParserException(message, getInputStream()->getName(),
-                        d_lexer->getLine(d_lexer),
-                        d_lexer->getCharPositionInLine(d_lexer));
+      << updatedMessage << endl;
+  if(eofException) {
+    throw ParserEndOfFileException(message,
+                                   (const char*)d_lexer->rec->state->tokSource->fileName->chars,
+                                   d_lexer->getLine(d_lexer),
+                                   d_lexer->getCharPositionInLine(d_lexer));
+  } else {
+    throw ParserException(updatedMessage,
+                          (const char*)d_lexer->rec->state->tokSource->fileName->chars,
+                          d_lexer->getLine(d_lexer),
+                          d_lexer->getCharPositionInLine(d_lexer));
+  }
  }