From 0da095c3f5be79a85c085b4b46d7a0a0513ecdd6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Andres=20N=C3=B6tzli?= Date: Fri, 16 Jun 2017 17:42:22 -0700 Subject: [PATCH] Fix stream parsing This commit fixes bug 811. Bug 811 was caused because tokens were referring to a buffer that was reallocated and thus the pointers were not valid anymore. Background: The buffered input stream avoids copying the whole input stream before handing it to ANTLR (in contrast to the non-buffered input stream that first copies everything into a buffer). This enables interactivity (e.g. with kind2) and may save memory. CVC4 uses it when reading from stdin in competition mode for the application track (the incremental benchmarks) and in non-competition mode. To set the CVC4_SMTCOMP_APPLICATION_TRACK flag, the {C,CXX}FLAGS have to be modified at configure time. Solution: This commit fixes the issue by changing how a stream gets buffered. Instead of storing the stream into a single buffer, CVC4 now stores each line in a separate buffer, making sure that they do not have to move, keeping tokens valid. The commit adds the LineBuffer class for managing those buffers. It further modifies CVC4's LA and consume functions to use line number and position within a line to index into the line buffer. This allows us to use the standard mark()/etc. functions because they automatically store and restore that state. The solution also (arguably) simplifies the code. Disadvantages: Tokens split across lines would cause problems (seems reasonable to me). One allocation per line. Alternatives considered: Pull request 162 by Tim was a first attempt to solve the problem. The issues with this solution are: memory usage (old versions of the buffer do not get deleted), tokens split across buffers would be problematic, and mark()/rewind()/etc. would have to be overwritten for the approach to work. I had a partially working fix that used indexes into the stream instead of pointers to memory. The solution stored the content of the stream into a segmented buffer (lines were not guaranteed to be consecutive in memory. This approach was working for basic use cases but had the following issues: ugly casting (the solution requires casting the index to a pointer and storing it in the input stream's nextChar because that's where ANTLR is taking the location information from when creating a token), more modifications (not only would this solution require overwriting more functions of the input stream such as substr, it also requires changes to the use of GETCHARINDEX() in the Smt2 parser and AntlrInput::tokenText() for example), more complex code. --- src/parser/Makefile.am | 2 + src/parser/antlr_input.cpp | 43 +-- src/parser/antlr_input.h | 22 +- src/parser/antlr_line_buffered_input.cpp | 319 +++++++++++++---------- src/parser/antlr_line_buffered_input.h | 22 +- src/parser/input.h | 3 - src/parser/line_buffer.cpp | 88 +++++++ src/parser/line_buffer.h | 76 ++++++ 8 files changed, 395 insertions(+), 180 deletions(-) create mode 100644 src/parser/line_buffer.cpp create mode 100644 src/parser/line_buffer.h diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am index ca10de684..a316019fd 100644 --- a/src/parser/Makefile.am +++ b/src/parser/Makefile.am @@ -46,6 +46,8 @@ libcvc4parser_la_SOURCES = \ bounded_token_factory.h \ input.cpp \ input.h \ + line_buffer.cpp \ + line_buffer.h \ memory_mapped_input_buffer.cpp \ memory_mapped_input_buffer.h \ parser.cpp \ diff --git a/src/parser/antlr_input.cpp b/src/parser/antlr_input.cpp index b2f2cdd53..cbdaaa2bf 100644 --- a/src/parser/antlr_input.cpp +++ b/src/parser/antlr_input.cpp @@ -50,23 +50,26 @@ namespace parser { // These functions exactly wrap the antlr3 source inconsistencies. // These are the only location CVC4_ANTLR3_OLD_INPUT_STREAM ifdefs appear. // No other sanity checking happens; -pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input, const std::string& name); +pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input, + const std::string& name, + LineBuffer* line_buffer); pANTLR3_INPUT_STREAM newAntlr3FileStream(const std::string& name); pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep, uint32_t size, const std::string& name); -pANTLR3_INPUT_STREAM -newAntlr3BufferedStream(std::istream& input, const std::string& name){ +pANTLR3_INPUT_STREAM newAntlr3BufferedStream(std::istream& input, + const std::string& name, + LineBuffer* line_buffer) { pANTLR3_INPUT_STREAM inputStream = NULL; pANTLR3_UINT8 name_duplicate = (pANTLR3_UINT8) strdup(name.c_str()); #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM inputStream = - antlr3LineBufferedStreamNew(input, 0, name_duplicate); + antlr3LineBufferedStreamNew(input, 0, name_duplicate, line_buffer); #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */ - inputStream = - antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT, name_duplicate); + inputStream = antlr3LineBufferedStreamNew(input, ANTLR3_ENC_8BIT, + name_duplicate, line_buffer); #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */ free(name_duplicate); @@ -107,14 +110,14 @@ pANTLR3_INPUT_STREAM newAntrl3InPlaceStream(pANTLR3_UINT8 basep, return inputStream; } -AntlrInputStream::AntlrInputStream(std::string name, - pANTLR3_INPUT_STREAM input, +AntlrInputStream::AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input, bool fileIsTemporary, - pANTLR3_UINT8 inputString) : - InputStream(name, fileIsTemporary), - d_input(input), - d_inputString(inputString) -{ + pANTLR3_UINT8 inputString, + LineBuffer* line_buffer) + : InputStream(name, fileIsTemporary), + d_input(input), + d_inputString(inputString), + d_line_buffer(line_buffer) { assert( input != NULL ); input->fileName = input->strFactory->newStr8(input->strFactory, (pANTLR3_UINT8)name.c_str()); } @@ -124,6 +127,9 @@ AntlrInputStream::~AntlrInputStream() { if(d_inputString != NULL){ free(d_inputString); } + if (d_line_buffer != NULL) { + delete d_line_buffer; + } } pANTLR3_INPUT_STREAM AntlrInputStream::getAntlr3InputStream() const { @@ -150,7 +156,7 @@ AntlrInputStream::newFileInputStream(const std::string& name, if(input == NULL) { throw InputStreamException("Couldn't open file: " + name); } - return new AntlrInputStream( name, input, false, NULL ); + return new AntlrInputStream(name, input, false, NULL, NULL); } @@ -162,9 +168,11 @@ AntlrInputStream::newStreamInputStream(std::istream& input, pANTLR3_INPUT_STREAM inputStream = NULL; pANTLR3_UINT8 inputStringCopy = NULL; + LineBuffer* line_buffer = NULL; if(lineBuffered) { - inputStream = newAntlr3BufferedStream(input, name); + line_buffer = new LineBuffer(&input); + inputStream = newAntlr3BufferedStream(input, name, line_buffer); } else { // Since these are all NULL on entry, realloc will be called @@ -207,7 +215,8 @@ AntlrInputStream::newStreamInputStream(std::istream& input, throw InputStreamException("Couldn't initialize input: " + name); } - return new AntlrInputStream( name, inputStream, false, inputStringCopy ); + return new AntlrInputStream(name, inputStream, false, inputStringCopy, + line_buffer); } @@ -230,7 +239,7 @@ AntlrInputStream::newStringInputStream(const std::string& input, if( inputStream==NULL ) { throw InputStreamException("Couldn't initialize string input: '" + input + "'"); } - return new AntlrInputStream( name, inputStream, false, input_duplicate ); + return new AntlrInputStream(name, inputStream, false, input_duplicate, NULL); } AntlrInput* AntlrInput::newInput(InputLanguage lang, AntlrInputStream& inputStream) { diff --git a/src/parser/antlr_input.h b/src/parser/antlr_input.h index 8e5e82811..293be0087 100644 --- a/src/parser/antlr_input.h +++ b/src/parser/antlr_input.h @@ -35,12 +35,12 @@ #include "base/output.h" #include "parser/bounded_token_buffer.h" #include "parser/input.h" +#include "parser/line_buffer.h" #include "parser/parser_exception.h" #include "util/bitvector.h" #include "util/integer.h" #include "util/rational.h" - namespace CVC4 { class Command; @@ -62,10 +62,11 @@ private: */ pANTLR3_UINT8 d_inputString; - AntlrInputStream(std::string name, - pANTLR3_INPUT_STREAM input, - bool fileIsTemporary, - pANTLR3_UINT8 inputString); + LineBuffer* d_line_buffer; + + AntlrInputStream(std::string name, pANTLR3_INPUT_STREAM input, + bool fileIsTemporary, pANTLR3_UINT8 inputString, + LineBuffer* line_buffer); /* This is private and unimplemented, because you should never use it. */ AntlrInputStream(const AntlrInputStream& inputStream) CVC4_UNDEFINED; @@ -201,9 +202,6 @@ public: /** Get a bitvector constant from the text of the number and the size token */ static BitVector tokenToBitvector(pANTLR3_COMMON_TOKEN number, pANTLR3_COMMON_TOKEN size); - /** Retrieve the remaining text in this input. */ - std::string getUnparsedText(); - /** Get the ANTLR3 lexer for this input. */ pANTLR3_LEXER getAntlr3Lexer() { return d_lexer; } @@ -243,14 +241,6 @@ protected: virtual void setParser(Parser& parser); };/* class AntlrInput */ -inline std::string AntlrInput::getUnparsedText() { - const char *base = (const char *)d_antlr3InputStream->data; - const char *cur = (const char *)d_antlr3InputStream->nextChar; - - return std::string(cur, d_antlr3InputStream->sizeBuf - (cur - base)); -} - - inline std::string AntlrInput::tokenText(pANTLR3_COMMON_TOKEN token) { if( token->type == ANTLR3_TOKEN_EOF ) { return "<>"; diff --git a/src/parser/antlr_line_buffered_input.cpp b/src/parser/antlr_line_buffered_input.cpp index 22bbaf1db..e65125ad9 100644 --- a/src/parser/antlr_line_buffered_input.cpp +++ b/src/parser/antlr_line_buffered_input.cpp @@ -2,17 +2,30 @@ /*! \file antlr_line_buffered_input.cpp ** \verbatim ** Top contributors (to current version): - ** Morgan Deters, Tim King + ** Morgan Deters, Tim King, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2016 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** - ** \brief [[ Add one-line brief description here ]] + ** \brief A custom ANTLR input stream that reads from the input stream lazily ** - ** [[ Add lengthier description here ]] - ** \todo document this file + ** WARNING: edits to this and related files should be done carefully due to the + *interaction with ANTLR internals. + ** + ** This overwrites the _LA and the consume functions of the ANTLR input stream + ** to use a LineBuffer instead of accessing a buffer. The lines are kept in + ** memory to make sure that existing tokens remain valid (tokens store pointers + ** to the corresponding input). We do not overwrite mark(), etc. + *because + ** we can use the line number and the position within that line to index into + *the + ** line buffer and the default markers already store and restore that + ** information. The line buffer guarantees that lines are consecutive in + ** memory, so ANTLR3_INPUT_STREAM::getLineBuf() should work as intended and + ** tokens themselves are consecutive in memory (we are assuming that tokens + ** are not split across multiple lines). **/ // We rely on the inclusion of #include in @@ -32,7 +45,8 @@ namespace CVC4 { namespace parser { -static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream(std::istream& in); +static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream( + std::istream& in, LineBuffer* line_buffer); static void setupInputStream(pANTLR3_INPUT_STREAM input) @@ -206,111 +220,133 @@ setupInputStream(pANTLR3_INPUT_STREAM input) #endif /* 0 */ } -static ANTLR3_UCHAR -myLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) { - pANTLR3_INPUT_STREAM input; +static ANTLR3_UCHAR bufferedInputLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) { + pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super)); + CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM line_buffered_input = + (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input; + uint8_t* result = line_buffered_input->line_buffer->getPtrWithOffset( + input->line, input->charPositionInLine, la - 1); + return (result != NULL) ? *result : ANTLR3_CHARSTREAM_EOF; +} - input = ((pANTLR3_INPUT_STREAM) (is->super)); +static void bufferedInputRewind(pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) { + // This function is essentially the same as the original + // antlr38BitRewind() but does not do any seek. The seek in the + // original function does not do anything and also calls + // antlr38BitSeek() instead of the overloaded seek() function, which + // leads to subtle bugs. + pANTLR3_LEX_STATE state; + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM)is->super); + + // Perform any clean up of the marks + input->istream->release(input->istream, mark); + + // Find the supplied mark state + state = (pANTLR3_LEX_STATE)input->markers->get(input->markers, + (ANTLR3_UINT32)(mark - 1)); + if (state == NULL) { + return; + } + + // Reset the information in the mark + input->charPositionInLine = state->charPositionInLine; + input->currentLine = state->currentLine; + input->line = state->line; + input->nextChar = state->nextChar; +} - Debug("pipe") << "LA" << std::endl; - if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) - { - std::istream& in = *((CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in; - //MGD - // in.clear(); - if(!in) { - Debug("pipe") << "EOF" << std::endl; - return ANTLR3_CHARSTREAM_EOF; - } - Debug("pipe") << "READ" << std::endl; - if(input->data == NULL) { - Debug("pipe") << "ALLOC" << std::endl; - input->data = malloc(1024); - input->nextChar = input->data; - } else { - Debug("pipe") << "REALLOC" << std::endl; - size_t pos = (char*)input->nextChar - (char*)input->data; - input->data = realloc(input->data, input->sizeBuf + 1024); - input->nextChar = (char*)input->data + pos; - } - in.getline((((char*)input->data) + input->sizeBuf), 1024); - while(in.fail() && !in.eof()) { - Debug("pipe") << "input string too long, reallocating" << std::endl; - input->sizeBuf += strlen(((char*)input->data) + input->sizeBuf); - size_t pos = (char*)input->nextChar - (char*)input->data; - input->data = realloc(input->data, input->sizeBuf + 1024); - input->nextChar = (char*)input->data + pos; - in.clear(); - in.getline((((char*)input->data) + input->sizeBuf), 1024); - } - input->sizeBuf += strlen(((char*)input->data) + input->sizeBuf); - assert(*(((char*)input->data) + input->sizeBuf) == '\0'); - Debug("pipe") << "SIZEBUF now " << input->sizeBuf << std::endl; - *(((char*)input->data) + input->sizeBuf) = '\n'; - ++input->sizeBuf; +static void bufferedInputConsume(pANTLR3_INT_STREAM is) { + pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super)); + CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM line_buffered_input = + (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input; + + uint8_t* current = line_buffered_input->line_buffer->getPtr( + input->line, input->charPositionInLine); + if (current != NULL) { + input->charPositionInLine++; + + if (*current == LineBuffer::NewLineChar) { + // Reset for start of a new line of input + input->line++; + input->charPositionInLine = 0; + input->currentLine = line_buffered_input->line_buffer->getPtr( + input->line, input->charPositionInLine); + Debug("pipe") << "-- newline!" << std::endl; } - Debug("pipe") << "READ POINTER[" << la << "] AT: >>" << std::string(((char*)input->nextChar), input->sizeBuf - (((char*)input->nextChar) - (char*)input->data)) << "<< returning '" << (char)(*((pANTLR3_UINT8)input->nextChar + la - 1)) << "' (" << (unsigned)(*((pANTLR3_UINT8)input->nextChar + la - 1)) << ")" << std::endl; - return (ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar + la - 1)); + input->nextChar = line_buffered_input->line_buffer->getPtr( + input->line, input->charPositionInLine); + } } - -static void -myConsume(pANTLR3_INT_STREAM is) -{ - pANTLR3_INPUT_STREAM input; - - input = ((pANTLR3_INPUT_STREAM) (is->super)); - - Debug("pipe") << "consume! '" << *(char*)input->nextChar << "' (" << (unsigned)*(char*)input->nextChar << ")" << std::endl; - if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) - { - /* Indicate one more character in this line - */ - input->charPositionInLine++; - - if ((ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar)) == input->newlineChar) - { - /* Reset for start of a new line of input - */ - input->line++; - input->charPositionInLine = 0; - input->currentLine = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); - Debug("pipe") << "-- newline!" << std::endl; - } - - /* Increment to next character position - */ - input->nextChar = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); - Debug("pipe") << "-- advance nextChar! looking at '" << *(char*)input->nextChar << "' (" << (unsigned)*(char*)input->nextChar << ")" << std::endl; - } else Debug("pipe") << "-- nothing!" << std::endl; +static void bufferedInputSeek(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) { + // In contrast to the original antlr38BitSeek() function, we only + // support seeking forward (seeking backwards is only supported for + // rewinding in the original code, which we do not do when rewinding, + // so this should be fine). + pANTLR3_INPUT_STREAM input = ((pANTLR3_INPUT_STREAM)(is->super)); + pANTLR3_LINE_BUFFERED_INPUT_STREAM line_buffered_input = + (CVC4::parser::pANTLR3_LINE_BUFFERED_INPUT_STREAM)input; + + // Check that we are not seeking backwards. + assert(!line_buffered_input->line_buffer->isPtrBefore( + (uint8_t*)seekPoint, input->line, input->charPositionInLine)); + + ssize_t count = (ssize_t)(seekPoint - (ANTLR3_MARKER)(input->nextChar)); + while (count > 0) { + is->consume(is); + count--; + } } -pANTLR3_INPUT_STREAM -antlr3LineBufferedStreamNew(std::istream& in, ANTLR3_UINT32 encoding, pANTLR3_UINT8 name) -{ - pANTLR3_INPUT_STREAM input; - - if(!in) { - return NULL; - } +static ANTLR3_UINT32 bufferedInputSize(pANTLR3_INPUT_STREAM input) { + // Not supported for this type of stream + assert(false); + return 0; +} - // First order of business is to set up the stream and install the data pointer. - // Then we will work out the encoding and byte order and adjust the API functions that are installed for the - // default 8Bit stream accordingly. - // - input = antlr3CreateLineBufferedStream(in); - if (input == NULL) - { - return NULL; - } +static void bufferedInputSetNewLineChar(pANTLR3_INPUT_STREAM input, + ANTLR3_UINT32 newlineChar) { + // Not supported for this type of stream + assert(false); +} - // Size (in bytes) of the given 'string' - // - input->sizeBuf = 0; +static void bufferedInputSetUcaseLA(pANTLR3_INPUT_STREAM input, + ANTLR3_BOOLEAN flag) { + // Not supported for this type of stream + assert(false); +} - input->istream->_LA = myLA; - input->istream->consume = myConsume; +pANTLR3_INPUT_STREAM antlr3LineBufferedStreamNew(std::istream& in, + ANTLR3_UINT32 encoding, + pANTLR3_UINT8 name, + LineBuffer* line_buffer) { + pANTLR3_INPUT_STREAM input; + + if (!in) { + return NULL; + } + + // First order of business is to set up the stream and install the data + // pointer. + // Then we will work out the encoding and byte order and adjust the API + // functions that are installed for the + // default 8Bit stream accordingly. + // + input = antlr3CreateLineBufferedStream(in, line_buffer); + if (input == NULL) { + return NULL; + } + + input->istream->_LA = bufferedInputLA; + input->istream->consume = bufferedInputConsume; + input->istream->seek = bufferedInputSeek; + input->istream->rewind = bufferedInputRewind; + input->size = bufferedInputSize; + input->SetNewLineChar = bufferedInputSetNewLineChar; + input->setUcaseLA = bufferedInputSetUcaseLA; #ifndef CVC4_ANTLR3_OLD_INPUT_STREAM // We have the data in memory now so we can deal with it according to @@ -326,53 +362,60 @@ antlr3LineBufferedStreamNew(std::istream& in, ANTLR3_UINT32 encoding, pANTLR3_UI // Now we can set up the file name // - input->istream->streamName = input->strFactory->newStr8(input->strFactory, name); - input->fileName = input->istream->streamName; + input->istream->streamName = + input->strFactory->newStr8(input->strFactory, name); + input->fileName = input->istream->streamName; return input; } -static pANTLR3_INPUT_STREAM -antlr3CreateLineBufferedStream(std::istream& in) -{ - // Pointer to the input stream we are going to create - // - pANTLR3_INPUT_STREAM input; - - if (!in) - { - return NULL; - } - - // Allocate memory for the input stream structure - // - input = (pANTLR3_INPUT_STREAM) - ANTLR3_CALLOC(1, sizeof(ANTLR3_LINE_BUFFERED_INPUT_STREAM)); - - if (input == NULL) - { - return NULL; - } - - // Structure was allocated correctly, now we can install the pointer - // - input->data = malloc(1024); - input->isAllocated = ANTLR3_FALSE; - - ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in = ∈ - // Call the common 8 bit input stream handler - // initialization. - // +static pANTLR3_INPUT_STREAM antlr3CreateLineBufferedStream( + std::istream& in, LineBuffer* line_buffer) { + // Pointer to the input stream we are going to create + // + pANTLR3_INPUT_STREAM input; + + if (!in) { + return NULL; + } + + // Allocate memory for the input stream structure + // + input = (pANTLR3_INPUT_STREAM)ANTLR3_CALLOC( + 1, sizeof(ANTLR3_LINE_BUFFERED_INPUT_STREAM)); + + if (input == NULL) { + return NULL; + } + + // Structure was allocated correctly, now we can install the pointer + // + input->data = NULL; + input->isAllocated = ANTLR3_FALSE; + + ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->in = ∈ + ((pANTLR3_LINE_BUFFERED_INPUT_STREAM)input)->line_buffer = line_buffer; +// Call the common 8 bit input stream handler +// initialization. +// #ifdef CVC4_ANTLR3_OLD_INPUT_STREAM - antlr3AsciiSetupStream(input, ANTLR3_CHARSTREAM); + antlr3AsciiSetupStream(input, ANTLR3_CHARSTREAM); #else /* CVC4_ANTLR3_OLD_INPUT_STREAM */ - antlr38BitSetupStream(input); - // In some libantlr3c 3.4-beta versions, this call is not included in the above. - // This is probably an erroneously-deleted line in the libantlr3c source since 3.2. - antlr3GenericSetupStream(input); + antlr38BitSetupStream(input); + // In some libantlr3c 3.4-beta versions, this call is not included in the + // above. + // This is probably an erroneously-deleted line in the libantlr3c source since + // 3.2. + antlr3GenericSetupStream(input); #endif /* CVC4_ANTLR3_OLD_INPUT_STREAM */ - return input; + input->sizeBuf = 0; + input->newlineChar = LineBuffer::NewLineChar; + input->charPositionInLine = 0; + input->line = 0; + input->nextChar = line_buffer->getPtr(0, 0); + input->currentLine = line_buffer->getPtr(0, 0); + return input; } }/* CVC4::parser namespace */ diff --git a/src/parser/antlr_line_buffered_input.h b/src/parser/antlr_line_buffered_input.h index 2c01a9bde..4535ffe6b 100644 --- a/src/parser/antlr_line_buffered_input.h +++ b/src/parser/antlr_line_buffered_input.h @@ -2,17 +2,22 @@ /*! \file antlr_line_buffered_input.h ** \verbatim ** Top contributors (to current version): - ** Morgan Deters, Tim King + ** Morgan Deters, Tim King, Andres Noetzli ** This file is part of the CVC4 project. ** Copyright (c) 2009-2016 by the authors listed in the file AUTHORS ** in the top-level source directory) and their institutional affiliations. ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** - ** \brief [[ Add one-line brief description here ]] + ** \brief A custom ANTLR input stream that reads from the input stream lazily ** - ** [[ Add lengthier description here ]] - ** \todo document this file + ** By default, ANTLR expects the whole input to be in a single, consecutive + ** buffer. When doing incremental solving and the input is coming from the + ** standard input, this is problematic because CVC4 might receive new input + ** based on the result of solving the existing input. + ** + ** This file overwrites the _LA and the consume functions of the input streamto + ** achieve that and stores the lines received so far in a LineBuffer. **/ // These headers should be the first two included. @@ -27,16 +32,21 @@ #include +#include "parser/line_buffer.h" + namespace CVC4 { namespace parser { typedef struct ANTLR3_LINE_BUFFERED_INPUT_STREAM { ANTLR3_INPUT_STREAM antlr; std::istream* in; + LineBuffer* line_buffer; } *pANTLR3_LINE_BUFFERED_INPUT_STREAM; -pANTLR3_INPUT_STREAM -antlr3LineBufferedStreamNew(std::istream& in, ANTLR3_UINT32 encoding, pANTLR3_UINT8 name); +pANTLR3_INPUT_STREAM antlr3LineBufferedStreamNew(std::istream& in, + ANTLR3_UINT32 encoding, + pANTLR3_UINT8 name, + LineBuffer* line_buffer); }/* CVC4::parser namespace */ }/* CVC4 namespace */ diff --git a/src/parser/input.h b/src/parser/input.h index 7dce369c5..6aec8a2b1 100644 --- a/src/parser/input.h +++ b/src/parser/input.h @@ -140,9 +140,6 @@ public: /** Destructor. Frees the input stream and closes the input. */ virtual ~Input(); - /** Retrieve the remaining text in this input. */ - virtual std::string getUnparsedText() = 0; - /** Get the language that this Input is reading. */ virtual InputLanguage getLanguage() const throw() = 0; diff --git a/src/parser/line_buffer.cpp b/src/parser/line_buffer.cpp new file mode 100644 index 000000000..cdb06193e --- /dev/null +++ b/src/parser/line_buffer.cpp @@ -0,0 +1,88 @@ +/********************* */ +/*! \file line_buffer.cpp + ** \verbatim + ** Top contributors (to current version): + ** Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2016 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief The LineBuffer class stores lines from an input stream + ** + ** For each line, the class allocates a separate buffer. + **/ + +#include "parser/line_buffer.h" + +#include +#include +#include +#include + +namespace CVC4 { +namespace parser { + +LineBuffer::LineBuffer(std::istream* stream) : d_stream(stream) {} + +LineBuffer::~LineBuffer() { + for (size_t i = 0; i < d_lines.size(); i++) { + delete[] d_lines[i]; + } +} + +uint8_t* LineBuffer::getPtr(size_t line, size_t pos_in_line) { + if (!readToLine(line)) { + return NULL; + } + assert(pos_in_line < d_sizes[line]); + return d_lines[line] + pos_in_line; +} + +uint8_t* LineBuffer::getPtrWithOffset(size_t line, size_t pos_in_line, + size_t offset) { + if (!readToLine(line)) { + return NULL; + } + if (pos_in_line + offset >= d_sizes[line]) { + return getPtrWithOffset(line + 1, 0, + offset - (d_sizes[line] - pos_in_line - 1)); + } + assert(pos_in_line + offset < d_sizes[line]); + return d_lines[line] + pos_in_line + offset; +} + +bool LineBuffer::isPtrBefore(uint8_t* ptr, size_t line, size_t pos_in_line) { + for (ssize_t i = line; i >= 0; i--) { + // NOTE: std::less is guaranteed to give consistent results when comparing + // pointers of different arrays (in contrast to built-in comparison + // operators). + uint8_t* end = d_lines[i] + ((i == line) ? pos_in_line : d_sizes[i]); + if (std::less()(d_lines[i] - 1, ptr) && + std::less()(ptr, end)) { + return true; + } + } + return false; +} + +bool LineBuffer::readToLine(size_t line) { + while (line >= d_lines.size()) { + if (!(*d_stream)) { + return false; + } + + std::string line; + std::getline(*d_stream, line); + uint8_t* segment = new uint8_t[line.size() + 1]; + std::memcpy(segment, line.c_str(), line.size()); + segment[line.size()] = LineBuffer::NewLineChar; + d_lines.push_back(segment); + d_sizes.push_back(line.size() + 1); + } + return true; +} + +} // namespace parser +} // namespace CVC4 diff --git a/src/parser/line_buffer.h b/src/parser/line_buffer.h new file mode 100644 index 000000000..d7ccb5a10 --- /dev/null +++ b/src/parser/line_buffer.h @@ -0,0 +1,76 @@ +/********************* */ +/*! \file line_buffer.h + ** \verbatim + ** Top contributors (to current version): + ** Andres Noetzli + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2016 by the authors listed in the file AUTHORS + ** in the top-level source directory) and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief The LineBuffer class stores lines from an input stream + ** + ** Each line is guaranteed to be consecutive in memory. The content in + ** the line buffer can be addressed using line number and the position + ** within the line. + **/ + +#include "cvc4parser_private.h" + +#ifndef __CVC4__PARSER__LINE_BUFFER_H +#define __CVC4__PARSER__LINE_BUFFER_H + +#include +#include +#include + +namespace CVC4 { +namespace parser { + +class LineBuffer { + public: + static const uint8_t NewLineChar = '\n'; + + LineBuffer(std::istream* stream); + ~LineBuffer(); + + /** + * Gets a pointer to a char at a specific line and position within that + * line. + */ + uint8_t* getPtr(size_t line, size_t pos_in_line); + + /** + * Gets a pointer to a char at an offset relative to a specific line and + * position within that line. + */ + uint8_t* getPtrWithOffset(size_t line, size_t pos_in_line, size_t offset); + + /** + * Tests whether a given pointer points to a location before a given + * line and position within that line. + */ + bool isPtrBefore(uint8_t* ptr, size_t line, size_t pos_in_line); + + private: + /** + * Reads lines up to a line number from the input if needed (it does + * nothing for the lines that were already read). Returns false if the end + * of the input stream has been reached and not all lines could be read. + */ + bool readToLine(size_t line); + + std::istream* d_stream; + // Each element in this vector corresponds to a line from the input stream. + // WARNING: not null-terminated. + std::vector d_lines; + // Each element in this vector corresponds to the length of a line from the + // input stream. + std::vector d_sizes; +}; + +} // namespace parser +} // namespace CVC4 + +#endif /* __CVC4__PARSER__LINE_BUFFER_H */ -- 2.30.2