From 4afa6c4c3e7bb4ce0aea876120bf997e42353f67 Mon Sep 17 00:00:00 2001 From: Andreas Hansson Date: Mon, 7 Jan 2013 13:05:37 -0500 Subject: [PATCH] base: Add wrapped protobuf input stream This patch adds support for inputting protobuf messages through a ProtoInputStream which hides the internal streams used by the library. The stream is created based on the name of an input file and optionally includes decompression using gzip. The input stream will start by getting a magic number from the file, and also verify that it matches with the expected value. Once opened, messages can be read incrementally from the stream, returning true/false until an error occurs or the end of the file is reached. --- src/proto/protoio.cc | 105 +++++++++++++++++++++++++++++++++++++++++-- src/proto/protoio.hh | 103 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 202 insertions(+), 6 deletions(-) diff --git a/src/proto/protoio.cc b/src/proto/protoio.cc index 852ba56fa..ae1e2d8c2 100644 --- a/src/proto/protoio.cc +++ b/src/proto/protoio.cc @@ -62,10 +62,8 @@ ProtoOutputStream::ProtoOutputStream(const string& filename) : codedStream = new io::CodedOutputStream(zeroCopyStream); } - // Use the ASCII characters gem5 as our magic number and write it - // to the file - const uint32_t magic_number = 0x356d6567; - codedStream->WriteLittleEndian32(magic_number); + // Write the magic number to the file + codedStream->WriteLittleEndian32(magicNumber); // Note that each type of stream (packet, instruction etc) should // add its own header and perform the appropriate checks @@ -91,3 +89,102 @@ ProtoOutputStream::write(const Message& msg) if (!msg.SerializeToCodedStream(codedStream)) panic("Unable to write message to coded stream\n"); } + +ProtoInputStream::ProtoInputStream(const string& filename) : + fileStream(filename, ios::in | ios::binary), fileName(filename), + useGzip(false), + zeroCopyStream(NULL), gzipStream(NULL), codedStream(NULL) +{ + if (!fileStream.good()) + panic("Could not open 
%s for reading\n", filename); + + // check the first two bytes for the gzip magic number (0x1f 0x8b) + unsigned char bytes[2]; + fileStream.read((char*) bytes, 2); + useGzip = fileStream.good() && bytes[0] == 0x1f && bytes[1] == 0x8b; + + // seek to the start of the input file and clear any flags + fileStream.clear(); + fileStream.seekg(0, ifstream::beg); + + createStreams(); +} + +void +ProtoInputStream::createStreams() +{ + // All streams should be NULL at this point + assert(zeroCopyStream == NULL && gzipStream == NULL && + codedStream == NULL); + + // Wrap the input file in a zero copy stream, that in turn is + // wrapped in a gzip stream if the constructor detected gzip data. The + // resulting stream is in turn wrapped in a coded stream + zeroCopyStream = new io::IstreamInputStream(&fileStream); + if (useGzip) { + gzipStream = new io::GzipInputStream(zeroCopyStream); + codedStream = new io::CodedInputStream(gzipStream); + } else { + codedStream = new io::CodedInputStream(zeroCopyStream); + } + + uint32_t magic_check; + if (!codedStream->ReadLittleEndian32(&magic_check) || + magic_check != magicNumber) + panic("Input file %s is not a valid gem5 proto format.\n", + fileName); +} + +void +ProtoInputStream::destroyStreams() +{ + delete codedStream; + codedStream = NULL; + // As the compression is optional, see if the stream exists + if (gzipStream != NULL) { + delete gzipStream; + gzipStream = NULL; + } + delete zeroCopyStream; + zeroCopyStream = NULL; +} + + +ProtoInputStream::~ProtoInputStream() +{ + destroyStreams(); + fileStream.close(); +} + + +void +ProtoInputStream::reset() +{ + destroyStreams(); + // seek to the start of the input file and clear any flags + fileStream.clear(); + fileStream.seekg(0, ifstream::beg); + createStreams(); +} + +bool +ProtoInputStream::read(Message& msg) +{ + // Read a message from the stream by getting the size, using it as + // a limit when parsing the message, then popping the limit again + uint32_t size; + if (codedStream->ReadVarint32(&size)) 
{ + io::CodedInputStream::Limit limit = codedStream->PushLimit(size); + if (msg.ParseFromCodedStream(codedStream)) { + codedStream->PopLimit(limit); + // All went well, the message is parsed and the limit is + // popped again + return true; + } else { + panic("Unable to read message from coded stream %s\n", + fileName); + } + } + + return false; +} diff --git a/src/proto/protoio.hh b/src/proto/protoio.hh index bb11bc7e9..d5c6a4bac 100644 --- a/src/proto/protoio.hh +++ b/src/proto/protoio.hh @@ -40,7 +40,7 @@ /** * @file - * Declaration of a wrapper for protobuf output streams. + * Declaration of a wrapper for protobuf output streams and input streams. */ #ifndef __PROTO_PROTOIO_HH__ @@ -53,6 +53,34 @@ #include +/** + * A ProtoStream provides the shared functionality of the input and + * output streams. At the moment this is limited to the magic number. + */ +class ProtoStream +{ + + protected: + + /// Use the ASCII characters gem5, stored little endian, as our magic number + static const uint32_t magicNumber = 0x356d6567; + + /** + * Create a ProtoStream. + */ + ProtoStream() {} + + private: + + /** + * Hide the copy constructor and assignment operator. + * @{ + */ + ProtoStream(const ProtoStream&); + ProtoStream& operator=(const ProtoStream&); + /** @} */ +}; + /** * A ProtoOutputStream wraps a coded stream, potentially with * compression, based on looking at the file name. Writing to the @@ -61,7 +89,7 @@ * is made possible by encoding the length of each message in the * stream. */ -class ProtoOutputStream +class ProtoOutputStream : public ProtoStream { public: @@ -104,4 +132,75 @@ class ProtoOutputStream }; +/** + * A ProtoInputStream wraps a coded stream, potentially with + * decompression, based on the first two bytes of the file. Reading from the + * stream is done on a per-message basis to avoid having to deal with + * huge data structures. The latter assumes the length of each message + * is encoded in the stream when it is written. 
+ */ +class ProtoInputStream : public ProtoStream +{ + + public: + + /** + * Create an input stream for a given file name. If the file + * starts with the gzip magic bytes it is decompressed while reading. + * + * @param filename Path to the file to read from + */ + ProtoInputStream(const std::string& filename); + + /** + * Destruct the input stream, and also close the underlying file + * streams and coded streams. + */ + ~ProtoInputStream(); + + /** + * Read a message from the stream. + * + * @param msg Message read from the stream + * @return True if a message was read, false if no size could be read + */ + bool read(google::protobuf::Message& msg); + + /** + * Reset the input stream and seek to the beginning of the file. + */ + void reset(); + + private: + + /** + * Create the internal streams that are wrapping the input file. + */ + void createStreams(); + + /** + * Destroy the internal streams that are wrapping the input file. + */ + void destroyStreams(); + + /// Underlying file input stream + std::ifstream fileStream; + + /// Hold on to the file name for debug messages + const std::string fileName; + + /// Boolean flag to remember whether we use gzip or not + bool useGzip; + + /// Zero Copy stream wrapping the STL input stream + google::protobuf::io::IstreamInputStream* zeroCopyStream; + + /// Optional Gzip stream to wrap the Zero Copy stream + google::protobuf::io::GzipInputStream* gzipStream; + + /// Top-level coded stream that messages are read from + google::protobuf::io::CodedInputStream* codedStream; + +}; + #endif //__PROTO_PROTOIO_HH -- 2.30.2