+++ /dev/null
-from core import *
-from adapters import *
-from macros import *
-
-
-#===============================================================================
-# exceptions
-#===============================================================================
-class QuotedStringError(ConstructError):
- __slots__ = []
-
-
-#===============================================================================
-# constructs
-#===============================================================================
-class QuotedString(Construct):
- r"""
- A quoted string (begins with an opening-quote, terminated by a
- closing-quote, which may be escaped by an escape character)
-
- Parameters:
- * name - the name of the field
- * start_quote - the opening quote character. default is '"'
- * end_quote - the closing quote character. default is '"'
- * esc_char - the escape character, or None to disable escaping. defualt
- is "\" (backslash)
- * encoding - the character encoding (e.g., "utf8"), or None to return
- raw bytes. defualt is None.
- * allow_eof - whether to allow EOF before the closing quote is matched.
- if False, an exception will be raised when EOF is reached by the closing
- quote is missing. default is False.
-
- Example:
- QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None)
- """
- __slots__ = [
- "start_quote", "end_quote", "char", "esc_char", "encoding",
- "allow_eof"
- ]
- def __init__(self, name, start_quote = '"', end_quote = None,
- esc_char = '\\', encoding = None, allow_eof = False):
- Construct.__init__(self, name)
- if end_quote is None:
- end_quote = start_quote
- self.start_quote = Literal(start_quote)
- self.char = Char("char")
- self.end_quote = end_quote
- self.esc_char = esc_char
- self.encoding = encoding
- self.allow_eof = allow_eof
-
- def _parse(self, stream, context):
- self.start_quote._parse(stream, context)
- text = []
- escaped = False
- try:
- while True:
- ch = self.char._parse(stream, context)
- if ch == self.esc_char:
- if escaped:
- text.append(ch)
- escaped = False
- else:
- escaped = True
- elif ch == self.end_quote and not escaped:
- break
- else:
- text.append(ch)
- escaped = False
- except FieldError:
- if not self.allow_eof:
- raise
- text = "".join(text)
- if self.encoding is not None:
- text = text.decode(self.encoding)
- return text
-
- def _build(self, obj, stream, context):
- self.start_quote._build(None, stream, context)
- if self.encoding:
- obj = obj.encode(self.encoding)
- for ch in obj:
- if ch == self.esc_char:
- self.char._build(self.esc_char, stream, context)
- elif ch == self.end_quote:
- if self.esc_char is None:
- raise QuotedStringError("found ending quote in data, "
- "but no escape char defined", ch)
- else:
- self.char._build(self.esc_char, stream, context)
- self.char._build(ch, stream, context)
- self.char._build(self.end_quote, stream, context)
-
- def _sizeof(self, context):
- raise SizeofError("can't calculate size")
-
-
-#===============================================================================
-# macros
-#===============================================================================
-class WhitespaceAdapter(Adapter):
- """
- Adapter for whitespace sequences; do not use directly.
- See Whitespace.
-
- Parameters:
- * subcon - the subcon to adapt
- * build_char - the character used for encoding (building)
- """
- __slots__ = ["build_char"]
- def __init__(self, subcon, build_char):
- Adapter.__init__(self, subcon)
- self.build_char = build_char
- def _encode(self, obj, context):
- return self.build_char
- def _decode(self, obj, context):
- return None
-
-def Whitespace(charset = " \t", optional = True):
- """whitespace (space that is ignored between tokens). when building, the
- first character of the charset is used.
- * charset - the set of characters that are considered whitespace. default
- is space and tab.
- * optional - whether or not whitespace is optional. default is True.
- """
- con = CharOf(None, charset)
- if optional:
- con = OptionalGreedyRange(con)
- else:
- con = GreedyRange(con)
- return WhitespaceAdapter(con, build_char = charset[0])
-
-def Literal(text):
- """matches a literal string in the text
- * text - the text (string) to match
- """
- return ConstAdapter(Field(None, len(text)), text)
-
-def Char(name):
- """a one-byte character"""
- return Field(name, 1)
-
-def CharOf(name, charset):
- """matches only characters of a given charset
- * name - the name of the field
- * charset - the set of valid characters
- """
- return OneOf(Char(name), charset)
-
-def CharNoneOf(name, charset):
- """matches only characters that do not belong to a given charset
- * name - the name of the field
- * charset - the set of invalid characters
- """
- return NoneOf(Char(name), charset)
-
-def Alpha(name):
- """a letter character (A-Z, a-z)"""
- return CharOf(name, set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'))
-
-def Digit(name):
- """a digit character (0-9)"""
- return CharOf(name, set('0123456789'))
-
-def AlphaDigit(name):
- """an alphanumeric character (A-Z, a-z, 0-9)"""
- return CharOf(name, set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
-
-def BinDigit(name):
- """a binary digit (0-1)"""
- return CharOf(name, set('01'))
-
-def HexDigit(name):
- """a hexadecimal digit (0-9, A-F, a-f)"""
- return CharOf(name, set('0123456789abcdefABCDEF'))
-
-def Word(name):
- """a sequence of letters"""
- return StringAdapter(GreedyRange(Alpha(name)))
-
-class TextualIntAdapter(Adapter):
- """
- Adapter for textual integers
-
- Parameters:
- * subcon - the subcon to adapt
- * radix - the base of the integer (decimal, hexadecimal, binary, ...)
- * digits - the sequence of digits of that radix
- """
- __slots__ = ["radix", "digits"]
- def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"):
- Adapter.__init__(self, subcon)
- if radix > len(digits):
- raise ValueError("not enough digits for radix %d" % (radix,))
- self.radix = radix
- self.digits = digits
- def _encode(self, obj, context):
- chars = []
- if obj < 0:
- chars.append("-")
- n = -obj
- else:
- n = obj
- r = self.radix
- digs = self.digits
- while n > 0:
- n, d = divmod(n, r)
- chars.append(digs[d])
- # obj2 = "".join(reversed(chars))
- # filler = digs[0] * (self._sizeof(context) - len(obj2))
- # return filler + obj2
- return "".join(reversed(chars))
- def _decode(self, obj, context):
- return int("".join(obj), self.radix)
-
-def DecNumber(name):
- """decimal number"""
- return TextualIntAdapter(GreedyRange(Digit(name)))
-
-def BinNumber(name):
- """binary number"""
- return TextualIntAdapter(GreedyRange(Digit(name)), 2)
-
-def HexNumber(name):
- """hexadecimal number"""
- return TextualIntAdapter(GreedyRange(Digit(name)), 16)
-
-def StringUpto(name, charset):
- """a string that stretches up to a terminator, or EOF. unlike CString,
- StringUpto will no consume the terminator char.
- * name - the name of the field
- * charset - the set of terminator characters"""
- return StringAdapter(OptionalGreedyRange(CharNoneOf(name, charset)))
-
-def Line(name):
- r"""a textual line (up to "\n")"""
- return StringUpto(name, "\n")
-
-class IdentifierAdapter(Adapter):
- """
- Adapter for programmatic identifiers
-
- Parameters:
- * subcon - the subcon to adapt
- """
- def _encode(self, obj, context):
- return obj[0], obj[1:]
- def _decode(self, obj, context):
- return obj[0] + "".join(obj[1])
-
-def Identifier(name,
- headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"),
- tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
- ):
- """a programmatic identifier (symbol). must start with a char of headset,
- followed by a sequence of tailset characters
- * name - the name of the field
- * headset - charset for the first character. default is A-Z, a-z, and _
- * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _
- """
- return IdentifierAdapter(
- Sequence(name,
- CharOf("head", headset),
- OptionalGreedyRange(CharOf("tail", tailset)),
- )
- )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-