From bd1a09f92c4ecdffd45cc01b3847ddea4fb90701 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 26 Jan 2012 06:49:19 +0200 Subject: [PATCH] imported new version of construct with Python 3 compatibility --- elftools/construct/__init__.py | 139 ++-- elftools/construct/adapters.py | 98 ++- elftools/construct/core.py | 597 +++++++++++------- elftools/construct/debug.py | 52 +- elftools/construct/lib/__init__.py | 15 +- elftools/construct/lib/binary.py | 93 ++- elftools/construct/lib/bitstream.py | 39 +- elftools/construct/lib/container.py | 271 +++----- elftools/construct/lib/hex.py | 46 +- elftools/construct/macros.py | 448 ++++++++----- examples/elf_relocations.py | 7 +- examples/reference_output/elf_relocations.out | 9 +- 12 files changed, 992 insertions(+), 822 deletions(-) diff --git a/elftools/construct/__init__.py b/elftools/construct/__init__.py index 4814bcc..de335ae 100644 --- a/elftools/construct/__init__.py +++ b/elftools/construct/__init__.py @@ -1,43 +1,50 @@ """ -Construct 2.00 -- parsing made even more fun (and faster) + #### #### + ## #### ## ## #### ###### ##### ## ## #### ###### ## ## + ## ## ## ### ## ## ## ## ## ## ## ## ## #### ## + ## ## ## ###### ### ## ##### ## ## ## ## ## + ## ## ## ## ### ## ## ## ## ## ## ## ## ## + #### #### ## ## #### ## ## ## ##### #### ## ###### + + Parsing made even more fun (and faster too) Homepage: -http://construct.wikispaces.com + http://construct.wikispaces.com (including online tutorial) Typical usage: ->>> from construct import * - -Example: ->>> from construct import * ->>> ->>> s = Struct("foo", -... UBInt8("a"), -... UBInt16("b"), -... ) ->>> ->>> s.parse("\x01\x02\x03") -Container(a = 1, b = 515) ->>> print s.parse("\x01\x02\x03") -Container: - a = 1 - b = 515 ->>> s.build(Container(a = 1, b = 0x0203)) -"\x01\x02\x03" + >>> from construct import * + +Hands-on example: + >>> from construct import * + >>> s = Struct("foo", + ... UBInt8("a"), + ... UBInt16("b"), + ... ) + >>> s.parse("\\x01\\x02\\x03") + Container(a = 1, b = 515) + >>> print s.parse("\\x01\\x02\\x03") + Container: + a = 1 + b = 515 + >>> s.build(Container(a = 1, b = 0x0203)) + "\\x01\\x02\\x03" """ -from core import * -from adapters import * -from macros import * -from debug import Probe, Debugger + +from .core import * +from .adapters import * +from .macros import * +from .debug import Probe, Debugger #=============================================================================== -# meta data +# Metadata #=============================================================================== __author__ = "tomer filiba (tomerfiliba [at] gmail.com)" -__version__ = "2.00" +__maintainer__ = "Corbin Simpson " +__version__ = "2.06" #=============================================================================== -# shorthands +# Shorthand expressions #=============================================================================== Bits = BitField Byte = UBInt8 @@ -47,35 +54,57 @@ Tunnel = TunnelAdapter Embed = Embedded #=============================================================================== -# backward compatibility with RC1 +# Deprecated names +# Next scheduled name cleanout: 2.1 #=============================================================================== -MetaField = Field -MetaBytes = Field -GreedyRepeater = GreedyRange -OptionalGreedyRepeater = OptionalGreedyRange -Repeater = Array -StrictRepeater = Array -MetaRepeater = Array -OneOfValidator = OneOf -NoneOfValidator = NoneOf +import functools, warnings + +def deprecated(f): + @functools.wraps(f) + def wrapper(*args, **kwargs): + warnings.warn( + "This name is deprecated, use %s instead" % f.__name__, + DeprecationWarning, stacklevel=2) + return f(*args, **kwargs) + return wrapper + +MetaBytes = deprecated(MetaField) +GreedyRepeater = deprecated(GreedyRange) +OptionalGreedyRepeater = deprecated(OptionalGreedyRange) +Repeater = deprecated(Range) +StrictRepeater = deprecated(Array) +MetaRepeater = deprecated(Array) +OneOfValidator = deprecated(OneOf) +NoneOfValidator = deprecated(NoneOf) #=============================================================================== -# don't want to leek these out... +# exposed names #=============================================================================== -del encode_bin, decode_bin, int_to_bin, bin_to_int, swap_bytes -del Packer, StringIO -del HexString, LazyContainer, AttrDict - - - - - - - - - - - - - - +__all__ = [ + 'AdaptationError', 'Adapter', 'Alias', 'Aligned', 'AlignedStruct', + 'Anchor', 'Array', 'ArrayError', 'BFloat32', 'BFloat64', 'Bit', 'BitField', + 'BitIntegerAdapter', 'BitIntegerError', 'BitStruct', 'Bits', 'Bitwise', + 'Buffered', 'Byte', 'Bytes', 'CString', 'CStringAdapter', 'Const', + 'ConstAdapter', 'ConstError', 'Construct', 'ConstructError', 'Container', + 'Debugger', 'Embed', 'Embedded', 'EmbeddedBitStruct', 'Enum', 'ExprAdapter', + 'Field', 'FieldError', 'Flag', 'FlagsAdapter', 'FlagsContainer', + 'FlagsEnum', 'FormatField', 'GreedyRange', 'GreedyRepeater', + 'HexDumpAdapter', 'If', 'IfThenElse', 'IndexingAdapter', 'LFloat32', + 'LFloat64', 'LazyBound', 'LengthValueAdapter', 'ListContainer', + 'MappingAdapter', 'MappingError', 'MetaArray', 'MetaBytes', 'MetaField', + 'MetaRepeater', 'NFloat32', 'NFloat64', 'Nibble', 'NoneOf', + 'NoneOfValidator', 'Octet', 'OnDemand', 'OnDemandPointer', 'OneOf', + 'OneOfValidator', 'OpenRange', 'Optional', 'OptionalGreedyRange', + 'OptionalGreedyRepeater', 'PaddedStringAdapter', 'Padding', + 'PaddingAdapter', 'PaddingError', 'PascalString', 'Pass', 'Peek', + 'Pointer', 'PrefixedArray', 'Probe', 'Range', 'RangeError', 'Reconfig', + 'Rename', 'RepeatUntil', 'Repeater', 'Restream', 'SBInt16', 'SBInt32', + 'SBInt64', 'SBInt8', 'SLInt16', 'SLInt32', 'SLInt64', 'SLInt8', 'SNInt16', + 'SNInt32', 'SNInt64', 'SNInt8', 'Select', 'SelectError', 'Sequence', + 'SizeofError', 'SlicingAdapter', 'StaticField', 'StrictRepeater', 'String', + 'StringAdapter', 'Struct', 'Subconstruct', 'Switch', 'SwitchError', + 'SymmetricMapping', 'Terminator', 'TerminatorError', 'Tunnel', + 'TunnelAdapter', 'UBInt16', 'UBInt32', 'UBInt64', 'UBInt8', 'ULInt16', + 'ULInt32', 'ULInt64', 'ULInt8', 'UNInt16', 'UNInt32', 'UNInt64', 'UNInt8', + 'Union', 'ValidationError', 'Validator', 'Value', "Magic", +] diff --git a/elftools/construct/adapters.py b/elftools/construct/adapters.py index 182e108..26e5c67 100644 --- a/elftools/construct/adapters.py +++ b/elftools/construct/adapters.py @@ -1,6 +1,7 @@ -from core import Adapter, AdaptationError, Pass -from lib import int_to_bin, bin_to_int, swap_bytes, StringIO -from lib import FlagsContainer, HexString +from .core import Adapter, AdaptationError, Pass +from .lib import int_to_bin, bin_to_int, swap_bytes +from .lib import FlagsContainer, HexString +from .lib.py3compat import BytesIO, decodebytes #=============================================================================== @@ -95,7 +96,7 @@ class MappingAdapter(Adapter): return self.decoding[obj] except (KeyError, TypeError): if self.decdefault is NotImplemented: - raise MappingError("no decoding mapping for %r [%s]" % ( + raise MappingError("no decoding mapping for %r [%s]" % ( obj, self.subcon.name)) if self.decdefault is Pass: return obj @@ -117,13 +118,13 @@ class FlagsAdapter(Adapter): self.flags = flags def _encode(self, obj, context): flags = 0 - for name, value in self.flags.iteritems(): + for name, value in self.flags.items(): if getattr(obj, name, False): flags |= value return flags def _decode(self, obj, context): obj2 = FlagsContainer() - for name, value in self.flags.iteritems(): + for name, value in self.flags.items(): setattr(obj2, name, bool(obj & value)) return obj2 @@ -147,7 +148,6 @@ class StringAdapter(Adapter): obj = obj.encode(self.encoding) return obj def _decode(self, obj, context): - obj = "".join(obj) if self.encoding: obj = obj.decode(self.encoding) return obj @@ -228,13 +228,13 @@ class CStringAdapter(StringAdapter): encoding. """ __slots__ = ["terminators"] - def __init__(self, subcon, terminators = "\x00", encoding = None): + def __init__(self, subcon, terminators = b"\x00", encoding = None): StringAdapter.__init__(self, subcon, encoding = encoding) self.terminators = terminators def _encode(self, obj, context): - return StringAdapter._encode(self, obj, context) + self.terminators[0] + return StringAdapter._encode(self, obj, context) + self.terminators[0:1] def _decode(self, obj, context): - return StringAdapter._decode(self, obj[:-1], context) + return StringAdapter._decode(self, b''.join(obj[:-1]), context) class TunnelAdapter(Adapter): """ @@ -263,9 +263,9 @@ class TunnelAdapter(Adapter): Adapter.__init__(self, subcon) self.inner_subcon = inner_subcon def _decode(self, obj, context): - return self.inner_subcon._parse(StringIO(obj), context) + return self.inner_subcon._parse(BytesIO(obj), context) def _encode(self, obj, context): - stream = StringIO() + stream = BytesIO() self.inner_subcon._build(obj, stream, context) return stream.getvalue() @@ -380,7 +380,7 @@ class PaddingAdapter(Adapter): Parameters: * subcon - the subcon to pad - * pattern - the padding pattern (character). default is "\x00") + * pattern - the padding pattern (character). default is "\x00" * strict - whether or not to verify, during parsing, that the given padding matches the padding pattern. default is False (unstrict) """ @@ -422,11 +422,24 @@ class Validator(Adapter): class OneOf(Validator): """ - Validates that the value is one of the listed values - - Parameters: - * subcon - the subcon to validate - * valids - a set of valid values + Validates that the object is one of the listed values. + + :param ``Construct`` subcon: object to validate + :param iterable valids: a set of valid values + + >>> OneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x05") + 5 + >>> OneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x08") + Traceback (most recent call last): + ... + construct.core.ValidationError: ('invalid object', 8) + >>> + >>> OneOf(UBInt8("foo"), [4,5,6,7]).build(5) + '\\x05' + >>> OneOf(UBInt8("foo"), [4,5,6,7]).build(9) + Traceback (most recent call last): + ... + construct.core.ValidationError: ('invalid object', 9) """ __slots__ = ["valids"] def __init__(self, subcon, valids): @@ -437,11 +450,17 @@ class OneOf(Validator): class NoneOf(Validator): """ - Validates that the value is none of the listed values - - Parameters: - * subcon - the subcon to validate - * invalids - a set of invalid values + Validates that the object is none of the listed values. + + :param ``Construct`` subcon: object to validate + :param iterable invalids: a set of invalid values + + >>> NoneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x08") + 8 + >>> NoneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x06") + Traceback (most recent call last): + ... + construct.core.ValidationError: ('invalid object', 6) """ __slots__ = ["invalids"] def __init__(self, subcon, invalids): @@ -449,36 +468,3 @@ class NoneOf(Validator): self.invalids = invalids def _validate(self, obj, context): return obj not in self.invalids - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/elftools/construct/core.py b/elftools/construct/core.py index 773d830..214c58f 100644 --- a/elftools/construct/core.py +++ b/elftools/construct/core.py @@ -1,5 +1,7 @@ -from lib import StringIO, Packer -from lib import Container, ListContainer, AttrDict, LazyContainer +from struct import Struct as Packer + +from .lib.py3compat import BytesIO, advance_iterator, bchr +from .lib import Container, ListContainer, LazyContainer #=============================================================================== @@ -29,53 +31,64 @@ class TerminatorError(ConstructError): #=============================================================================== class Construct(object): """ - The mother of all constructs! - - User API: - * parse(buf) - parses an in-memory buffer (usually a string) - * parse_stream(stream) - parses a stream (in-memory, file, pipe, ...) - * build(obj) - builds the object into an in-memory buffer (a string) - * build_stream(obj, stream) - builds the object into the given stream - * sizeof(context) - calculates the size of the construct, if possible, - based on the context - - Overriable methods for subclassing: - * _parse(stream, context) - low-level parse from stream - * _build(obj, stream, context) - low-level build to stream - * _sizeof(context) - low-level compute size - - Flags API: - * _set_flag(flag) - sets the given flag/flags - * _clear_flag(flag) - clears the given flag/flags - * _inherit_flags(*subcons) - inherits the flag of subcons - * _is_flag(flag) - is the flag set? (predicate) - - Overridable methods for the copy-API: - * __getstate__() - returns a dict of the attributes of self - * __setstate__(attrs) - sets the attrs to self - - Attributes: - All constructs have a name and flags. The name is used for naming - struct-members and context dicts. Note that the name must be a string or - None (if the name is not needed). A single underscore ("_") is a reserved - name, and so are names starting with a less-than character ("<"). The name - should be descriptive, short, and valid as a python identifier (although - these rules are not enforced). - + The mother of all constructs. + + This object is generally not directly instantiated, and it does not + directly implement parsing and building, so it is largely only of interest + to subclass implementors. + + The external user API: + + * parse() + * parse_stream() + * build() + * build_stream() + * sizeof() + + Subclass authors should not override the external methods. Instead, + another API is available: + + * _parse() + * _build() + * _sizeof() + + There is also a flag API: + + * _set_flag() + * _clear_flag() + * _inherit_flags() + * _is_flag() + + And stateful copying: + + * __getstate__() + * __setstate__() + + Attributes and Inheritance + ========================== + + All constructs have a name and flags. The name is used for naming struct + members and context dictionaries. Note that the name can either be a + string, or None if the name is not needed. A single underscore ("_") is a + reserved name, and so are names starting with a less-than character ("<"). + The name should be descriptive, short, and valid as a Python identifier, + although these rules are not enforced. + The flags specify additional behavioral information about this construct. - The flags are used by enclosing constructs to determine a proper course - of action. Usually, flags are "inherited", i.e., an enclosing construct - inherits the flags of its subconstruct. The enclosing construct may - set new flags or clear existing ones, as necessary. - - For example, if FLAG_COPY_CONTEXT is set, repeaters will pass a copy of + Flags are used by enclosing constructs to determine a proper course of + action. Flags are inherited by default, from inner subconstructs to outer + constructs. The enclosing construct may set new flags or clear existing + ones, as necessary. + + For example, if FLAG_COPY_CONTEXT is set, repeaters will pass a copy of the context for each iteration, which is necessary for OnDemand parsing. """ + FLAG_COPY_CONTEXT = 0x0001 FLAG_DYNAMIC = 0x0002 FLAG_EMBED = 0x0004 FLAG_NESTING = 0x0008 - + __slots__ = ["name", "conflags"] def __init__(self, name, flags = 0): if name is not None: @@ -85,20 +98,50 @@ class Construct(object): raise ValueError("reserved name", name) self.name = name self.conflags = flags + def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self.name) - + def _set_flag(self, flag): + """ + Set the given flag or flags. + + :param int flag: flag to set; may be OR'd combination of flags + """ + self.conflags |= flag + def _clear_flag(self, flag): + """ + Clear the given flag or flags. + + :param int flag: flag to clear; may be OR'd combination of flags + """ + self.conflags &= ~flag + def _inherit_flags(self, *subcons): + """ + Pull flags from subconstructs. + """ + for sc in subcons: self._set_flag(sc.conflags) + def _is_flag(self, flag): + """ + Check whether a given flag is set. + + :param int flag: flag to check + """ + return bool(self.conflags & flag) - + def __getstate__(self): + """ + Obtain a dictionary representing this construct's state. + """ + attrs = {} if hasattr(self, "__dict__"): attrs.update(self.__dict__) @@ -112,49 +155,101 @@ class Construct(object): if hasattr(self, name): attrs[name] = getattr(self, name) return attrs + def __setstate__(self, attrs): - for name, value in attrs.iteritems(): + """ + Set this construct's state to a given state. + """ + for name, value in attrs.items(): setattr(self, name, value) + def __copy__(self): """returns a copy of this construct""" self2 = object.__new__(self.__class__) self2.__setstate__(self.__getstate__()) return self2 - + def parse(self, data): - """parses data given as a buffer or a string (in-memory)""" - return self.parse_stream(StringIO(data)) + """ + Parse an in-memory buffer. + + Strings, buffers, memoryviews, and other complete buffers can be + parsed with this method. + """ + + return self.parse_stream(BytesIO(data)) + def parse_stream(self, stream): - """parses data read directly from a stream""" - return self._parse(stream, AttrDict()) + """ + Parse a stream. + + Files, pipes, sockets, and other streaming sources of data are handled + by this method. + """ + + return self._parse(stream, Container()) + def _parse(self, stream, context): + """ + Override me in your subclass. + """ + raise NotImplementedError() - + def build(self, obj): - """builds an object in a string (in memory)""" - stream = StringIO() + """ + Build an object in memory. + """ + stream = BytesIO() self.build_stream(obj, stream) return stream.getvalue() + def build_stream(self, obj, stream): - """builds an object into a stream""" - self._build(obj, stream, AttrDict()) + """ + Build an object directly into a stream. + """ + self._build(obj, stream, Container()) + def _build(self, obj, stream, context): + """ + Override me in your subclass. + """ + raise NotImplementedError() - - def sizeof(self, context = None): - """calculates the size of the construct (if possible) using the - given context""" + + def sizeof(self, context=None): + """ + Calculate the size of this object, optionally using a context. + + Some constructs have no fixed size and can only know their size for a + given hunk of data; these constructs will raise an error if they are + not passed a context. + + :param ``Container`` context: contextual data + + :returns: int of the length of this construct + :raises SizeofError: the size could not be determined + """ + if context is None: - context = AttrDict() - return self._sizeof(context) + context = Container() + try: + return self._sizeof(context) + except Exception as e: + raise SizeofError(e) + def _sizeof(self, context): - raise SizeofError("can't calculate size") + """ + Override me in your subclass. + """ + + raise SizeofError("Raw Constructs have no size!") class Subconstruct(Construct): """ - Abstract subconstruct (wraps an inner construct, inheriting it's - name and flags). - + Abstract subconstruct (wraps an inner construct, inheriting its + name and flags). + Parameters: * subcon - the construct to wrap """ @@ -172,7 +267,7 @@ class Subconstruct(Construct): class Adapter(Subconstruct): """ Abstract adapter: calls _decode for parsing and _encode for building. - + Parameters: * subcon - the construct to wrap """ @@ -188,7 +283,7 @@ class Adapter(Subconstruct): #=============================================================================== -# primitives +# Fields #=============================================================================== def _read_stream(stream, length): if length < 0: @@ -207,15 +302,12 @@ def _write_stream(stream, length, data): class StaticField(Construct): """ - A field of a fixed size - - Parameters: - * name - the name of the field - * length - the length (an integer) - - Example: - StaticField("foo", 5) + A fixed-size byte field. + + :param str name: field name + :param int length: number of bytes in the field """ + __slots__ = ["length"] def __init__(self, name, length): Construct.__init__(self, name) @@ -229,23 +321,19 @@ class StaticField(Construct): class FormatField(StaticField): """ - A field that uses python's built-in struct module to pack/unpack data - according to a format string. - Note: this field has been originally implemented as an Adapter, but it - was made a construct for performance reasons. - - Parameters: - * name - the name - * endianity - "<" for little endian, ">" for big endian, or "=" for native - * format - a single format character - - Example: - FormatField("foo", ">", "L") + A field that uses ``struct`` to pack and unpack data. + + See ``struct`` documentation for instructions on crafting format strings. + + :param str name: name of the field + :param str endianness: format endianness string; one of "<", ">", or "=" + :param str format: a single format character """ + __slots__ = ["packer"] def __init__(self, name, endianity, format): if endianity not in (">", "<", "="): - raise ValueError("endianity must be be '=', '<', or '>'", + raise ValueError("endianity must be be '=', '<', or '>'", endianity) if len(format) != 1: raise ValueError("must specify one and only one format char") @@ -261,27 +349,33 @@ class FormatField(StaticField): def _parse(self, stream, context): try: return self.packer.unpack(_read_stream(stream, self.length))[0] - except Exception, ex: + except Exception as ex: raise FieldError(ex) def _build(self, obj, stream, context): try: _write_stream(stream, self.length, self.packer.pack(obj)) - except Exception, ex: + except Exception as ex: raise FieldError(ex) class MetaField(Construct): """ - A field of a meta-length. The length is computed at runtime based on - the context. - - Parameters: - * name - the name of the field - * lengthfunc - a function that takes the context as a parameter and return - the length of the field - - Example: - MetaField("foo", lambda ctx: 5) + A variable-length field. The length is obtained at runtime from a + function. + + :param str name: name of the field + :param callable lengthfunc: callable that takes a context and returns + length as an int + + >>> foo = Struct("foo", + ... Byte("length"), + ... MetaField("data", lambda ctx: ctx["length"]) + ... ) + >>> foo.parse("\\x03ABC") + Container(data = 'ABC', length = 3) + >>> foo.parse("\\x04ABCD") + Container(data = 'ABCD', length = 4) """ + __slots__ = ["lengthfunc"] def __init__(self, name, lengthfunc): Construct.__init__(self, name) @@ -300,15 +394,15 @@ class MetaField(Construct): #=============================================================================== class MetaArray(Subconstruct): """ - An array (repeater) of a meta-count. The array will iterate exactly + An array (repeater) of a meta-count. The array will iterate exactly `countfunc()` times. Will raise ArrayError if less elements are found. See also Array, Range and RepeatUntil. - + Parameters: * countfunc - a function that takes the context as a parameter and returns the number of elements of the array (count) * subcon - the subcon to repeat `countfunc()` times - + Example: MetaArray(lambda ctx: 5, UBInt8("foo")) """ @@ -331,7 +425,7 @@ class MetaArray(Subconstruct): while c < count: obj.append(self.subcon._parse(stream, context)) c += 1 - except ConstructError, ex: + except ConstructError as ex: raise ArrayError("expected %d, found %d" % (count, c), ex) return obj def _build(self, obj, stream, context): @@ -352,18 +446,44 @@ class Range(Subconstruct): A range-array. The subcon will iterate between `mincount` to `maxcount` times. If less than `mincount` elements are found, raises RangeError. See also GreedyRange and OptionalGreedyRange. - - Notes: - * requires a seekable stream. - - Parameters: - * mincount - the minimal count (an integer) - * maxcount - the maximal count (an integer) - * subcon - the subcon to repeat - - Example: - Range(5, 8, UBInt8("foo")) + + The general-case repeater. Repeats the given unit for at least mincount + times, and up to maxcount times. If an exception occurs (EOF, validation + error), the repeater exits. If less than mincount units have been + successfully parsed, a RangeError is raised. + + .. note:: + This object requires a seekable stream for parsing. + + :param int mincount: the minimal count + :param int maxcount: the maximal count + :param Construct subcon: the subcon to repeat + + >>> c = Range(3, 7, UBInt8("foo")) + >>> c.parse("\\x01\\x02") + Traceback (most recent call last): + ... + construct.core.RangeError: expected 3..7, found 2 + >>> c.parse("\\x01\\x02\\x03") + [1, 2, 3] + >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06") + [1, 2, 3, 4, 5, 6] + >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06\\x07") + [1, 2, 3, 4, 5, 6, 7] + >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09") + [1, 2, 3, 4, 5, 6, 7] + >>> c.build([1,2]) + Traceback (most recent call last): + ... + construct.core.RangeError: expected 3..7, found 2 + >>> c.build([1,2,3,4]) + '\\x01\\x02\\x03\\x04' + >>> c.build([1,2,3,4,5,6,7,8]) + Traceback (most recent call last): + ... + construct.core.RangeError: expected 3..7, found 8 """ + __slots__ = ["mincount", "maxcout"] def __init__(self, mincount, maxcout, subcon): Subconstruct.__init__(self, subcon) @@ -385,47 +505,51 @@ class Range(Subconstruct): pos = stream.tell() obj.append(self.subcon._parse(stream, context)) c += 1 - except ConstructError: + except ConstructError as ex: if c < self.mincount: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, c)) + raise RangeError("expected %d to %d, found %d" % + (self.mincount, self.maxcout, c), ex) stream.seek(pos) return obj def _build(self, obj, stream, context): if len(obj) < self.mincount or len(obj) > self.maxcout: - raise RangeError("expected %d to %d, found %d" % + raise RangeError("expected %d to %d, found %d" % (self.mincount, self.maxcout, len(obj))) cnt = 0 try: if self.subcon.conflags & self.FLAG_COPY_CONTEXT: for subobj in obj: + if isinstance(obj, bytes): + subobj = bchr(subobj) self.subcon._build(subobj, stream, context.__copy__()) cnt += 1 else: for subobj in obj: + if isinstance(obj, bytes): + subobj = bchr(subobj) self.subcon._build(subobj, stream, context) cnt += 1 - except ConstructError: + except ConstructError as ex: if cnt < self.mincount: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, len(obj))) + raise RangeError("expected %d to %d, found %d" % + (self.mincount, self.maxcout, len(obj)), ex) def _sizeof(self, context): raise SizeofError("can't calculate size") class RepeatUntil(Subconstruct): """ - An array that repeat until the predicate indicates it to stop. Note that - the last element (which caused the repeat to exit) is included in the + An array that repeats until the predicate indicates it to stop. Note that + the last element (which caused the repeat to exit) is included in the return value. - + Parameters: * predicate - a predicate function that takes (obj, context) and returns True if the stop-condition is met, or False to continue. * subcon - the subcon to repeat. - + Example: - # will read chars until \x00 (inclusive) - RepeatUntil(lambda obj, ctx: obj == "\x00", + # will read chars until b\x00 (inclusive) + RepeatUntil(lambda obj, ctx: obj == b"\x00", Field("chars", 1) ) """ @@ -450,7 +574,7 @@ class RepeatUntil(Subconstruct): obj.append(subobj) if self.predicate(subobj, context): break - except ConstructError, ex: + except ConstructError as ex: raise ArrayError("missing terminator", ex) return obj def _build(self, obj, stream, context): @@ -463,6 +587,7 @@ class RepeatUntil(Subconstruct): break else: for subobj in obj: + subobj = bchr(subobj) self.subcon._build(subobj, stream, context.__copy__()) if self.predicate(subobj, context): terminated = True @@ -481,14 +606,14 @@ class Struct(Construct): A sequence of named constructs, similar to structs in C. The elements are parsed and built in the order they are defined. See also Embedded. - + Parameters: * name - the name of the structure * subcons - a sequence of subconstructs that make up this structure. - * nested - a keyword-only argument that indicates whether this struct - creates a nested context. The default is True. This parameter is + * nested - a keyword-only argument that indicates whether this struct + creates a nested context. The default is True. This parameter is considered "advanced usage", and may be removed in the future. - + Example: Struct("foo", UBInt8("first_element"), @@ -513,7 +638,7 @@ class Struct(Construct): else: obj = Container() if self.nested: - context = AttrDict(_ = context) + context = Container(_ = context) for sc in self.subcons: if sc.conflags & self.FLAG_EMBED: context[""] = obj @@ -528,7 +653,7 @@ class Struct(Construct): if "" in context: del context[""] elif self.nested: - context = AttrDict(_ = context) + context = Container(_ = context) for sc in self.subcons: if sc.conflags & self.FLAG_EMBED: context[""] = True @@ -541,7 +666,7 @@ class Struct(Construct): sc._build(subobj, stream, context) def _sizeof(self, context): if self.nested: - context = AttrDict(_ = context) + context = Container(_ = context) return sum(sc._sizeof(context) for sc in self.subcons) class Sequence(Struct): @@ -549,14 +674,14 @@ class Sequence(Struct): A sequence of unnamed constructs. The elements are parsed and built in the order they are defined. See also Embedded. - + Parameters: * name - the name of the structure * subcons - a sequence of subconstructs that make up this structure. - * nested - a keyword-only argument that indicates whether this struct - creates a nested context. The default is True. This parameter is + * nested - a keyword-only argument that indicates whether this struct + creates a nested context. The default is True. This parameter is considered "advanced usage", and may be removed in the future. - + Example: Sequence("foo", UBInt8("first_element"), @@ -573,7 +698,7 @@ class Sequence(Struct): else: obj = ListContainer() if self.nested: - context = AttrDict(_ = context) + context = Container(_ = context) for sc in self.subcons: if sc.conflags & self.FLAG_EMBED: context[""] = obj @@ -588,7 +713,7 @@ class Sequence(Struct): if "" in context: del context[""] elif self.nested: - context = AttrDict(_ = context) + context = Container(_ = context) objiter = iter(obj) for sc in self.subcons: if sc.conflags & self.FLAG_EMBED: @@ -597,30 +722,30 @@ class Sequence(Struct): elif sc.name is None: subobj = None else: - subobj = objiter.next() + subobj = advance_iterator(objiter) context[sc.name] = subobj sc._build(subobj, stream, context) class Union(Construct): """ - a set of overlapping fields (like unions in C). when parsing, + a set of overlapping fields (like unions in C). when parsing, all fields read the same data; when building, only the first subcon - (called "master") is used. - + (called "master") is used. + Parameters: * name - the name of the union - * master - the master subcon, i.e., the subcon used for building and + * master - the master subcon, i.e., the subcon used for building and calculating the total size * subcons - additional subcons - + Example: Union("what_are_four_bytes", UBInt32("one_dword"), Struct("two_words", UBInt16("first"), UBInt16("second")), - Struct("four_bytes", - UBInt8("a"), - UBInt8("b"), - UBInt8("c"), + Struct("four_bytes", + UBInt8("a"), + UBInt8("b"), + UBInt8("c"), UBInt8("d") ), ) @@ -645,22 +770,22 @@ class Union(Construct): class Switch(Construct): """ A conditional branch. Switch will choose the case to follow based on - the return value of keyfunc. If no case is matched, and no default value + the return value of keyfunc. If no case is matched, and no default value is given, SwitchError will be raised. See also Pass. - + Parameters: * name - the name of the construct - * keyfunc - a function that takes the context and returns a key, which + * keyfunc - a function that takes the context and returns a key, which will ne used to choose the relevant case. - * cases - a dictionary mapping keys to constructs. the keys can be any + * cases - a dictionary mapping keys to constructs. the keys can be any values that may be returned by keyfunc. * default - a default value to use when the key is not found in the cases. if not supplied, an exception will be raised when the key is not found. You can use the builtin construct Pass for 'do-nothing'. * include_key - whether or not to include the key in the return value of parsing. defualt is False. - + Example: Struct("foo", UBInt8("type"), @@ -673,7 +798,7 @@ class Switch(Construct): ), ) """ - + class NoDefault(Construct): def _parse(self, stream, context): raise SwitchError("no default case defined") @@ -681,11 +806,11 @@ class Switch(Construct): raise SwitchError("no default case defined") def _sizeof(self, context): raise SwitchError("no default case defined") - NoDefault = NoDefault("NoDefault") - + NoDefault = NoDefault("No default value specified") + __slots__ = ["subcons", "keyfunc", "cases", "default", "include_key"] - - def __init__(self, name, keyfunc, cases, default = NoDefault, + + def __init__(self, name, keyfunc, cases, default = NoDefault, include_key = False): Construct.__init__(self, name) self._inherit_flags(*cases.values()) @@ -717,17 +842,17 @@ class Select(Construct): """ Selects the first matching subconstruct. It will literally try each of the subconstructs, until one matches. - + Notes: * requires a seekable stream. - + Parameters: * name - the name of the construct * subcons - the subcons to try (order-sensitive) - * include_name - a keyword only argument, indicating whether to include + * include_name - a keyword only argument, indicating whether to include the name of the selected subcon in the return value of parsing. default is false. - + Example: Select("foo", UBInt64("large"), @@ -769,9 +894,9 @@ class Select(Construct): if sc.name == name: sc._build(obj, stream, context) return - else: + else: for sc in self.subcons: - stream2 = StringIO() + stream2 = BytesIO() context2 = context.__copy__() try: sc._build(obj, stream2, context2) @@ -794,15 +919,15 @@ class Pointer(Subconstruct): Changes the stream position to a given offset, where the construction should take place, and restores the stream position when finished. See also Anchor, OnDemand and OnDemandPointer. - + Notes: * requires a seekable stream. - + Parameters: - * offsetfunc: a function that takes the context and returns an absolute + * offsetfunc: a function that takes the context and returns an absolute stream position, where the construction would take place * subcon - the subcon to use at `offsetfunc()` - + Example: Struct("foo", UBInt32("spam_pointer"), @@ -836,15 +961,15 @@ class Peek(Subconstruct): Peeks at the stream: parses without changing the stream position. See also Union. If the end of the stream is reached when peeking, returns None. - + Notes: * requires a seekable stream. - + Parameters: * subcon - the subcon to peek at - * perform_build - whether or not to perform building. by default this + * perform_build - whether or not to perform building. by default this parameter is set to False, meaning building is a no-op. - + Example: Peek(UBInt8("foo")) """ @@ -855,10 +980,9 @@ class Peek(Subconstruct): def _parse(self, stream, context): pos = stream.tell() try: - try: - return self.subcon._parse(stream, context) - except FieldError: - pass + return self.subcon._parse(stream, context) + except FieldError: + pass finally: stream.seek(pos) def _build(self, obj, stream, context): @@ -869,25 +993,25 @@ class Peek(Subconstruct): class OnDemand(Subconstruct): """ - Allows for on-demand (lazy) parsing. When parsing, it will return a + Allows for on-demand (lazy) parsing. When parsing, it will return a LazyContainer that represents a pointer to the data, but does not actually parses it from stream until it's "demanded". - By accessing the 'value' property of LazyContainers, you will demand the + By accessing the 'value' property of LazyContainers, you will demand the data from the stream. The data will be parsed and cached for later use. - You can use the 'has_value' property to know whether the data has already + You can use the 'has_value' property to know whether the data has already been demanded. See also OnDemandPointer. - + Notes: * requires a seekable stream. - + Parameters: - * subcon - - * advance_stream - whether or not to advance the stream position. by + * subcon - + * advance_stream - whether or not to advance the stream position. by default this is True, but if subcon is a pointer, this should be False. * force_build - whether or not to force build. If set to False, and the LazyContainer has not been demaned, building is a no-op. - + Example: OnDemand(Array(10000, UBInt8("foo")) """ @@ -911,13 +1035,13 @@ class OnDemand(Subconstruct): class Buffered(Subconstruct): """ - Creates an in-memory buffered stream, which can undergo encoding and + Creates an in-memory buffered stream, which can undergo encoding and decoding prior to being passed on to the subconstruct. See also Bitwise. - + Note: * Do not use pointers inside Buffered - + Parameters: * subcon - the subcon which will operate on the buffer * encoder - a function that takes a string and returns an encoded @@ -926,7 +1050,7 @@ class Buffered(Subconstruct): string (used before parsing) * resizer - a function that takes the size of the subcon and "adjusts" or "resizes" it according to the encoding/decoding process. - + Example: Buffered(BitField("foo", 16), encoder = decode_bin, @@ -942,11 +1066,11 @@ class Buffered(Subconstruct): self.resizer = resizer def _parse(self, stream, context): data = _read_stream(stream, self._sizeof(context)) - stream2 = StringIO(self.decoder(data)) + stream2 = BytesIO(self.decoder(data)) return self.subcon._parse(stream2, context) def _build(self, obj, stream, context): size = self._sizeof(context) - stream2 = StringIO() + stream2 = BytesIO() self.subcon._build(obj, stream2, context) data = self.encoder(stream2.getvalue()) assert len(data) == size @@ -956,27 +1080,27 @@ class Buffered(Subconstruct): class Restream(Subconstruct): """ - Wraps the stream with a read-wrapper (for parsing) or a + Wraps the stream with a read-wrapper (for parsing) or a write-wrapper (for building). The stream wrapper can buffer the data - internally, reading it from- or writing it to the underlying stream - as needed. For example, BitStreamReader reads whole bytes from the - underlying stream, but returns them as individual bits. + internally, reading it from- or writing it to the underlying stream + as needed. For example, BitStreamReader reads whole bytes from the + underlying stream, but returns them as individual bits. See also Bitwise. - - When the parsing or building is done, the stream's close method + + When the parsing or building is done, the stream's close method will be invoked. It can perform any finalization needed for the stream wrapper, but it must not close the underlying stream. - + Note: * Do not use pointers inside Restream - + Parameters: * subcon - the subcon * stream_reader - the read-wrapper * stream_writer - the write wrapper * resizer - a function that takes the size of the subcon and "adjusts" or "resizes" it according to the encoding/decoding process. - + Example: Restream(BitField("foo", 16), stream_reader = BitStreamReader, @@ -1010,13 +1134,13 @@ class Reconfig(Subconstruct): """ Reconfigures a subconstruct. Reconfig can be used to change the name and set and clear flags of the inner subcon. - + Parameters: * name - the new name * subcon - the subcon to reconfigure * setflags - the flags to set (default is 0) * clearflags - the flags to clear (default is 0) - + Example: Reconfig("foo", UBInt8("bar")) """ @@ -1035,13 +1159,13 @@ class Anchor(Construct): absolute pointer = anchor + relative offset size = anchor_after - anchor_before See also Pointer. - + Notes: * requires a seekable stream. - + Parameters: * name - the name of the anchor - + Example: Struct("foo", Anchor("base"), @@ -1062,11 +1186,11 @@ class Anchor(Construct): class Value(Construct): """ A computed value. - + Parameters: * name - the name of the value * func - a function that takes the context and return the computed value - + Example: Struct("foo", UBInt8("width"), @@ -1091,19 +1215,19 @@ class Value(Construct): # Dynamically creates a construct and uses it for parsing and building. # This allows you to create change the construction tree on the fly. # Deprecated. -# +# # Parameters: # * name - the name of the construct -# * factoryfunc - a function that takes the context and returns a new +# * factoryfunc - a function that takes the context and returns a new # construct object which will be used for parsing and building. -# +# # Example: # def factory(ctx): # if ctx.bar == 8: # return UBInt8("spam") # if ctx.bar == 9: # return String("spam", 9) -# +# # Struct("foo", # UBInt8("bar"), # Dynamic("spam", factory), @@ -1123,12 +1247,12 @@ class Value(Construct): class LazyBound(Construct): """ - Lazily bound construct, useful for constructs that need to make cyclic + Lazily bound construct, useful for constructs that need to make cyclic references (linked-lists, expression trees, etc.). - + Parameters: - - + + Example: foo = Struct("foo", UBInt8("bar"), @@ -1158,11 +1282,11 @@ class Pass(Construct): A do-nothing construct, useful as the default case for Switch, or to indicate Enums. See also Switch and Enum. - + Notes: - * this construct is a singleton. do not try to instatiate it, as it - will not work :) - + * this construct is a singleton. do not try to instatiate it, as it + will not work... + Example: Pass """ @@ -1179,11 +1303,13 @@ class Terminator(Construct): """ Asserts the end of the stream has been reached at the point it's placed. You can use this to ensure no more unparsed data follows. - + Notes: - * this construct is a singleton. do not try to instatiate it, as it - will not work :) - + * this construct is only meaningful for parsing. for building, it's + a no-op. + * this construct is a singleton. do not try to instatiate it, as it + will not work... + Example: Terminator """ @@ -1196,22 +1322,3 @@ class Terminator(Construct): def _sizeof(self, context): return 0 Terminator = Terminator(None) - - - - - - - - - - - - - - - - - - - diff --git a/elftools/construct/debug.py b/elftools/construct/debug.py index b2df465..6023df9 100644 --- a/elftools/construct/debug.py +++ b/elftools/construct/debug.py @@ -1,12 +1,13 @@ """ Debugging utilities for constructs """ +from __future__ import print_function import sys import traceback import pdb import inspect -from core import Construct, Subconstruct -from lib import HexString, Container, ListContainer, AttrDict +from .core import Construct, Subconstruct +from .lib import HexString, Container, ListContainer class Probe(Construct): @@ -79,14 +80,14 @@ class Probe(Construct): frames = [s[0] for s in inspect.stack()][1:-1] frames.reverse() for f in frames: - a = AttrDict() + a = Container() a.__update__(f.f_locals) obj.stack.append(a) - print "=" * 80 - print "Probe", self.printname - print obj - print "=" * 80 + print("=" * 80) + print("Probe", self.printname) + print(obj) + print("=" * 80) class Debugger(Subconstruct): """ @@ -123,38 +124,11 @@ class Debugger(Subconstruct): except Exception: self.handle_exc() def handle_exc(self, msg = None): - print "=" * 80 - print "Debugging exception of %s:" % (self.subcon,) - print "".join(traceback.format_exception(*sys.exc_info())[1:]) + print("=" * 80) + print("Debugging exception of %s:" % (self.subcon,)) + print("".join(traceback.format_exception(*sys.exc_info())[1:])) if msg: - print msg + print(msg) pdb.post_mortem(sys.exc_info()[2]) - print "=" * 80 - - - - - - - - - - - - - - - - - - - - - - - - - - - + print("=" * 80) diff --git a/elftools/construct/lib/__init__.py b/elftools/construct/lib/__init__.py index 84d88db..2e09578 100644 --- a/elftools/construct/lib/__init__.py +++ b/elftools/construct/lib/__init__.py @@ -1,10 +1,7 @@ -from binary import int_to_bin, bin_to_int, swap_bytes, encode_bin, decode_bin -from bitstream import BitStreamReader, BitStreamWriter -from container import (Container, AttrDict, FlagsContainer, - ListContainer, LazyContainer) -from hex import HexString, hexdump -from utils import Packer, StringIO - - - +from .binary import ( + int_to_bin, bin_to_int, swap_bytes, encode_bin, decode_bin) +from .bitstream import BitStreamReader, BitStreamWriter +from .container import (Container, FlagsContainer, ListContainer, + LazyContainer) +from .hex import HexString, hexdump diff --git a/elftools/construct/lib/binary.py b/elftools/construct/lib/binary.py index b348da2..c73b887 100644 --- a/elftools/construct/lib/binary.py +++ b/elftools/construct/lib/binary.py @@ -1,18 +1,55 @@ -def int_to_bin(number, width = 32): +from .py3compat import int2byte + + +def int_to_bin(number, width=32): + r""" + Convert an integer into its binary representation in a bytes object. + Width is the amount of bits to generate. If width is larger than the actual + amount of bits required to represent number in binary, sign-extension is + used. If it's smaller, the representation is trimmed to width bits. + Each "bit" is either '\x00' or '\x01'. The MSBit is first. + + Examples: + + >>> int_to_bin(19, 5) + b'\x01\x00\x00\x01\x01' + >>> int_to_bin(19, 8) + b'\x00\x00\x00\x01\x00\x00\x01\x01' + """ if number < 0: number += 1 << width i = width - 1 - bits = ["\x00"] * width + bits = bytearray(width) while number and i >= 0: - bits[i] = "\x00\x01"[number & 1] + bits[i] = number & 1 number >>= 1 i -= 1 - return "".join(bits) + return bytes(bits) + -_bit_values = {"\x00" : 0, "\x01" : 1, "0" : 0, "1" : 1} -def bin_to_int(bits, signed = False): +_bit_values = { + 0: 0, + 1: 1, + 48: 0, # '0' + 49: 1, # '1' + + # The following are for Python 2, in which iteration over a bytes object + # yields single-character bytes and not integers. + '\x00': 0, + '\x01': 1, + '0': 0, + '1': 1, + } + +def bin_to_int(bits, signed=False): + r""" + Logical opposite of int_to_bin. Both '0' and '\x00' are considered zero, + and both '1' and '\x01' are considered one. Set sign to True to interpret + the number as a 2-s complement signed integer. + """ number = 0 bias = 0 + ptr = 0 if signed and _bit_values[bits[0]] == 1: bits = bits[1:] bias = 1 << len(bits) @@ -21,41 +58,61 @@ def bin_to_int(bits, signed = False): number |= _bit_values[b] return number - bias -def swap_bytes(bits, bytesize = 8): + +def swap_bytes(bits, bytesize=8): + r""" + Bits is a b'' object containing a binary representation. Assuming each + bytesize bits constitute a bytes, perform a endianness byte swap. Example: + + >>> swap_bytes(b'00011011', 2) + b'11100100' + """ i = 0 l = len(bits) - output = [""] * ((l // bytesize) + 1) + output = [b""] * ((l // bytesize) + 1) j = len(output) - 1 while i < l: output[j] = bits[i : i + bytesize] i += bytesize j -= 1 - return "".join(output) + return b"".join(output) + _char_to_bin = {} _bin_to_char = {} for i in range(256): - ch = chr(i) + ch = int2byte(i) bin = int_to_bin(i, 8) + # Populate with for both keys i and ch, to support Python 2 & 3 _char_to_bin[ch] = bin - _bin_to_char[bin] = ch + _char_to_bin[i] = bin _bin_to_char[bin] = ch + def encode_bin(data): - return "".join(_char_to_bin[ch] for ch in data) + """ + Create a binary representation of the given b'' object. Assume 8-bit + ASCII. Example: + + >>> encode_bin('ab') + b"\x00\x01\x01\x00\x00\x00\x00\x01\x00\x01\x01\x00\x00\x00\x01\x00" + """ + return b"".join(_char_to_bin[ch] for ch in data) + def decode_bin(data): - assert len(data) & 7 == 0, "data length must be a multiple of 8" + """ + Locical opposite of decode_bin. + """ + if len(data) & 7: + raise ValueError("Data length must be a multiple of 8") i = 0 j = 0 l = len(data) // 8 - chars = [""] * l + chars = [b""] * l while j < l: chars[j] = _bin_to_char[data[i:i+8]] i += 8 j += 1 - return "".join(chars) - - - + return b"".join(chars) diff --git a/elftools/construct/lib/bitstream.py b/elftools/construct/lib/bitstream.py index e473864..0c521a4 100644 --- a/elftools/construct/lib/bitstream.py +++ b/elftools/construct/lib/bitstream.py @@ -1,24 +1,31 @@ -from binary import encode_bin, decode_bin - +from .binary import encode_bin, decode_bin class BitStreamReader(object): + __slots__ = ["substream", "buffer", "total_size"] + def __init__(self, substream): self.substream = substream self.total_size = 0 self.buffer = "" + def close(self): if self.total_size % 8 != 0: raise ValueError("total size of read data must be a multiple of 8", self.total_size) + def tell(self): return self.substream.tell() + def seek(self, pos, whence = 0): self.buffer = "" self.total_size = 0 self.substream.seek(pos, whence) + def read(self, count): - assert count >= 0 + if count < 0: + raise ValueError("count cannot be negative") + l = len(self.buffer) if count == 0: data = "" @@ -29,7 +36,7 @@ class BitStreamReader(object): data = self.buffer count -= l bytes = count // 8 - if count & 7: + if count & 7: bytes += 1 buf = encode_bin(self.substream.read(bytes)) data += buf[:count] @@ -37,44 +44,34 @@ class BitStreamReader(object): self.total_size += len(data) return data - class BitStreamWriter(object): + __slots__ = ["substream", "buffer", "pos"] + def __init__(self, substream): self.substream = substream self.buffer = [] self.pos = 0 + def close(self): self.flush() + def flush(self): bytes = decode_bin("".join(self.buffer)) self.substream.write(bytes) self.buffer = [] self.pos = 0 + def tell(self): return self.substream.tell() + self.pos // 8 + def seek(self, pos, whence = 0): self.flush() self.substream.seek(pos, whence) + def write(self, data): if not data: return if type(data) is not str: raise TypeError("data must be a string, not %r" % (type(data),)) self.buffer.append(data) - - - - - - - - - - - - - - - - diff --git a/elftools/construct/lib/container.py b/elftools/construct/lib/container.py index eb4b738..2f89b2d 100644 --- a/elftools/construct/lib/container.py +++ b/elftools/construct/lib/container.py @@ -1,3 +1,10 @@ +""" +Various containers. +""" + +from collections import MutableMapping +from pprint import pformat + def recursion_lock(retval, lock_name = "__recursion_lock__"): def decorator(func): def wrapper(self, *args, **kw): @@ -12,249 +19,143 @@ def recursion_lock(retval, lock_name = "__recursion_lock__"): return wrapper return decorator -class Container(object): +class Container(MutableMapping): """ - A generic container of attributes + A generic container of attributes. + + Containers are the common way to express parsed data. """ - __slots__ = ["__dict__", "__attrs__"] + def __init__(self, **kw): - self.__dict__.update(kw) - object.__setattr__(self, "__attrs__", kw.keys()) - + self.__dict__ = kw + + # The core dictionary interface. + + def __getitem__(self, name): + return self.__dict__[name] + + def __delitem__(self, name): + del self.__dict__[name] + + def __setitem__(self, name, value): + self.__dict__[name] = value + + def keys(self): + return self.__dict__.keys() + + def __len__(self): + return len(self.__dict__.keys()) + + # Extended dictionary interface. + + def update(self, other): + self.__dict__.update(other) + + __update__ = update + + def __contains__(self, value): + return value in self.__dict__ + + # Rich comparisons. + def __eq__(self, other): try: return self.__dict__ == other.__dict__ except AttributeError: return False + def __ne__(self, other): - return not (self == other) - - def __delattr__(self, name): - object.__delattr__(self, name) - self.__attrs__.remove(name) - def __setattr__(self, name, value): - d = self.__dict__ - if name not in d: - self.__attrs__.append(name) - d[name] = value - def __contains__(self, name): - return name in self.__dict__ - def __getitem__(self, name): - return self.__dict__[name] - def __delitem__(self, name): - self.__delattr__(name) - def __setitem__(self, name, value): - self.__setattr__(name, value) - def __update__(self, obj): - for name in obj.__attrs__: - self[name] = obj[name] - def __copy__(self): - new = self.__class__() - new.__attrs__ = self.__attrs__[:] - new.__dict__ = self.__dict__.copy() - return new - - @recursion_lock("<...>") + return not self == other + + # Copy interface. + + def copy(self): + return self.__class__(**self.__dict__) + + __copy__ = copy + + # Iterator interface. + + def __iter__(self): + return iter(self.__dict__) + def __repr__(self): - attrs = sorted("%s = %r" % (k, v) - for k, v in self.__dict__.iteritems() - if not k.startswith("_")) - return "%s(%s)" % (self.__class__.__name__, ", ".join(attrs)) + return "%s(%s)" % (self.__class__.__name__, repr(self.__dict__)) + def __str__(self): - return self.__pretty_str__() - @recursion_lock("<...>") - def __pretty_str__(self, nesting = 1, indentation = " "): - attrs = [] - ind = indentation * nesting - for k in self.__attrs__: - v = self.__dict__[k] - if not k.startswith("_"): - text = [ind, k, " = "] - if hasattr(v, "__pretty_str__"): - text.append(v.__pretty_str__(nesting + 1, indentation)) - else: - text.append(str(v)) - attrs.append("".join(text)) - if not attrs: - return "%s()" % (self.__class__.__name__,) - attrs.insert(0, self.__class__.__name__ + ":") - return "\n".join(attrs) + return "%s(%s)" % (self.__class__.__name__, str(self.__dict__)) class FlagsContainer(Container): """ - A container providing pretty-printing for flags. Only set flags are - displayed. + A container providing pretty-printing for flags. + + Only set flags are displayed. """ - def __pretty_str__(self, nesting = 1, indentation = " "): - attrs = [] - ind = indentation * nesting - for k in self.__attrs__: - v = self.__dict__[k] - if not k.startswith("_") and v: - attrs.append(ind + k) - if not attrs: - return "%s()" % (self.__class__.__name__,) - attrs.insert(0, self.__class__.__name__+ ":") - return "\n".join(attrs) + + @recursion_lock("<...>") + def __str__(self): + d = dict((k, self[k]) for k in self + if self[k] and not k.startswith("_")) + return "%s(%s)" % (self.__class__.__name__, pformat(d)) class ListContainer(list): """ - A container for lists + A container for lists. """ + __slots__ = ["__recursion_lock__"] - def __str__(self): - return self.__pretty_str__() + @recursion_lock("[...]") - def __pretty_str__(self, nesting = 1, indentation = " "): - if not self: - return "[]" - ind = indentation * nesting - lines = ["["] - for elem in self: - lines.append("\n") - lines.append(ind) - if hasattr(elem, "__pretty_str__"): - lines.append(elem.__pretty_str__(nesting + 1, indentation)) - else: - lines.append(repr(elem)) - lines.append("\n") - lines.append(indentation * (nesting - 1)) - lines.append("]") - return "".join(lines) - -class AttrDict(object): - """ - A dictionary that can be accessed both using indexing and attributes, - i.e., - x = AttrDict() - x.foo = 5 - print x["foo"] - """ - __slots__ = ["__dict__"] - def __init__(self, **kw): - self.__dict__ = kw - def __contains__(self, key): - return key in self.__dict__ - def __nonzero__(self): - return bool(self.__dict__) - def __repr__(self): - return repr(self.__dict__) def __str__(self): - return self.__pretty_str__() - def __pretty_str__(self, nesting = 1, indentation = " "): - if not self: - return "{}" - text = ["{\n"] - ind = nesting * indentation - for k in sorted(self.__dict__.keys()): - v = self.__dict__[k] - text.append(ind) - text.append(repr(k)) - text.append(" : ") - if hasattr(v, "__pretty_str__"): - try: - text.append(v.__pretty_str__(nesting+1, indentation)) - except Exception: - text.append(repr(v)) - else: - text.append(repr(v)) - text.append("\n") - text.append((nesting-1) * indentation) - text.append("}") - return "".join(text) - def __delitem__(self, key): - del self.__dict__[key] - def __getitem__(self, key): - return self.__dict__[key] - def __setitem__(self, key, value): - self.__dict__[key] = value - def __copy__(self): - new = self.__class__() - new.__dict__ = self.__dict__.copy() - return new - def __update__(self, other): - if isinstance(other, dict): - self.__dict__.update(other) - else: - self.__dict__.update(other.__dict__) + return pformat(self) class LazyContainer(object): + __slots__ = ["subcon", "stream", "pos", "context", "_value"] + def __init__(self, subcon, stream, pos, context): self.subcon = subcon self.stream = stream self.pos = pos self.context = context self._value = NotImplemented + def __eq__(self, other): try: return self._value == other._value except AttributeError: return False + def __ne__(self, other): return not (self == other) + def __str__(self): return self.__pretty_str__() + def __pretty_str__(self, nesting = 1, indentation = " "): if self._value is NotImplemented: text = "" elif hasattr(self._value, "__pretty_str__"): text = self._value.__pretty_str__(nesting, indentation) else: - text = repr(self._value) + text = str(self._value) return "%s: %s" % (self.__class__.__name__, text) + def read(self): self.stream.seek(self.pos) return self.subcon._parse(self.stream, self.context) + def dispose(self): self.subcon = None self.stream = None self.context = None self.pos = None + def _get_value(self): if self._value is NotImplemented: self._value = self.read() return self._value - value = property(_get_value) - has_value = property(lambda self: self._value is not NotImplemented) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + value = property(_get_value) + has_value = property(lambda self: self._value is not NotImplemented) diff --git a/elftools/construct/lib/hex.py b/elftools/construct/lib/hex.py index 0bb2f02..e378e22 100644 --- a/elftools/construct/lib/hex.py +++ b/elftools/construct/lib/hex.py @@ -1,34 +1,44 @@ -_printable = dict((chr(i), ".") for i in range(256)) -_printable.update((chr(i), chr(i)) for i in range(32, 128)) +from .py3compat import byte2int, int2byte, bytes2str -def hexdump(data, linesize = 16): + +# Map an integer in the inclusive range 0-255 to its string byte representation +_printable = dict((i, ".") for i in range(256)) +_printable.update((i, bytes2str(int2byte(i))) for i in range(32, 128)) + + +def hexdump(data, linesize): + """ + data is a bytes object. The returned result is a string. + """ prettylines = [] if len(data) < 65536: fmt = "%%04X %%-%ds %%s" else: fmt = "%%08X %%-%ds %%s" fmt = fmt % (3 * linesize - 1,) - for i in xrange(0, len(data), linesize): + for i in range(0, len(data), linesize): line = data[i : i + linesize] - hextext = " ".join(b.encode("hex") for b in line) - rawtext = "".join(_printable[b] for b in line) - prettylines.append(fmt % (i, hextext, rawtext)) + hextext = " ".join('%02x' % byte2int(b) for b in line) + rawtext = "".join(_printable[byte2int(b)] for b in line) + prettylines.append(fmt % (i, str(hextext), str(rawtext))) return prettylines -class HexString(str): + +class HexString(bytes): """ - represents a string that will be hex-dumped (only via __pretty_str__). - this class derives of str, and behaves just like a normal string in all - other contexts. + Represents bytes that will be hex-dumped to a string when its string + representation is requested. """ def __init__(self, data, linesize = 16): - str.__init__(self, data) self.linesize = linesize - def __new__(cls, data, *args, **kwargs): - return str.__new__(cls, data) - def __pretty_str__(self, nesting = 1, indentation = " "): - sep = "\n" + indentation * nesting - return sep + sep.join(hexdump(self)) - + def __new__(cls, data, *args, **kwargs): + return bytes.__new__(cls, data) + + def __str__(self): + if not self: + return "''" + sep = "\n" + return sep + sep.join( + hexdump(self, self.linesize)) diff --git a/elftools/construct/macros.py b/elftools/construct/macros.py index dcffb0e..5e893dd 100644 --- a/elftools/construct/macros.py +++ b/elftools/construct/macros.py @@ -1,16 +1,24 @@ -from lib import BitStreamReader, BitStreamWriter, encode_bin, decode_bin -from core import * -from adapters import * +from .lib.py3compat import int2byte +from .lib import (BitStreamReader, BitStreamWriter, encode_bin, + decode_bin) +from .core import (Struct, MetaField, StaticField, FormatField, + OnDemand, Pointer, Switch, Value, RepeatUntil, MetaArray, Sequence, Range, + Select, Pass, SizeofError, Buffered, Restream, Reconfig) +from .adapters import (BitIntegerAdapter, PaddingAdapter, + ConstAdapter, CStringAdapter, LengthValueAdapter, IndexingAdapter, + PaddedStringAdapter, FlagsAdapter, StringAdapter, MappingAdapter) #=============================================================================== # fields #=============================================================================== def Field(name, length): - """a field - * name - the name of the field - * length - the length of the field. the length can be either an integer - (StaticField), or a function that takes the context as an argument and + """ + A field consisting of a specified number of bytes. + + :param str name: the name of the field + :param length: the length of the field. the length can be either an integer + (StaticField), or a function that takes the context as an argument and returns the length (MetaField) """ if callable(length): @@ -19,49 +27,83 @@ def Field(name, length): return StaticField(name, length) def BitField(name, length, swapped = False, signed = False, bytesize = 8): - """a bit field; must be enclosed in a BitStruct - * name - the name of the field - * length - the length of the field in bits. the length can be either - an integer, or a function that takes the context as an argument and - returns the length - * swapped - whether the value is byte-swapped (little endian). the - default is False. - * signed - whether the value of the bitfield is a signed integer. the - default is False. - * bytesize - the number of bits in a byte (used for byte-swapping). the - default is 8. - """ - return BitIntegerAdapter(Field(name, length), + """ + BitFields, as the name suggests, are fields that operate on raw, unaligned + bits, and therefore must be enclosed in a BitStruct. Using them is very + similar to all normal fields: they take a name and a length (in bits). + + :param str name: name of the field + :param int length: number of bits in the field, or a function that takes + the context as its argument and returns the length + :param bool swapped: whether the value is byte-swapped + :param bool signed: whether the value is signed + :param int bytesize: number of bits per byte, for byte-swapping + + >>> foo = BitStruct("foo", + ... BitField("a", 3), + ... Flag("b"), + ... Padding(3), + ... Nibble("c"), + ... BitField("d", 5), + ... ) + >>> foo.parse("\\xe1\\x1f") + Container(a = 7, b = False, c = 8, d = 31) + >>> foo = BitStruct("foo", + ... BitField("a", 3), + ... Flag("b"), + ... Padding(3), + ... Nibble("c"), + ... Struct("bar", + ... Nibble("d"), + ... Bit("e"), + ... ) + ... ) + >>> foo.parse("\\xe1\\x1f") + Container(a = 7, b = False, bar = Container(d = 15, e = 1), c = 8) + """ + + return BitIntegerAdapter(Field(name, length), length, - swapped = swapped, - signed = signed, - bytesize = bytesize + swapped=swapped, + signed=signed, + bytesize=bytesize ) def Padding(length, pattern = "\x00", strict = False): r"""a padding field (value is discarded) * length - the length of the field. the length can be either an integer, - or a function that takes the context as an argument and returns the + or a function that takes the context as an argument and returns the length * pattern - the padding pattern (character) to use. default is "\x00" - * strict - whether or not to raise an exception is the actual padding + * strict - whether or not to raise an exception is the actual padding pattern mismatches the desired pattern. default is False. """ - return PaddingAdapter(Field(None, length), - pattern = pattern, + return PaddingAdapter(Field(None, length), + pattern = pattern, strict = strict, ) def Flag(name, truth = 1, falsehood = 0, default = False): - """a flag field (True or False) - * name - the name of the field - * truth - the numeric value of truth. the default is 1. - * falsehood - the numeric value of falsehood. the default is 0. - * default - the default value to assume, when the value is neither - `truth` nor `falsehood`. the default is False. - """ - return SymmetricMapping(Field(name, 1), - {True : chr(truth), False : chr(falsehood)}, + """ + A flag. + + Flags are usually used to signify a Boolean value, and this construct + maps values onto the ``bool`` type. + + .. note:: This construct works with both bit and byte contexts. + + .. warning:: Flags default to False, not True. This is different from the + C and Python way of thinking about truth, and may be subject to change + in the future. + + :param str name: field name + :param int truth: value of truth (default 1) + :param int falsehood: value of falsehood (default 0) + :param bool default: default value (default False) + """ + + return SymmetricMapping(Field(name, 1), + {True : int2byte(truth), False : int2byte(falsehood)}, default = default, ) @@ -181,11 +223,25 @@ def NFloat64(name): # arrays #=============================================================================== def Array(count, subcon): - """array of subcon repeated count times. - * subcon - the subcon. - * count - an integer, or a function taking the context as an argument, - returning the count """ + Repeats the given unit a fixed number of times. + + :param int count: number of times to repeat + :param ``Construct`` subcon: construct to repeat + + >>> c = Array(4, UBInt8("foo")) + >>> c.parse("\\x01\\x02\\x03\\x04") + [1, 2, 3, 4] + >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06") + [1, 2, 3, 4] + >>> c.build([5,6,7,8]) + '\\x05\\x06\\x07\\x08' + >>> c.build([5,6,7,8,9]) + Traceback (most recent call last): + ... + construct.core.RangeError: expected 4..4, found 5 + """ + if callable(count): con = MetaArray(count, subcon) else: @@ -196,28 +252,67 @@ def Array(count, subcon): def PrefixedArray(subcon, length_field = UBInt8("length")): """an array prefixed by a length field. * subcon - the subcon to be repeated - * length_field - an integer construct + * length_field - a construct returning an integer """ return LengthValueAdapter( - Sequence(subcon.name, - length_field, + Sequence(subcon.name, + length_field, Array(lambda ctx: ctx[length_field.name], subcon), nested = False ) ) def OpenRange(mincount, subcon): - from sys import maxint - return Range(mincount, maxint, subcon) + from sys import maxsize + return Range(mincount, maxsize, subcon) def GreedyRange(subcon): - """an open range (1 or more times) of repeated subcon. - * subcon - the subcon to repeat""" + """ + Repeats the given unit one or more times. + + :param ``Construct`` subcon: construct to repeat + + >>> from construct import GreedyRange, UBInt8 + >>> c = GreedyRange(UBInt8("foo")) + >>> c.parse("\\x01") + [1] + >>> c.parse("\\x01\\x02\\x03") + [1, 2, 3] + >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06") + [1, 2, 3, 4, 5, 6] + >>> c.parse("") + Traceback (most recent call last): + ... + construct.core.RangeError: expected 1..2147483647, found 0 + >>> c.build([1,2]) + '\\x01\\x02' + >>> c.build([]) + Traceback (most recent call last): + ... + construct.core.RangeError: expected 1..2147483647, found 0 + """ + return OpenRange(1, subcon) def OptionalGreedyRange(subcon): - """an open range (0 or more times) of repeated subcon. - * subcon - the subcon to repeat""" + """ + Repeats the given unit zero or more times. This repeater can't + fail, as it accepts lists of any length. + + :param ``Construct`` subcon: construct to repeat + + >>> from construct import OptionalGreedyRange, UBInt8 + >>> c = OptionalGreedyRange(UBInt8("foo")) + >>> c.parse("") + [] + >>> c.parse("\\x01\\x02") + [1, 2] + >>> c.build([]) + '' + >>> c.build([1,2]) + '\\x01\\x02' + """ + return OpenRange(0, subcon) @@ -234,23 +329,23 @@ def Bitwise(subcon): """converts the stream to bits, and passes the bitstream to subcon * subcon - a bitwise construct (usually BitField) """ - # subcons larger than MAX_BUFFER will be wrapped by Restream instead - # of Buffered. implementation details, don't stick your nose :) + # subcons larger than MAX_BUFFER will be wrapped by Restream instead + # of Buffered. implementation details, don't stick your nose in :) MAX_BUFFER = 1024 * 8 def resizer(length): if length & 7: raise SizeofError("size must be a multiple of 8", length) return length >> 3 if not subcon._is_flag(subcon.FLAG_DYNAMIC) and subcon.sizeof() < MAX_BUFFER: - con = Buffered(subcon, - encoder = decode_bin, - decoder = encode_bin, + con = Buffered(subcon, + encoder = decode_bin, + decoder = encode_bin, resizer = resizer ) else: - con = Restream(subcon, - stream_reader = BitStreamReader, - stream_writer = BitStreamWriter, + con = Restream(subcon, + stream_reader = BitStreamReader, + stream_writer = BitStreamWriter, resizer = resizer) return con @@ -262,22 +357,27 @@ def Aligned(subcon, modulus = 4, pattern = "\x00"): """ if modulus < 2: raise ValueError("modulus must be >= 2", modulus) - if modulus in (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024): - def padlength(ctx): - m1 = modulus - 1 - return (modulus - (subcon._sizeof(ctx) & m1)) & m1 - else: - def padlength(ctx): - return (modulus - (subcon._sizeof(ctx) % modulus)) % modulus - return IndexingAdapter( - Sequence(subcon.name, - subcon, - Padding(padlength, pattern = pattern), - nested = False, - ), - 0 + def padlength(ctx): + return (modulus - (subcon._sizeof(ctx) % modulus)) % modulus + return SeqOfOne(subcon.name, + subcon, + # ?????? + # ?????? + # ?????? + # ?????? + Padding(padlength, pattern = pattern), + nested = False, ) +def SeqOfOne(name, *args, **kw): + """a sequence of one element. only the first element is meaningful, the + rest are discarded + * name - the name of the sequence + * args - subconstructs + * kw - any keyword arguments to Sequence + """ + return IndexingAdapter(Sequence(name, *args, **kw), index = 0) + def Embedded(subcon): """embeds a struct into the enclosing struct. * subcon - the struct to embed @@ -305,25 +405,25 @@ def Alias(newname, oldname): def SymmetricMapping(subcon, mapping, default = NotImplemented): """defines a symmetrical mapping: a->b, b->a. * subcon - the subcon to map - * mapping - the encoding mapping (a dict); the decoding mapping is + * mapping - the encoding mapping (a dict); the decoding mapping is achieved by reversing this mapping - * default - the default value to use when no mapping is found. if no + * default - the default value to use when no mapping is found. if no default value is given, and exception is raised. setting to Pass would return the value "as is" (unmapped) """ - reversed_mapping = dict((v, k) for k, v in mapping.iteritems()) - return MappingAdapter(subcon, - encoding = mapping, - decoding = reversed_mapping, + reversed_mapping = dict((v, k) for k, v in mapping.items()) + return MappingAdapter(subcon, + encoding = mapping, + decoding = reversed_mapping, encdefault = default, - decdefault = default, + decdefault = default, ) def Enum(subcon, **kw): - """a set of named values mapping. + """a set of named values mapping. * subcon - the subcon to map * kw - keyword arguments which serve as the encoding mapping - * _default_ - an optional, keyword-only argument that specifies the + * _default_ - an optional, keyword-only argument that specifies the default value to use when the mapping is undefined. if not given, and exception is raised when the mapping is undefined. use `Pass` to pass the unmapped value as-is @@ -365,43 +465,69 @@ def EmbeddedBitStruct(*subcons): #=============================================================================== # strings #=============================================================================== -def String(name, length, encoding = None, padchar = None, - paddir = "right", trimdir = "right"): - """a fixed-length, optionally padded string of characters - * name - the name of the field - * length - the length (integer) - * encoding - the encoding to use (e.g., "utf8"), or None, for raw bytes. - default is None - * padchar - the padding character (commonly "\x00"), or None to - disable padding. default is None - * paddir - the direction where padding is placed ("right", "left", or - "center"). the default is "right". this argument is meaningless if - padchar is None. - * trimdir - the direction where trimming will take place ("right" or - "left"). the default is "right". trimming is only meaningful for - building, when the given string is too long. this argument is - meaningless if padchar is None. - """ - con = StringAdapter(Field(name, length), encoding = encoding) +def String(name, length, encoding=None, padchar=None, paddir="right", + trimdir="right"): + """ + A configurable, fixed-length string field. + + The padding character must be specified for padding and trimming to work. + + :param str name: name + :param int length: length, in bytes + :param str encoding: encoding (e.g. "utf8") or None for no encoding + :param str padchar: optional character to pad out strings + :param str paddir: direction to pad out strings; one of "right", "left", + or "both" + :param str trim: direction to trim strings; one of "right", "left" + + >>> from construct import String + >>> String("foo", 5).parse("hello") + 'hello' + >>> + >>> String("foo", 12, encoding = "utf8").parse("hello joh\\xd4\\x83n") + u'hello joh\\u0503n' + >>> + >>> foo = String("foo", 10, padchar = "X", paddir = "right") + >>> foo.parse("helloXXXXX") + 'hello' + >>> foo.build("hello") + 'helloXXXXX' + """ + + con = StringAdapter(Field(name, length), encoding=encoding) if padchar is not None: - con = PaddedStringAdapter(con, - padchar = padchar, - paddir = paddir, - trimdir = trimdir - ) + con = PaddedStringAdapter(con, padchar=padchar, paddir=paddir, + trimdir=trimdir) return con -def PascalString(name, length_field = UBInt8("length"), encoding = None): - """a string prefixed with a length field. the data must directly follow - the length field. - * name - the name of the - * length_field - a numeric construct (i.e., UBInt8) that holds the - length. default is an unsigned, 8-bit integer field. note that this - argument must pass an instance of a construct, not a class - (`UBInt8("length")` rather than `UBInt8`) - * encoding - the encoding to use (e.g., "utf8"), or None, for raw bytes. - default is None +def PascalString(name, length_field=UBInt8("length"), encoding=None): + """ + A length-prefixed string. + + ``PascalString`` is named after the string types of Pascal, which are + length-prefixed. Lisp strings also follow this convention. + + The length field will appear in the same ``Container`` as the + ``PascalString``, with the given name. + + :param str name: name + :param ``Construct`` length_field: a field which will store the length of + the string + :param str encoding: encoding (e.g. "utf8") or None for no encoding + + >>> foo = PascalString("foo") + >>> foo.parse("\\x05hello") + 'hello' + >>> foo.build("hello world") + '\\x0bhello world' + >>> + >>> foo = PascalString("foo", length_field = UBInt16("length")) + >>> foo.parse("\\x00\\x05hello") + 'hello' + >>> foo.build("hello") + '\\x00\\x05hello' """ + return StringAdapter( LengthValueAdapter( Sequence(name, @@ -409,28 +535,46 @@ def PascalString(name, length_field = UBInt8("length"), encoding = None): Field("data", lambda ctx: ctx[length_field.name]), ) ), - encoding = encoding, + encoding=encoding, ) -def CString(name, terminators = "\x00", encoding = None, - char_field = Field(None, 1)): - r"""a c-style string (string terminated by a terminator char) - * name - the name fo the string - * terminators - a sequence of terminator chars. default is "\x00". - * encoding - the encoding to use (e.g., "utf8"), or None, for raw bytes. - default is None - * char_field - the construct that represents a single character. default - is a one-byte character. note that this argument must be an instance - of a construct, not a construct class (`Field("char", 1)` rather than - `Field`) +def CString(name, terminators=b"\x00", encoding=None, + char_field=Field(None, 1)): + """ + A string ending in a terminator. + + ``CString`` is similar to the strings of C, C++, and other related + programming languages. + + By default, the terminator is the NULL byte (b``0x00``). + + :param str name: name + :param iterable terminators: sequence of valid terminators, in order of + preference + :param str encoding: encoding (e.g. "utf8") or None for no encoding + :param ``Construct`` char_field: construct representing a single character + + >>> foo = CString("foo") + >>> foo.parse(b"hello\\x00") + b'hello' + >>> foo.build(b"hello") + b'hello\\x00' + >>> foo = CString("foo", terminators = b"XYZ") + >>> foo.parse(b"helloX") + b'hello' + >>> foo.parse(b"helloY") + b'hello' + >>> foo.parse(b"helloZ") + b'hello' + >>> foo.build(b"hello") + b'helloX' """ + return Rename(name, CStringAdapter( - RepeatUntil(lambda obj, ctx: obj in terminators, - char_field, - ), - terminators = terminators, - encoding = encoding, + RepeatUntil(lambda obj, ctx: obj in terminators, char_field), + terminators=terminators, + encoding=encoding, ) ) @@ -463,9 +607,9 @@ def If(predicate, subcon, elsevalue = None): * elsevalue - the value that will be used should the predicate return False. by default this value is None. """ - return IfThenElse(subcon.name, - predicate, - subcon, + return IfThenElse(subcon.name, + predicate, + subcon, Value("elsevalue", lambda ctx: elsevalue) ) @@ -474,41 +618,17 @@ def If(predicate, subcon, elsevalue = None): # misc #=============================================================================== def OnDemandPointer(offsetfunc, subcon, force_build = True): - """an on-demand pointer. - * offsetfunc - a function taking the context as an argument and returning + """an on-demand pointer. + * offsetfunc - a function taking the context as an argument and returning the absolute stream position - * subcon - the subcon that will be parsed from the `offsetfunc()` stream + * subcon - the subcon that will be parsed from the `offsetfunc()` stream position on demand * force_build - see OnDemand. by default True. """ - return OnDemand(Pointer(offsetfunc, subcon), - advance_stream = False, + return OnDemand(Pointer(offsetfunc, subcon), + advance_stream = False, force_build = force_build ) - - - - - - - - - - - - - - - - - - - - - - - - - - +def Magic(data): + return ConstAdapter(Field(None, len(data)), data) diff --git a/examples/elf_relocations.py b/examples/elf_relocations.py index c8405ad..1817595 100644 --- a/examples/elf_relocations.py +++ b/examples/elf_relocations.py @@ -38,12 +38,9 @@ def process_file(filename): reladyn_name, reladyn.num_relocations())) for reloc in reladyn.iter_relocations(): - # Use the Relocation's object ability to pretty-print itself to a - # string to examine it - print(' ', reloc) - + print(' Relocation (%s)' % 'RELA' if reloc.is_RELA() else 'REL') # Relocation entry attributes are available through item lookup - print(' offset = %s' % reloc['r_offset']) + print(' offset = %s' % reloc['r_offset']) if __name__ == '__main__': diff --git a/examples/reference_output/elf_relocations.out b/examples/reference_output/elf_relocations.out index 41bb576..55ffc05 100644 --- a/examples/reference_output/elf_relocations.out +++ b/examples/reference_output/elf_relocations.out @@ -1,9 +1,4 @@ Processing file: ./examples/sample_exe64.elf .rela.dyn section with 1 relocations - - offset = 6295520 + Relocation (RELA) + offset = 6295520 -- 2.30.2