From e0735d5c89f66d0add7b4036b20d2e31a4b54415 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 8 Sep 2011 20:12:44 +0300 Subject: [PATCH] removed CRs --- binfiles/z32.elf | Bin 0 -> 5492 bytes elftools/common/exceptions.py | 26 +- elftools/common/utils.py | 62 +- elftools/construct/__init__.py | 162 +- elftools/construct/adapters.py | 968 +++++------ elftools/construct/core.py | 2434 +++++++++++++-------------- elftools/construct/debug.py | 320 ++-- elftools/construct/lib/__init__.py | 20 +- elftools/construct/lib/binary.py | 122 +- elftools/construct/lib/bitstream.py | 160 +- elftools/construct/lib/container.py | 516 +++--- elftools/construct/lib/hex.py | 68 +- elftools/construct/lib/utils.py | 44 +- elftools/construct/macros.py | 1028 +++++------ elftools/construct/text.py | 572 +++---- elftools/elf/constants.py | 84 +- elftools/elf/elffile.py | 286 ++-- elftools/elf/enums.py | 214 +-- elftools/elf/sections.py | 96 +- elftools/elf/segments.py | 50 +- elftools/elf/structs.py | 294 ++-- z.py | 115 +- 22 files changed, 3821 insertions(+), 3820 deletions(-) create mode 100755 binfiles/z32.elf diff --git a/binfiles/z32.elf b/binfiles/z32.elf new file mode 100755 index 0000000000000000000000000000000000000000..d6b3cd1ff7e422d1845ec7887ccb1e0cb1db058c GIT binary patch literal 5492 zcmeHLUuauZ7(Z!mw^S=;Tg2kxh?;8q(7SHS#41i**R{p5zqTr+%hD!o?#Q2!d!rk4 z*e(`Y(n`T8eOrqUzNic)B6JTe8SCa#6ofrUndl&;Rr^rKz|rgPcW+L~Y6tq@TR!;C z_kHL4o$q|-JLiU+^Fi~WgARv7NGcQM!m`qe(~nN~7Te<&Uf~qA!XvheP3WS}KL8Ba z8h}9+=u^Pu^uhKI!VlgCn@k^LlP%&{@I^St94}n9zv^)zsxB!`vVs9sV9rZ_8H5*p zFMQ-2Y%=|vgWL*yE9NF2fK8UZV|R=Y%zuy^e;oee`B#7>b3Ak38P&RXMnm;cEuK27 z>IrqX?L%sozvXZ{G&@hmM5@+UHzaFAfA$a8v#noG_Vq6Gee%=!^Ap+HeM|JoIk-mJ z>q+)A5362Y;U$LHFW>xShI%m(C&>$?P8yI~!B2rlKSYfox3IW@Kwm5o59mfPX#@fS zOYPwjUlf71Bj^oj@jyxshk&E2fQtpSxM(@lw!bN`TWvrnP?=Q=2U^yJOmb=3WAwdZ zn_?OJw~0wkD$dQ23MCfVK62+odP&I>);X9x`m_+4kMEnNIrM{3k>7#d{P%47G3K4l zn^|-3Jw!5V?xW=xvxX5PR2Obzm|4Tf5&9N#FzF!e`t9V%&*@*6kG^$0lEayg+(vV( zBQgV#p964Z`~fQDHSnqn;Y8}SDL3SFHD(=Lbfca9t1P|fXuPHG82g6t(=(3G2r=D5 zbX)MoTgm+O3I=ebuPL2f*)ijJF?RI@;t@Z+R6XSZc4)@f`YrpQRL&_r_L;ex+4cEk znw?#1ofyA9-rF4~@<$M6Fk9(CLQ9_frB%}$o36&{#?ox-=4Q>ZA=B{fx|?ZsjU4bf zGtE^H3PO)4KJ(%gqSDtLjjPG|^fgcC$v{^&c^CV~^Xss_Y_7OOr3#cPP^v(w0;LL+Dp0CGsRE@6{Qnie ze_ZQcASGwy)r5DKD)7Vw#fi&{h!*ZkqqtlCxoRdZL-H=>f}i)j9PCFRo57E=AN&NE z#Q(V~_=n7U9Q%1EZ1Fv98^ z``hY`U|+%Ni>K7?l!kk<77{A0NKlUmHFPenpNrYjNZOwF!bx3A#2+vM@Fl}h;%((j z)Zl~~S|fZGZ7&SC5}}|G6lypU=uHM=VWIXIiKLDRY;n2=fe<7Z(|RyL0t3;gRy2EL zw+?hqA{GnB4Oy3eBe}QS3+|=$)&Yb2C5b)~`!<7#v)&}&@IsfwyEo|>u*5}hh9bcE zERWqt+)FTd-WZQ(aI}Eq*&`80_Ji-@oDjz|dKow`^!2!X=;K<5H zej5W&isNyuv~{q=@mm@Oj(Kxlezy|W51qNPpWpNZa1GEU5l4C-EO9r0yRiX|*b7BC zfp-HIDJqKrA9ElLgJnGahD0$KH}W--^L|voVLCAk+%OzmpCpO<6djC5+&FOKa7Z7q zQlErQ9QV$8Q^SMQM)5u`!tt@f{Se$&ML0f6cz~00!L_DefzBFpUhAz6xEyruFNtHA z$5pV*kMBXt8^j}awg|`H&=uhL`zeVS@^@ev&xuzSCpKWC^)eUsGe5q@xE_N0u@J8T Zl0@8dwraJUOEP0g+5_CYG$4r~{SC5E+|~d9 literal 0 HcmV?d00001 diff --git a/elftools/common/exceptions.py b/elftools/common/exceptions.py index a4df582..8a40877 100644 --- a/elftools/common/exceptions.py +++ b/elftools/common/exceptions.py @@ -1,14 +1,14 @@ -#------------------------------------------------------------------------------- -# elftools: common/exceptions.py -# -# Exception classes for elftools -# -# Eli Bendersky (eliben@gmail.com) -# This code is in the public domain -#------------------------------------------------------------------------------- -class ELFError(Exception): +#------------------------------------------------------------------------------- +# elftools: common/exceptions.py +# +# Exception classes for elftools +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +class ELFError(Exception): pass - -class ELFParseError(ELFError): - pass - + +class ELFParseError(ELFError): + pass + diff --git a/elftools/common/utils.py b/elftools/common/utils.py index 230ba96..666176f 100644 --- a/elftools/common/utils.py +++ b/elftools/common/utils.py @@ -1,32 +1,32 @@ -#------------------------------------------------------------------------------- -# elftools: common/utils.py -# -# Miscellaneous utilities for elftools -# -# Eli Bendersky (eliben@gmail.com) -# This code is in the public domain -#------------------------------------------------------------------------------- -from .exceptions import ELFParseError, ELFError - - -def struct_parse(struct, stream, stream_pos=None): - """ Convenience function for using the given struct to parse a stream (at - its current location). - If stream_pos is provided, the stream is seeked to this position before - the parsing is done. - Wraps the error thrown by construct with our own error. - """ - try: - if stream_pos is not None: - stream.seek(stream_pos) - return struct.parse_stream(stream) - except ConstructError as e: - raise ELFParseError(e.message) - - +#------------------------------------------------------------------------------- +# elftools: common/utils.py +# +# Miscellaneous utilities for elftools +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from .exceptions import ELFParseError, ELFError + + +def struct_parse(struct, stream, stream_pos=None): + """ Convenience function for using the given struct to parse a stream (at + its current location). + If stream_pos is provided, the stream is seeked to this position before + the parsing is done. + Wraps the error thrown by construct with our own error. + """ + try: + if stream_pos is not None: + stream.seek(stream_pos) + return struct.parse_stream(stream) + except ConstructError as e: + raise ELFParseError(e.message) + + def elf_assert(cond, msg=''): - """ Assert that cond is True, otherwise raise ELFError(msg) - """ - if not cond: - raise ELFError(msg) - + """ Assert that cond is True, otherwise raise ELFError(msg) + """ + if not cond: + raise ELFError(msg) + diff --git a/elftools/construct/__init__.py b/elftools/construct/__init__.py index 0b97feb..4814bcc 100644 --- a/elftools/construct/__init__.py +++ b/elftools/construct/__init__.py @@ -1,81 +1,81 @@ -""" -Construct 2.00 -- parsing made even more fun (and faster) - -Homepage: -http://construct.wikispaces.com - -Typical usage: ->>> from construct import * - -Example: ->>> from construct import * ->>> ->>> s = Struct("foo", -... UBInt8("a"), -... UBInt16("b"), -... ) ->>> ->>> s.parse("\x01\x02\x03") -Container(a = 1, b = 515) ->>> print s.parse("\x01\x02\x03") -Container: - a = 1 - b = 515 ->>> s.build(Container(a = 1, b = 0x0203)) -"\x01\x02\x03" -""" -from core import * -from adapters import * -from macros import * -from debug import Probe, Debugger - - -#=============================================================================== -# meta data -#=============================================================================== -__author__ = "tomer filiba (tomerfiliba [at] gmail.com)" -__version__ = "2.00" - -#=============================================================================== -# shorthands -#=============================================================================== -Bits = BitField -Byte = UBInt8 -Bytes = Field -Const = ConstAdapter -Tunnel = TunnelAdapter -Embed = Embedded - -#=============================================================================== -# backward compatibility with RC1 -#=============================================================================== -MetaField = Field -MetaBytes = Field -GreedyRepeater = GreedyRange -OptionalGreedyRepeater = OptionalGreedyRange -Repeater = Array -StrictRepeater = Array -MetaRepeater = Array -OneOfValidator = OneOf -NoneOfValidator = NoneOf - -#=============================================================================== -# don't want to leek these out... -#=============================================================================== -del encode_bin, decode_bin, int_to_bin, bin_to_int, swap_bytes -del Packer, StringIO -del HexString, LazyContainer, AttrDict - - - - - - - - - - - - - - +""" +Construct 2.00 -- parsing made even more fun (and faster) + +Homepage: +http://construct.wikispaces.com + +Typical usage: +>>> from construct import * + +Example: +>>> from construct import * +>>> +>>> s = Struct("foo", +... UBInt8("a"), +... UBInt16("b"), +... ) +>>> +>>> s.parse("\x01\x02\x03") +Container(a = 1, b = 515) +>>> print s.parse("\x01\x02\x03") +Container: + a = 1 + b = 515 +>>> s.build(Container(a = 1, b = 0x0203)) +"\x01\x02\x03" +""" +from core import * +from adapters import * +from macros import * +from debug import Probe, Debugger + + +#=============================================================================== +# meta data +#=============================================================================== +__author__ = "tomer filiba (tomerfiliba [at] gmail.com)" +__version__ = "2.00" + +#=============================================================================== +# shorthands +#=============================================================================== +Bits = BitField +Byte = UBInt8 +Bytes = Field +Const = ConstAdapter +Tunnel = TunnelAdapter +Embed = Embedded + +#=============================================================================== +# backward compatibility with RC1 +#=============================================================================== +MetaField = Field +MetaBytes = Field +GreedyRepeater = GreedyRange +OptionalGreedyRepeater = OptionalGreedyRange +Repeater = Array +StrictRepeater = Array +MetaRepeater = Array +OneOfValidator = OneOf +NoneOfValidator = NoneOf + +#=============================================================================== +# don't want to leek these out... +#=============================================================================== +del encode_bin, decode_bin, int_to_bin, bin_to_int, swap_bytes +del Packer, StringIO +del HexString, LazyContainer, AttrDict + + + + + + + + + + + + + + diff --git a/elftools/construct/adapters.py b/elftools/construct/adapters.py index 69c84d9..182e108 100644 --- a/elftools/construct/adapters.py +++ b/elftools/construct/adapters.py @@ -1,484 +1,484 @@ -from core import Adapter, AdaptationError, Pass -from lib import int_to_bin, bin_to_int, swap_bytes, StringIO -from lib import FlagsContainer, HexString - - -#=============================================================================== -# exceptions -#=============================================================================== -class BitIntegerError(AdaptationError): - __slots__ = [] -class MappingError(AdaptationError): - __slots__ = [] -class ConstError(AdaptationError): - __slots__ = [] -class ValidationError(AdaptationError): - __slots__ = [] -class PaddingError(AdaptationError): - __slots__ = [] - -#=============================================================================== -# adapters -#=============================================================================== -class BitIntegerAdapter(Adapter): - """ - Adapter for bit-integers (converts bitstrings to integers, and vice versa). - See BitField. - - Parameters: - * subcon - the subcon to adapt - * width - the size of the subcon, in bits - * swapped - whether to swap byte order (little endian/big endian). - default is False (big endian) - * signed - whether the value is signed (two's complement). the default - is False (unsigned) - * bytesize - number of bits per byte, used for byte-swapping (if swapped). - default is 8. - """ - __slots__ = ["width", "swapped", "signed", "bytesize"] - def __init__(self, subcon, width, swapped = False, signed = False, - bytesize = 8): - Adapter.__init__(self, subcon) - self.width = width - self.swapped = swapped - self.signed = signed - self.bytesize = bytesize - def _encode(self, obj, context): - if obj < 0 and not self.signed: - raise BitIntegerError("object is negative, but field is not signed", - obj) - obj2 = int_to_bin(obj, width = self.width) - if self.swapped: - obj2 = swap_bytes(obj2, bytesize = self.bytesize) - return obj2 - def _decode(self, obj, context): - if self.swapped: - obj = swap_bytes(obj, bytesize = self.bytesize) - return bin_to_int(obj, signed = self.signed) - -class MappingAdapter(Adapter): - """ - Adapter that maps objects to other objects. - See SymmetricMapping and Enum. - - Parameters: - * subcon - the subcon to map - * decoding - the decoding (parsing) mapping (a dict) - * encoding - the encoding (building) mapping (a dict) - * decdefault - the default return value when the object is not found - in the decoding mapping. if no object is given, an exception is raised. - if `Pass` is used, the unmapped object will be passed as-is - * encdefault - the default return value when the object is not found - in the encoding mapping. if no object is given, an exception is raised. - if `Pass` is used, the unmapped object will be passed as-is - """ - __slots__ = ["encoding", "decoding", "encdefault", "decdefault"] - def __init__(self, subcon, decoding, encoding, - decdefault = NotImplemented, encdefault = NotImplemented): - Adapter.__init__(self, subcon) - self.decoding = decoding - self.encoding = encoding - self.decdefault = decdefault - self.encdefault = encdefault - def _encode(self, obj, context): - try: - return self.encoding[obj] - except (KeyError, TypeError): - if self.encdefault is NotImplemented: - raise MappingError("no encoding mapping for %r [%s]" % ( - obj, self.subcon.name)) - if self.encdefault is Pass: - return obj - return self.encdefault - def _decode(self, obj, context): - try: - return self.decoding[obj] - except (KeyError, TypeError): - if self.decdefault is NotImplemented: - raise MappingError("no decoding mapping for %r [%s]" % ( - obj, self.subcon.name)) - if self.decdefault is Pass: - return obj - return self.decdefault - -class FlagsAdapter(Adapter): - """ - Adapter for flag fields. Each flag is extracted from the number, resulting - in a FlagsContainer object. Not intended for direct usage. - See FlagsEnum. - - Parameters - * subcon - the subcon to extract - * flags - a dictionary mapping flag-names to their value - """ - __slots__ = ["flags"] - def __init__(self, subcon, flags): - Adapter.__init__(self, subcon) - self.flags = flags - def _encode(self, obj, context): - flags = 0 - for name, value in self.flags.iteritems(): - if getattr(obj, name, False): - flags |= value - return flags - def _decode(self, obj, context): - obj2 = FlagsContainer() - for name, value in self.flags.iteritems(): - setattr(obj2, name, bool(obj & value)) - return obj2 - -class StringAdapter(Adapter): - """ - Adapter for strings. Converts a sequence of characters into a python - string, and optionally handles character encoding. - See String. - - Parameters: - * subcon - the subcon to convert - * encoding - the character encoding name (e.g., "utf8"), or None to - return raw bytes (usually 8-bit ASCII). - """ - __slots__ = ["encoding"] - def __init__(self, subcon, encoding = None): - Adapter.__init__(self, subcon) - self.encoding = encoding - def _encode(self, obj, context): - if self.encoding: - obj = obj.encode(self.encoding) - return obj - def _decode(self, obj, context): - obj = "".join(obj) - if self.encoding: - obj = obj.decode(self.encoding) - return obj - -class PaddedStringAdapter(Adapter): - r""" - Adapter for padded strings. - See String. - - Parameters: - * subcon - the subcon to adapt - * padchar - the padding character. default is "\x00". - * paddir - the direction where padding is placed ("right", "left", or - "center"). the default is "right". - * trimdir - the direction where trimming will take place ("right" or - "left"). the default is "right". trimming is only meaningful for - building, when the given string is too long. - """ - __slots__ = ["padchar", "paddir", "trimdir"] - def __init__(self, subcon, padchar = "\x00", paddir = "right", - trimdir = "right"): - if paddir not in ("right", "left", "center"): - raise ValueError("paddir must be 'right', 'left' or 'center'", - paddir) - if trimdir not in ("right", "left"): - raise ValueError("trimdir must be 'right' or 'left'", trimdir) - Adapter.__init__(self, subcon) - self.padchar = padchar - self.paddir = paddir - self.trimdir = trimdir - def _decode(self, obj, context): - if self.paddir == "right": - obj = obj.rstrip(self.padchar) - elif self.paddir == "left": - obj = obj.lstrip(self.padchar) - else: - obj = obj.strip(self.padchar) - return obj - def _encode(self, obj, context): - size = self._sizeof(context) - if self.paddir == "right": - obj = obj.ljust(size, self.padchar) - elif self.paddir == "left": - obj = obj.rjust(size, self.padchar) - else: - obj = obj.center(size, self.padchar) - if len(obj) > size: - if self.trimdir == "right": - obj = obj[:size] - else: - obj = obj[-size:] - return obj - -class LengthValueAdapter(Adapter): - """ - Adapter for length-value pairs. It extracts only the value from the - pair, and calculates the length based on the value. - See PrefixedArray and PascalString. - - Parameters: - * subcon - the subcon returning a length-value pair - """ - __slots__ = [] - def _encode(self, obj, context): - return (len(obj), obj) - def _decode(self, obj, context): - return obj[1] - -class CStringAdapter(StringAdapter): - r""" - Adapter for C-style strings (strings terminated by a terminator char). - - Parameters: - * subcon - the subcon to convert - * terminators - a sequence of terminator chars. default is "\x00". - * encoding - the character encoding to use (e.g., "utf8"), or None to - return raw-bytes. the terminator characters are not affected by the - encoding. - """ - __slots__ = ["terminators"] - def __init__(self, subcon, terminators = "\x00", encoding = None): - StringAdapter.__init__(self, subcon, encoding = encoding) - self.terminators = terminators - def _encode(self, obj, context): - return StringAdapter._encode(self, obj, context) + self.terminators[0] - def _decode(self, obj, context): - return StringAdapter._decode(self, obj[:-1], context) - -class TunnelAdapter(Adapter): - """ - Adapter for tunneling (as in protocol tunneling). A tunnel is construct - nested upon another (layering). For parsing, the lower layer first parses - the data (note: it must return a string!), then the upper layer is called - to parse that data (bottom-up). For building it works in a top-down manner; - first the upper layer builds the data, then the lower layer takes it and - writes it to the stream. - - Parameters: - * subcon - the lower layer subcon - * inner_subcon - the upper layer (tunneled/nested) subcon - - Example: - # a pascal string containing compressed data (zlib encoding), so first - # the string is read, decompressed, and finally re-parsed as an array - # of UBInt16 - TunnelAdapter( - PascalString("data", encoding = "zlib"), - GreedyRange(UBInt16("elements")) - ) - """ - __slots__ = ["inner_subcon"] - def __init__(self, subcon, inner_subcon): - Adapter.__init__(self, subcon) - self.inner_subcon = inner_subcon - def _decode(self, obj, context): - return self.inner_subcon._parse(StringIO(obj), context) - def _encode(self, obj, context): - stream = StringIO() - self.inner_subcon._build(obj, stream, context) - return stream.getvalue() - -class ExprAdapter(Adapter): - """ - A generic adapter that accepts 'encoder' and 'decoder' as parameters. You - can use ExprAdapter instead of writing a full-blown class when only a - simple expression is needed. - - Parameters: - * subcon - the subcon to adapt - * encoder - a function that takes (obj, context) and returns an encoded - version of obj - * decoder - a function that takes (obj, context) and returns an decoded - version of obj - - Example: - ExprAdapter(UBInt8("foo"), - encoder = lambda obj, ctx: obj / 4, - decoder = lambda obj, ctx: obj * 4, - ) - """ - __slots__ = ["_encode", "_decode"] - def __init__(self, subcon, encoder, decoder): - Adapter.__init__(self, subcon) - self._encode = encoder - self._decode = decoder - -class HexDumpAdapter(Adapter): - """ - Adapter for hex-dumping strings. It returns a HexString, which is a string - """ - __slots__ = ["linesize"] - def __init__(self, subcon, linesize = 16): - Adapter.__init__(self, subcon) - self.linesize = linesize - def _encode(self, obj, context): - return obj - def _decode(self, obj, context): - return HexString(obj, linesize = self.linesize) - -class ConstAdapter(Adapter): - """ - Adapter for enforcing a constant value ("magic numbers"). When decoding, - the return value is checked; when building, the value is substituted in. - - Parameters: - * subcon - the subcon to validate - * value - the expected value - - Example: - Const(Field("signature", 2), "MZ") - """ - __slots__ = ["value"] - def __init__(self, subcon, value): - Adapter.__init__(self, subcon) - self.value = value - def _encode(self, obj, context): - if obj is None or obj == self.value: - return self.value - else: - raise ConstError("expected %r, found %r" % (self.value, obj)) - def _decode(self, obj, context): - if obj != self.value: - raise ConstError("expected %r, found %r" % (self.value, obj)) - return obj - -class SlicingAdapter(Adapter): - """ - Adapter for slicing a list (getting a slice from that list) - - Parameters: - * subcon - the subcon to slice - * start - start index - * stop - stop index (or None for up-to-end) - * step - step (or None for every element) - """ - __slots__ = ["start", "stop", "step"] - def __init__(self, subcon, start, stop = None): - Adapter.__init__(self, subcon) - self.start = start - self.stop = stop - def _encode(self, obj, context): - if self.start is None: - return obj - return [None] * self.start + obj - def _decode(self, obj, context): - return obj[self.start:self.stop] - -class IndexingAdapter(Adapter): - """ - Adapter for indexing a list (getting a single item from that list) - - Parameters: - * subcon - the subcon to index - * index - the index of the list to get - """ - __slots__ = ["index"] - def __init__(self, subcon, index): - Adapter.__init__(self, subcon) - if type(index) is not int: - raise TypeError("index must be an integer", type(index)) - self.index = index - def _encode(self, obj, context): - return [None] * self.index + [obj] - def _decode(self, obj, context): - return obj[self.index] - -class PaddingAdapter(Adapter): - r""" - Adapter for padding. - - Parameters: - * subcon - the subcon to pad - * pattern - the padding pattern (character). default is "\x00") - * strict - whether or not to verify, during parsing, that the given - padding matches the padding pattern. default is False (unstrict) - """ - __slots__ = ["pattern", "strict"] - def __init__(self, subcon, pattern = "\x00", strict = False): - Adapter.__init__(self, subcon) - self.pattern = pattern - self.strict = strict - def _encode(self, obj, context): - return self._sizeof(context) * self.pattern - def _decode(self, obj, context): - if self.strict: - expected = self._sizeof(context) * self.pattern - if obj != expected: - raise PaddingError("expected %r, found %r" % (expected, obj)) - return obj - - -#=============================================================================== -# validators -#=============================================================================== -class Validator(Adapter): - """ - Abstract class: validates a condition on the encoded/decoded object. - Override _validate(obj, context) in deriving classes. - - Parameters: - * subcon - the subcon to validate - """ - __slots__ = [] - def _decode(self, obj, context): - if not self._validate(obj, context): - raise ValidationError("invalid object", obj) - return obj - def _encode(self, obj, context): - return self._decode(obj, context) - def _validate(self, obj, context): - raise NotImplementedError() - -class OneOf(Validator): - """ - Validates that the value is one of the listed values - - Parameters: - * subcon - the subcon to validate - * valids - a set of valid values - """ - __slots__ = ["valids"] - def __init__(self, subcon, valids): - Validator.__init__(self, subcon) - self.valids = valids - def _validate(self, obj, context): - return obj in self.valids - -class NoneOf(Validator): - """ - Validates that the value is none of the listed values - - Parameters: - * subcon - the subcon to validate - * invalids - a set of invalid values - """ - __slots__ = ["invalids"] - def __init__(self, subcon, invalids): - Validator.__init__(self, subcon) - self.invalids = invalids - def _validate(self, obj, context): - return obj not in self.invalids - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +from core import Adapter, AdaptationError, Pass +from lib import int_to_bin, bin_to_int, swap_bytes, StringIO +from lib import FlagsContainer, HexString + + +#=============================================================================== +# exceptions +#=============================================================================== +class BitIntegerError(AdaptationError): + __slots__ = [] +class MappingError(AdaptationError): + __slots__ = [] +class ConstError(AdaptationError): + __slots__ = [] +class ValidationError(AdaptationError): + __slots__ = [] +class PaddingError(AdaptationError): + __slots__ = [] + +#=============================================================================== +# adapters +#=============================================================================== +class BitIntegerAdapter(Adapter): + """ + Adapter for bit-integers (converts bitstrings to integers, and vice versa). + See BitField. + + Parameters: + * subcon - the subcon to adapt + * width - the size of the subcon, in bits + * swapped - whether to swap byte order (little endian/big endian). + default is False (big endian) + * signed - whether the value is signed (two's complement). the default + is False (unsigned) + * bytesize - number of bits per byte, used for byte-swapping (if swapped). + default is 8. + """ + __slots__ = ["width", "swapped", "signed", "bytesize"] + def __init__(self, subcon, width, swapped = False, signed = False, + bytesize = 8): + Adapter.__init__(self, subcon) + self.width = width + self.swapped = swapped + self.signed = signed + self.bytesize = bytesize + def _encode(self, obj, context): + if obj < 0 and not self.signed: + raise BitIntegerError("object is negative, but field is not signed", + obj) + obj2 = int_to_bin(obj, width = self.width) + if self.swapped: + obj2 = swap_bytes(obj2, bytesize = self.bytesize) + return obj2 + def _decode(self, obj, context): + if self.swapped: + obj = swap_bytes(obj, bytesize = self.bytesize) + return bin_to_int(obj, signed = self.signed) + +class MappingAdapter(Adapter): + """ + Adapter that maps objects to other objects. + See SymmetricMapping and Enum. + + Parameters: + * subcon - the subcon to map + * decoding - the decoding (parsing) mapping (a dict) + * encoding - the encoding (building) mapping (a dict) + * decdefault - the default return value when the object is not found + in the decoding mapping. if no object is given, an exception is raised. + if `Pass` is used, the unmapped object will be passed as-is + * encdefault - the default return value when the object is not found + in the encoding mapping. if no object is given, an exception is raised. + if `Pass` is used, the unmapped object will be passed as-is + """ + __slots__ = ["encoding", "decoding", "encdefault", "decdefault"] + def __init__(self, subcon, decoding, encoding, + decdefault = NotImplemented, encdefault = NotImplemented): + Adapter.__init__(self, subcon) + self.decoding = decoding + self.encoding = encoding + self.decdefault = decdefault + self.encdefault = encdefault + def _encode(self, obj, context): + try: + return self.encoding[obj] + except (KeyError, TypeError): + if self.encdefault is NotImplemented: + raise MappingError("no encoding mapping for %r [%s]" % ( + obj, self.subcon.name)) + if self.encdefault is Pass: + return obj + return self.encdefault + def _decode(self, obj, context): + try: + return self.decoding[obj] + except (KeyError, TypeError): + if self.decdefault is NotImplemented: + raise MappingError("no decoding mapping for %r [%s]" % ( + obj, self.subcon.name)) + if self.decdefault is Pass: + return obj + return self.decdefault + +class FlagsAdapter(Adapter): + """ + Adapter for flag fields. Each flag is extracted from the number, resulting + in a FlagsContainer object. Not intended for direct usage. + See FlagsEnum. + + Parameters + * subcon - the subcon to extract + * flags - a dictionary mapping flag-names to their value + """ + __slots__ = ["flags"] + def __init__(self, subcon, flags): + Adapter.__init__(self, subcon) + self.flags = flags + def _encode(self, obj, context): + flags = 0 + for name, value in self.flags.iteritems(): + if getattr(obj, name, False): + flags |= value + return flags + def _decode(self, obj, context): + obj2 = FlagsContainer() + for name, value in self.flags.iteritems(): + setattr(obj2, name, bool(obj & value)) + return obj2 + +class StringAdapter(Adapter): + """ + Adapter for strings. Converts a sequence of characters into a python + string, and optionally handles character encoding. + See String. + + Parameters: + * subcon - the subcon to convert + * encoding - the character encoding name (e.g., "utf8"), or None to + return raw bytes (usually 8-bit ASCII). + """ + __slots__ = ["encoding"] + def __init__(self, subcon, encoding = None): + Adapter.__init__(self, subcon) + self.encoding = encoding + def _encode(self, obj, context): + if self.encoding: + obj = obj.encode(self.encoding) + return obj + def _decode(self, obj, context): + obj = "".join(obj) + if self.encoding: + obj = obj.decode(self.encoding) + return obj + +class PaddedStringAdapter(Adapter): + r""" + Adapter for padded strings. + See String. + + Parameters: + * subcon - the subcon to adapt + * padchar - the padding character. default is "\x00". + * paddir - the direction where padding is placed ("right", "left", or + "center"). the default is "right". + * trimdir - the direction where trimming will take place ("right" or + "left"). the default is "right". trimming is only meaningful for + building, when the given string is too long. + """ + __slots__ = ["padchar", "paddir", "trimdir"] + def __init__(self, subcon, padchar = "\x00", paddir = "right", + trimdir = "right"): + if paddir not in ("right", "left", "center"): + raise ValueError("paddir must be 'right', 'left' or 'center'", + paddir) + if trimdir not in ("right", "left"): + raise ValueError("trimdir must be 'right' or 'left'", trimdir) + Adapter.__init__(self, subcon) + self.padchar = padchar + self.paddir = paddir + self.trimdir = trimdir + def _decode(self, obj, context): + if self.paddir == "right": + obj = obj.rstrip(self.padchar) + elif self.paddir == "left": + obj = obj.lstrip(self.padchar) + else: + obj = obj.strip(self.padchar) + return obj + def _encode(self, obj, context): + size = self._sizeof(context) + if self.paddir == "right": + obj = obj.ljust(size, self.padchar) + elif self.paddir == "left": + obj = obj.rjust(size, self.padchar) + else: + obj = obj.center(size, self.padchar) + if len(obj) > size: + if self.trimdir == "right": + obj = obj[:size] + else: + obj = obj[-size:] + return obj + +class LengthValueAdapter(Adapter): + """ + Adapter for length-value pairs. It extracts only the value from the + pair, and calculates the length based on the value. + See PrefixedArray and PascalString. + + Parameters: + * subcon - the subcon returning a length-value pair + """ + __slots__ = [] + def _encode(self, obj, context): + return (len(obj), obj) + def _decode(self, obj, context): + return obj[1] + +class CStringAdapter(StringAdapter): + r""" + Adapter for C-style strings (strings terminated by a terminator char). + + Parameters: + * subcon - the subcon to convert + * terminators - a sequence of terminator chars. default is "\x00". + * encoding - the character encoding to use (e.g., "utf8"), or None to + return raw-bytes. the terminator characters are not affected by the + encoding. + """ + __slots__ = ["terminators"] + def __init__(self, subcon, terminators = "\x00", encoding = None): + StringAdapter.__init__(self, subcon, encoding = encoding) + self.terminators = terminators + def _encode(self, obj, context): + return StringAdapter._encode(self, obj, context) + self.terminators[0] + def _decode(self, obj, context): + return StringAdapter._decode(self, obj[:-1], context) + +class TunnelAdapter(Adapter): + """ + Adapter for tunneling (as in protocol tunneling). A tunnel is construct + nested upon another (layering). For parsing, the lower layer first parses + the data (note: it must return a string!), then the upper layer is called + to parse that data (bottom-up). For building it works in a top-down manner; + first the upper layer builds the data, then the lower layer takes it and + writes it to the stream. + + Parameters: + * subcon - the lower layer subcon + * inner_subcon - the upper layer (tunneled/nested) subcon + + Example: + # a pascal string containing compressed data (zlib encoding), so first + # the string is read, decompressed, and finally re-parsed as an array + # of UBInt16 + TunnelAdapter( + PascalString("data", encoding = "zlib"), + GreedyRange(UBInt16("elements")) + ) + """ + __slots__ = ["inner_subcon"] + def __init__(self, subcon, inner_subcon): + Adapter.__init__(self, subcon) + self.inner_subcon = inner_subcon + def _decode(self, obj, context): + return self.inner_subcon._parse(StringIO(obj), context) + def _encode(self, obj, context): + stream = StringIO() + self.inner_subcon._build(obj, stream, context) + return stream.getvalue() + +class ExprAdapter(Adapter): + """ + A generic adapter that accepts 'encoder' and 'decoder' as parameters. You + can use ExprAdapter instead of writing a full-blown class when only a + simple expression is needed. + + Parameters: + * subcon - the subcon to adapt + * encoder - a function that takes (obj, context) and returns an encoded + version of obj + * decoder - a function that takes (obj, context) and returns an decoded + version of obj + + Example: + ExprAdapter(UBInt8("foo"), + encoder = lambda obj, ctx: obj / 4, + decoder = lambda obj, ctx: obj * 4, + ) + """ + __slots__ = ["_encode", "_decode"] + def __init__(self, subcon, encoder, decoder): + Adapter.__init__(self, subcon) + self._encode = encoder + self._decode = decoder + +class HexDumpAdapter(Adapter): + """ + Adapter for hex-dumping strings. It returns a HexString, which is a string + """ + __slots__ = ["linesize"] + def __init__(self, subcon, linesize = 16): + Adapter.__init__(self, subcon) + self.linesize = linesize + def _encode(self, obj, context): + return obj + def _decode(self, obj, context): + return HexString(obj, linesize = self.linesize) + +class ConstAdapter(Adapter): + """ + Adapter for enforcing a constant value ("magic numbers"). When decoding, + the return value is checked; when building, the value is substituted in. + + Parameters: + * subcon - the subcon to validate + * value - the expected value + + Example: + Const(Field("signature", 2), "MZ") + """ + __slots__ = ["value"] + def __init__(self, subcon, value): + Adapter.__init__(self, subcon) + self.value = value + def _encode(self, obj, context): + if obj is None or obj == self.value: + return self.value + else: + raise ConstError("expected %r, found %r" % (self.value, obj)) + def _decode(self, obj, context): + if obj != self.value: + raise ConstError("expected %r, found %r" % (self.value, obj)) + return obj + +class SlicingAdapter(Adapter): + """ + Adapter for slicing a list (getting a slice from that list) + + Parameters: + * subcon - the subcon to slice + * start - start index + * stop - stop index (or None for up-to-end) + * step - step (or None for every element) + """ + __slots__ = ["start", "stop", "step"] + def __init__(self, subcon, start, stop = None): + Adapter.__init__(self, subcon) + self.start = start + self.stop = stop + def _encode(self, obj, context): + if self.start is None: + return obj + return [None] * self.start + obj + def _decode(self, obj, context): + return obj[self.start:self.stop] + +class IndexingAdapter(Adapter): + """ + Adapter for indexing a list (getting a single item from that list) + + Parameters: + * subcon - the subcon to index + * index - the index of the list to get + """ + __slots__ = ["index"] + def __init__(self, subcon, index): + Adapter.__init__(self, subcon) + if type(index) is not int: + raise TypeError("index must be an integer", type(index)) + self.index = index + def _encode(self, obj, context): + return [None] * self.index + [obj] + def _decode(self, obj, context): + return obj[self.index] + +class PaddingAdapter(Adapter): + r""" + Adapter for padding. + + Parameters: + * subcon - the subcon to pad + * pattern - the padding pattern (character). default is "\x00") + * strict - whether or not to verify, during parsing, that the given + padding matches the padding pattern. default is False (unstrict) + """ + __slots__ = ["pattern", "strict"] + def __init__(self, subcon, pattern = "\x00", strict = False): + Adapter.__init__(self, subcon) + self.pattern = pattern + self.strict = strict + def _encode(self, obj, context): + return self._sizeof(context) * self.pattern + def _decode(self, obj, context): + if self.strict: + expected = self._sizeof(context) * self.pattern + if obj != expected: + raise PaddingError("expected %r, found %r" % (expected, obj)) + return obj + + +#=============================================================================== +# validators +#=============================================================================== +class Validator(Adapter): + """ + Abstract class: validates a condition on the encoded/decoded object. + Override _validate(obj, context) in deriving classes. + + Parameters: + * subcon - the subcon to validate + """ + __slots__ = [] + def _decode(self, obj, context): + if not self._validate(obj, context): + raise ValidationError("invalid object", obj) + return obj + def _encode(self, obj, context): + return self._decode(obj, context) + def _validate(self, obj, context): + raise NotImplementedError() + +class OneOf(Validator): + """ + Validates that the value is one of the listed values + + Parameters: + * subcon - the subcon to validate + * valids - a set of valid values + """ + __slots__ = ["valids"] + def __init__(self, subcon, valids): + Validator.__init__(self, subcon) + self.valids = valids + def _validate(self, obj, context): + return obj in self.valids + +class NoneOf(Validator): + """ + Validates that the value is none of the listed values + + Parameters: + * subcon - the subcon to validate + * invalids - a set of invalid values + """ + __slots__ = ["invalids"] + def __init__(self, subcon, invalids): + Validator.__init__(self, subcon) + self.invalids = invalids + def _validate(self, obj, context): + return obj not in self.invalids + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/elftools/construct/core.py b/elftools/construct/core.py index b09382f..773d830 100644 --- a/elftools/construct/core.py +++ b/elftools/construct/core.py @@ -1,1217 +1,1217 @@ -from lib import StringIO, Packer -from lib import Container, ListContainer, AttrDict, LazyContainer - - -#=============================================================================== -# exceptions -#=============================================================================== -class ConstructError(Exception): - __slots__ = [] -class FieldError(ConstructError): - __slots__ = [] -class SizeofError(ConstructError): - __slots__ = [] -class AdaptationError(ConstructError): - __slots__ = [] -class ArrayError(ConstructError): - __slots__ = [] -class RangeError(ConstructError): - __slots__ = [] -class SwitchError(ConstructError): - __slots__ = [] -class SelectError(ConstructError): - __slots__ = [] -class TerminatorError(ConstructError): - __slots__ = [] - -#=============================================================================== -# abstract constructs -#=============================================================================== -class Construct(object): - """ - The mother of all constructs! - - User API: - * parse(buf) - parses an in-memory buffer (usually a string) - * parse_stream(stream) - parses a stream (in-memory, file, pipe, ...) - * build(obj) - builds the object into an in-memory buffer (a string) - * build_stream(obj, stream) - builds the object into the given stream - * sizeof(context) - calculates the size of the construct, if possible, - based on the context - - Overriable methods for subclassing: - * _parse(stream, context) - low-level parse from stream - * _build(obj, stream, context) - low-level build to stream - * _sizeof(context) - low-level compute size - - Flags API: - * _set_flag(flag) - sets the given flag/flags - * _clear_flag(flag) - clears the given flag/flags - * _inherit_flags(*subcons) - inherits the flag of subcons - * _is_flag(flag) - is the flag set? (predicate) - - Overridable methods for the copy-API: - * __getstate__() - returns a dict of the attributes of self - * __setstate__(attrs) - sets the attrs to self - - Attributes: - All constructs have a name and flags. The name is used for naming - struct-members and context dicts. Note that the name must be a string or - None (if the name is not needed). A single underscore ("_") is a reserved - name, and so are names starting with a less-than character ("<"). The name - should be descriptive, short, and valid as a python identifier (although - these rules are not enforced). - - The flags specify additional behavioral information about this construct. - The flags are used by enclosing constructs to determine a proper course - of action. Usually, flags are "inherited", i.e., an enclosing construct - inherits the flags of its subconstruct. The enclosing construct may - set new flags or clear existing ones, as necessary. - - For example, if FLAG_COPY_CONTEXT is set, repeaters will pass a copy of - the context for each iteration, which is necessary for OnDemand parsing. - """ - FLAG_COPY_CONTEXT = 0x0001 - FLAG_DYNAMIC = 0x0002 - FLAG_EMBED = 0x0004 - FLAG_NESTING = 0x0008 - - __slots__ = ["name", "conflags"] - def __init__(self, name, flags = 0): - if name is not None: - if type(name) is not str: - raise TypeError("name must be a string or None", name) - if name == "_" or name.startswith("<"): - raise ValueError("reserved name", name) - self.name = name - self.conflags = flags - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.name) - - def _set_flag(self, flag): - self.conflags |= flag - def _clear_flag(self, flag): - self.conflags &= ~flag - def _inherit_flags(self, *subcons): - for sc in subcons: - self._set_flag(sc.conflags) - def _is_flag(self, flag): - return bool(self.conflags & flag) - - def __getstate__(self): - attrs = {} - if hasattr(self, "__dict__"): - attrs.update(self.__dict__) - slots = [] - c = self.__class__ - while c is not None: - if hasattr(c, "__slots__"): - slots.extend(c.__slots__) - c = c.__base__ - for name in slots: - if hasattr(self, name): - attrs[name] = getattr(self, name) - return attrs - def __setstate__(self, attrs): - for name, value in attrs.iteritems(): - setattr(self, name, value) - def __copy__(self): - """returns a copy of this construct""" - self2 = object.__new__(self.__class__) - self2.__setstate__(self.__getstate__()) - return self2 - - def parse(self, data): - """parses data given as a buffer or a string (in-memory)""" - return self.parse_stream(StringIO(data)) - def parse_stream(self, stream): - """parses data read directly from a stream""" - return self._parse(stream, AttrDict()) - def _parse(self, stream, context): - raise NotImplementedError() - - def build(self, obj): - """builds an object in a string (in memory)""" - stream = StringIO() - self.build_stream(obj, stream) - return stream.getvalue() - def build_stream(self, obj, stream): - """builds an object into a stream""" - self._build(obj, stream, AttrDict()) - def _build(self, obj, stream, context): - raise NotImplementedError() - - def sizeof(self, context = None): - """calculates the size of the construct (if possible) using the - given context""" - if context is None: - context = AttrDict() - return self._sizeof(context) - def _sizeof(self, context): - raise SizeofError("can't calculate size") - -class Subconstruct(Construct): - """ - Abstract subconstruct (wraps an inner construct, inheriting it's - name and flags). - - Parameters: - * subcon - the construct to wrap - """ - __slots__ = ["subcon"] - def __init__(self, subcon): - Construct.__init__(self, subcon.name, subcon.conflags) - self.subcon = subcon - def _parse(self, stream, context): - return self.subcon._parse(stream, context) - def _build(self, obj, stream, context): - self.subcon._build(obj, stream, context) - def _sizeof(self, context): - return self.subcon._sizeof(context) - -class Adapter(Subconstruct): - """ - Abstract adapter: calls _decode for parsing and _encode for building. - - Parameters: - * subcon - the construct to wrap - """ - __slots__ = [] - def _parse(self, stream, context): - return self._decode(self.subcon._parse(stream, context), context) - def _build(self, obj, stream, context): - self.subcon._build(self._encode(obj, context), stream, context) - def _decode(self, obj, context): - raise NotImplementedError() - def _encode(self, obj, context): - raise NotImplementedError() - - -#=============================================================================== -# primitives -#=============================================================================== -def _read_stream(stream, length): - if length < 0: - raise ValueError("length must be >= 0", length) - data = stream.read(length) - if len(data) != length: - raise FieldError("expected %d, found %d" % (length, len(data))) - return data - -def _write_stream(stream, length, data): - if length < 0: - raise ValueError("length must be >= 0", length) - if len(data) != length: - raise FieldError("expected %d, found %d" % (length, len(data))) - stream.write(data) - -class StaticField(Construct): - """ - A field of a fixed size - - Parameters: - * name - the name of the field - * length - the length (an integer) - - Example: - StaticField("foo", 5) - """ - __slots__ = ["length"] - def __init__(self, name, length): - Construct.__init__(self, name) - self.length = length - def _parse(self, stream, context): - return _read_stream(stream, self.length) - def _build(self, obj, stream, context): - _write_stream(stream, self.length, obj) - def _sizeof(self, context): - return self.length - -class FormatField(StaticField): - """ - A field that uses python's built-in struct module to pack/unpack data - according to a format string. - Note: this field has been originally implemented as an Adapter, but it - was made a construct for performance reasons. - - Parameters: - * name - the name - * endianity - "<" for little endian, ">" for big endian, or "=" for native - * format - a single format character - - Example: - FormatField("foo", ">", "L") - """ - __slots__ = ["packer"] - def __init__(self, name, endianity, format): - if endianity not in (">", "<", "="): - raise ValueError("endianity must be be '=', '<', or '>'", - endianity) - if len(format) != 1: - raise ValueError("must specify one and only one format char") - self.packer = Packer(endianity + format) - StaticField.__init__(self, name, self.packer.size) - def __getstate__(self): - attrs = StaticField.__getstate__(self) - attrs["packer"] = attrs["packer"].format - return attrs - def __setstate__(self, attrs): - attrs["packer"] = Packer(attrs["packer"]) - return StaticField.__setstate__(attrs) - def _parse(self, stream, context): - try: - return self.packer.unpack(_read_stream(stream, self.length))[0] - except Exception, ex: - raise FieldError(ex) - def _build(self, obj, stream, context): - try: - _write_stream(stream, self.length, self.packer.pack(obj)) - except Exception, ex: - raise FieldError(ex) - -class MetaField(Construct): - """ - A field of a meta-length. The length is computed at runtime based on - the context. - - Parameters: - * name - the name of the field - * lengthfunc - a function that takes the context as a parameter and return - the length of the field - - Example: - MetaField("foo", lambda ctx: 5) - """ - __slots__ = ["lengthfunc"] - def __init__(self, name, lengthfunc): - Construct.__init__(self, name) - self.lengthfunc = lengthfunc - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - return _read_stream(stream, self.lengthfunc(context)) - def _build(self, obj, stream, context): - _write_stream(stream, self.lengthfunc(context), obj) - def _sizeof(self, context): - return self.lengthfunc(context) - - -#=============================================================================== -# arrays and repeaters -#=============================================================================== -class MetaArray(Subconstruct): - """ - An array (repeater) of a meta-count. The array will iterate exactly - `countfunc()` times. Will raise ArrayError if less elements are found. - See also Array, Range and RepeatUntil. - - Parameters: - * countfunc - a function that takes the context as a parameter and returns - the number of elements of the array (count) - * subcon - the subcon to repeat `countfunc()` times - - Example: - MetaArray(lambda ctx: 5, UBInt8("foo")) - """ - __slots__ = ["countfunc"] - def __init__(self, countfunc, subcon): - Subconstruct.__init__(self, subcon) - self.countfunc = countfunc - self._clear_flag(self.FLAG_COPY_CONTEXT) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - obj = ListContainer() - c = 0 - count = self.countfunc(context) - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - while c < count: - obj.append(self.subcon._parse(stream, context.__copy__())) - c += 1 - else: - while c < count: - obj.append(self.subcon._parse(stream, context)) - c += 1 - except ConstructError, ex: - raise ArrayError("expected %d, found %d" % (count, c), ex) - return obj - def _build(self, obj, stream, context): - count = self.countfunc(context) - if len(obj) != count: - raise ArrayError("expected %d, found %d" % (count, len(obj))) - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - for subobj in obj: - self.subcon._build(subobj, stream, context.__copy__()) - else: - for subobj in obj: - self.subcon._build(subobj, stream, context) - def _sizeof(self, context): - return self.subcon._sizeof(context) * self.countfunc(context) - -class Range(Subconstruct): - """ - A range-array. The subcon will iterate between `mincount` to `maxcount` - times. If less than `mincount` elements are found, raises RangeError. - See also GreedyRange and OptionalGreedyRange. - - Notes: - * requires a seekable stream. - - Parameters: - * mincount - the minimal count (an integer) - * maxcount - the maximal count (an integer) - * subcon - the subcon to repeat - - Example: - Range(5, 8, UBInt8("foo")) - """ - __slots__ = ["mincount", "maxcout"] - def __init__(self, mincount, maxcout, subcon): - Subconstruct.__init__(self, subcon) - self.mincount = mincount - self.maxcout = maxcout - self._clear_flag(self.FLAG_COPY_CONTEXT) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - obj = ListContainer() - c = 0 - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - while c < self.maxcout: - pos = stream.tell() - obj.append(self.subcon._parse(stream, context.__copy__())) - c += 1 - else: - while c < self.maxcout: - pos = stream.tell() - obj.append(self.subcon._parse(stream, context)) - c += 1 - except ConstructError: - if c < self.mincount: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, c)) - stream.seek(pos) - return obj - def _build(self, obj, stream, context): - if len(obj) < self.mincount or len(obj) > self.maxcout: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, len(obj))) - cnt = 0 - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - for subobj in obj: - self.subcon._build(subobj, stream, context.__copy__()) - cnt += 1 - else: - for subobj in obj: - self.subcon._build(subobj, stream, context) - cnt += 1 - except ConstructError: - if cnt < self.mincount: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, len(obj))) - def _sizeof(self, context): - raise SizeofError("can't calculate size") - -class RepeatUntil(Subconstruct): - """ - An array that repeat until the predicate indicates it to stop. Note that - the last element (which caused the repeat to exit) is included in the - return value. - - Parameters: - * predicate - a predicate function that takes (obj, context) and returns - True if the stop-condition is met, or False to continue. - * subcon - the subcon to repeat. - - Example: - # will read chars until \x00 (inclusive) - RepeatUntil(lambda obj, ctx: obj == "\x00", - Field("chars", 1) - ) - """ - __slots__ = ["predicate"] - def __init__(self, predicate, subcon): - Subconstruct.__init__(self, subcon) - self.predicate = predicate - self._clear_flag(self.FLAG_COPY_CONTEXT) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - obj = [] - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - while True: - subobj = self.subcon._parse(stream, context.__copy__()) - obj.append(subobj) - if self.predicate(subobj, context): - break - else: - while True: - subobj = self.subcon._parse(stream, context) - obj.append(subobj) - if self.predicate(subobj, context): - break - except ConstructError, ex: - raise ArrayError("missing terminator", ex) - return obj - def _build(self, obj, stream, context): - terminated = False - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - for subobj in obj: - self.subcon._build(subobj, stream, context.__copy__()) - if self.predicate(subobj, context): - terminated = True - break - else: - for subobj in obj: - self.subcon._build(subobj, stream, context.__copy__()) - if self.predicate(subobj, context): - terminated = True - break - if not terminated: - raise ArrayError("missing terminator") - def _sizeof(self, context): - raise SizeofError("can't calculate size") - - -#=============================================================================== -# structures and sequences -#=============================================================================== -class Struct(Construct): - """ - A sequence of named constructs, similar to structs in C. The elements are - parsed and built in the order they are defined. - See also Embedded. - - Parameters: - * name - the name of the structure - * subcons - a sequence of subconstructs that make up this structure. - * nested - a keyword-only argument that indicates whether this struct - creates a nested context. The default is True. This parameter is - considered "advanced usage", and may be removed in the future. - - Example: - Struct("foo", - UBInt8("first_element"), - UBInt16("second_element"), - Padding(2), - UBInt8("third_element"), - ) - """ - __slots__ = ["subcons", "nested"] - def __init__(self, name, *subcons, **kw): - self.nested = kw.pop("nested", True) - if kw: - raise TypeError("the only keyword argument accepted is 'nested'", kw) - Construct.__init__(self, name) - self.subcons = subcons - self._inherit_flags(*subcons) - self._clear_flag(self.FLAG_EMBED) - def _parse(self, stream, context): - if "" in context: - obj = context[""] - del context[""] - else: - obj = Container() - if self.nested: - context = AttrDict(_ = context) - for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = obj - sc._parse(stream, context) - else: - subobj = sc._parse(stream, context) - if sc.name is not None: - obj[sc.name] = subobj - context[sc.name] = subobj - return obj - def _build(self, obj, stream, context): - if "" in context: - del context[""] - elif self.nested: - context = AttrDict(_ = context) - for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = True - subobj = obj - elif sc.name is None: - subobj = None - else: - subobj = getattr(obj, sc.name) - context[sc.name] = subobj - sc._build(subobj, stream, context) - def _sizeof(self, context): - if self.nested: - context = AttrDict(_ = context) - return sum(sc._sizeof(context) for sc in self.subcons) - -class Sequence(Struct): - """ - A sequence of unnamed constructs. The elements are parsed and built in the - order they are defined. - See also Embedded. - - Parameters: - * name - the name of the structure - * subcons - a sequence of subconstructs that make up this structure. - * nested - a keyword-only argument that indicates whether this struct - creates a nested context. The default is True. This parameter is - considered "advanced usage", and may be removed in the future. - - Example: - Sequence("foo", - UBInt8("first_element"), - UBInt16("second_element"), - Padding(2), - UBInt8("third_element"), - ) - """ - __slots__ = [] - def _parse(self, stream, context): - if "" in context: - obj = context[""] - del context[""] - else: - obj = ListContainer() - if self.nested: - context = AttrDict(_ = context) - for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = obj - sc._parse(stream, context) - else: - subobj = sc._parse(stream, context) - if sc.name is not None: - obj.append(subobj) - context[sc.name] = subobj - return obj - def _build(self, obj, stream, context): - if "" in context: - del context[""] - elif self.nested: - context = AttrDict(_ = context) - objiter = iter(obj) - for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = True - subobj = objiter - elif sc.name is None: - subobj = None - else: - subobj = objiter.next() - context[sc.name] = subobj - sc._build(subobj, stream, context) - -class Union(Construct): - """ - a set of overlapping fields (like unions in C). when parsing, - all fields read the same data; when building, only the first subcon - (called "master") is used. - - Parameters: - * name - the name of the union - * master - the master subcon, i.e., the subcon used for building and - calculating the total size - * subcons - additional subcons - - Example: - Union("what_are_four_bytes", - UBInt32("one_dword"), - Struct("two_words", UBInt16("first"), UBInt16("second")), - Struct("four_bytes", - UBInt8("a"), - UBInt8("b"), - UBInt8("c"), - UBInt8("d") - ), - ) - """ - __slots__ = ["parser", "builder"] - def __init__(self, name, master, *subcons, **kw): - Construct.__init__(self, name) - args = [Peek(sc) for sc in subcons] - args.append(MetaField(None, lambda ctx: master._sizeof(ctx))) - self.parser = Struct(name, Peek(master, perform_build = True), *args) - self.builder = Struct(name, master) - def _parse(self, stream, context): - return self.parser._parse(stream, context) - def _build(self, obj, stream, context): - return self.builder._build(obj, stream, context) - def _sizeof(self, context): - return self.builder._sizeof(context) - -#=============================================================================== -# conditional -#=============================================================================== -class Switch(Construct): - """ - A conditional branch. Switch will choose the case to follow based on - the return value of keyfunc. If no case is matched, and no default value - is given, SwitchError will be raised. - See also Pass. - - Parameters: - * name - the name of the construct - * keyfunc - a function that takes the context and returns a key, which - will ne used to choose the relevant case. - * cases - a dictionary mapping keys to constructs. the keys can be any - values that may be returned by keyfunc. - * default - a default value to use when the key is not found in the cases. - if not supplied, an exception will be raised when the key is not found. - You can use the builtin construct Pass for 'do-nothing'. - * include_key - whether or not to include the key in the return value - of parsing. defualt is False. - - Example: - Struct("foo", - UBInt8("type"), - Switch("value", lambda ctx: ctx.type, { - 1 : UBInt8("spam"), - 2 : UBInt16("spam"), - 3 : UBInt32("spam"), - 4 : UBInt64("spam"), - } - ), - ) - """ - - class NoDefault(Construct): - def _parse(self, stream, context): - raise SwitchError("no default case defined") - def _build(self, obj, stream, context): - raise SwitchError("no default case defined") - def _sizeof(self, context): - raise SwitchError("no default case defined") - NoDefault = NoDefault("NoDefault") - - __slots__ = ["subcons", "keyfunc", "cases", "default", "include_key"] - - def __init__(self, name, keyfunc, cases, default = NoDefault, - include_key = False): - Construct.__init__(self, name) - self._inherit_flags(*cases.values()) - self.keyfunc = keyfunc - self.cases = cases - self.default = default - self.include_key = include_key - self._inherit_flags(*cases.values()) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - key = self.keyfunc(context) - obj = self.cases.get(key, self.default)._parse(stream, context) - if self.include_key: - return key, obj - else: - return obj - def _build(self, obj, stream, context): - if self.include_key: - key, obj = obj - else: - key = self.keyfunc(context) - case = self.cases.get(key, self.default) - case._build(obj, stream, context) - def _sizeof(self, context): - case = self.cases.get(self.keyfunc(context), self.default) - return case._sizeof(context) - -class Select(Construct): - """ - Selects the first matching subconstruct. It will literally try each of - the subconstructs, until one matches. - - Notes: - * requires a seekable stream. - - Parameters: - * name - the name of the construct - * subcons - the subcons to try (order-sensitive) - * include_name - a keyword only argument, indicating whether to include - the name of the selected subcon in the return value of parsing. default - is false. - - Example: - Select("foo", - UBInt64("large"), - UBInt32("medium"), - UBInt16("small"), - UBInt8("tiny"), - ) - """ - __slots__ = ["subcons", "include_name"] - def __init__(self, name, *subcons, **kw): - include_name = kw.pop("include_name", False) - if kw: - raise TypeError("the only keyword argument accepted " - "is 'include_name'", kw) - Construct.__init__(self, name) - self.subcons = subcons - self.include_name = include_name - self._inherit_flags(*subcons) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - for sc in self.subcons: - pos = stream.tell() - context2 = context.__copy__() - try: - obj = sc._parse(stream, context2) - except ConstructError: - stream.seek(pos) - else: - context.__update__(context2) - if self.include_name: - return sc.name, obj - else: - return obj - raise SelectError("no subconstruct matched") - def _build(self, obj, stream, context): - if self.include_name: - name, obj = obj - for sc in self.subcons: - if sc.name == name: - sc._build(obj, stream, context) - return - else: - for sc in self.subcons: - stream2 = StringIO() - context2 = context.__copy__() - try: - sc._build(obj, stream2, context2) - except Exception: - pass - else: - context.__update__(context2) - stream.write(stream2.getvalue()) - return - raise SelectError("no subconstruct matched", obj) - def _sizeof(self, context): - raise SizeofError("can't calculate size") - - -#=============================================================================== -# stream manipulation -#=============================================================================== -class Pointer(Subconstruct): - """ - Changes the stream position to a given offset, where the construction - should take place, and restores the stream position when finished. - See also Anchor, OnDemand and OnDemandPointer. - - Notes: - * requires a seekable stream. - - Parameters: - * offsetfunc: a function that takes the context and returns an absolute - stream position, where the construction would take place - * subcon - the subcon to use at `offsetfunc()` - - Example: - Struct("foo", - UBInt32("spam_pointer"), - Pointer(lambda ctx: ctx.spam_pointer, - Array(5, UBInt8("spam")) - ) - ) - """ - __slots__ = ["offsetfunc"] - def __init__(self, offsetfunc, subcon): - Subconstruct.__init__(self, subcon) - self.offsetfunc = offsetfunc - def _parse(self, stream, context): - newpos = self.offsetfunc(context) - origpos = stream.tell() - stream.seek(newpos) - obj = self.subcon._parse(stream, context) - stream.seek(origpos) - return obj - def _build(self, obj, stream, context): - newpos = self.offsetfunc(context) - origpos = stream.tell() - stream.seek(newpos) - self.subcon._build(obj, stream, context) - stream.seek(origpos) - def _sizeof(self, context): - return 0 - -class Peek(Subconstruct): - """ - Peeks at the stream: parses without changing the stream position. - See also Union. If the end of the stream is reached when peeking, - returns None. - - Notes: - * requires a seekable stream. - - Parameters: - * subcon - the subcon to peek at - * perform_build - whether or not to perform building. by default this - parameter is set to False, meaning building is a no-op. - - Example: - Peek(UBInt8("foo")) - """ - __slots__ = ["perform_build"] - def __init__(self, subcon, perform_build = False): - Subconstruct.__init__(self, subcon) - self.perform_build = perform_build - def _parse(self, stream, context): - pos = stream.tell() - try: - try: - return self.subcon._parse(stream, context) - except FieldError: - pass - finally: - stream.seek(pos) - def _build(self, obj, stream, context): - if self.perform_build: - self.subcon._build(obj, stream, context) - def _sizeof(self, context): - return 0 - -class OnDemand(Subconstruct): - """ - Allows for on-demand (lazy) parsing. When parsing, it will return a - LazyContainer that represents a pointer to the data, but does not actually - parses it from stream until it's "demanded". - By accessing the 'value' property of LazyContainers, you will demand the - data from the stream. The data will be parsed and cached for later use. - You can use the 'has_value' property to know whether the data has already - been demanded. - See also OnDemandPointer. - - Notes: - * requires a seekable stream. - - Parameters: - * subcon - - * advance_stream - whether or not to advance the stream position. by - default this is True, but if subcon is a pointer, this should be False. - * force_build - whether or not to force build. If set to False, and the - LazyContainer has not been demaned, building is a no-op. - - Example: - OnDemand(Array(10000, UBInt8("foo")) - """ - __slots__ = ["advance_stream", "force_build"] - def __init__(self, subcon, advance_stream = True, force_build = True): - Subconstruct.__init__(self, subcon) - self.advance_stream = advance_stream - self.force_build = force_build - def _parse(self, stream, context): - obj = LazyContainer(self.subcon, stream, stream.tell(), context) - if self.advance_stream: - stream.seek(self.subcon._sizeof(context), 1) - return obj - def _build(self, obj, stream, context): - if not isinstance(obj, LazyContainer): - self.subcon._build(obj, stream, context) - elif self.force_build or obj.has_value: - self.subcon._build(obj.value, stream, context) - elif self.advance_stream: - stream.seek(self.subcon._sizeof(context), 1) - -class Buffered(Subconstruct): - """ - Creates an in-memory buffered stream, which can undergo encoding and - decoding prior to being passed on to the subconstruct. - See also Bitwise. - - Note: - * Do not use pointers inside Buffered - - Parameters: - * subcon - the subcon which will operate on the buffer - * encoder - a function that takes a string and returns an encoded - string (used after building) - * decoder - a function that takes a string and returns a decoded - string (used before parsing) - * resizer - a function that takes the size of the subcon and "adjusts" - or "resizes" it according to the encoding/decoding process. - - Example: - Buffered(BitField("foo", 16), - encoder = decode_bin, - decoder = encode_bin, - resizer = lambda size: size / 8, - ) - """ - __slots__ = ["encoder", "decoder", "resizer"] - def __init__(self, subcon, decoder, encoder, resizer): - Subconstruct.__init__(self, subcon) - self.encoder = encoder - self.decoder = decoder - self.resizer = resizer - def _parse(self, stream, context): - data = _read_stream(stream, self._sizeof(context)) - stream2 = StringIO(self.decoder(data)) - return self.subcon._parse(stream2, context) - def _build(self, obj, stream, context): - size = self._sizeof(context) - stream2 = StringIO() - self.subcon._build(obj, stream2, context) - data = self.encoder(stream2.getvalue()) - assert len(data) == size - _write_stream(stream, self._sizeof(context), data) - def _sizeof(self, context): - return self.resizer(self.subcon._sizeof(context)) - -class Restream(Subconstruct): - """ - Wraps the stream with a read-wrapper (for parsing) or a - write-wrapper (for building). The stream wrapper can buffer the data - internally, reading it from- or writing it to the underlying stream - as needed. For example, BitStreamReader reads whole bytes from the - underlying stream, but returns them as individual bits. - See also Bitwise. - - When the parsing or building is done, the stream's close method - will be invoked. It can perform any finalization needed for the stream - wrapper, but it must not close the underlying stream. - - Note: - * Do not use pointers inside Restream - - Parameters: - * subcon - the subcon - * stream_reader - the read-wrapper - * stream_writer - the write wrapper - * resizer - a function that takes the size of the subcon and "adjusts" - or "resizes" it according to the encoding/decoding process. - - Example: - Restream(BitField("foo", 16), - stream_reader = BitStreamReader, - stream_writer = BitStreamWriter, - resizer = lambda size: size / 8, - ) - """ - __slots__ = ["stream_reader", "stream_writer", "resizer"] - def __init__(self, subcon, stream_reader, stream_writer, resizer): - Subconstruct.__init__(self, subcon) - self.stream_reader = stream_reader - self.stream_writer = stream_writer - self.resizer = resizer - def _parse(self, stream, context): - stream2 = self.stream_reader(stream) - obj = self.subcon._parse(stream2, context) - stream2.close() - return obj - def _build(self, obj, stream, context): - stream2 = self.stream_writer(stream) - self.subcon._build(obj, stream2, context) - stream2.close() - def _sizeof(self, context): - return self.resizer(self.subcon._sizeof(context)) - - -#=============================================================================== -# miscellaneous -#=============================================================================== -class Reconfig(Subconstruct): - """ - Reconfigures a subconstruct. Reconfig can be used to change the name and - set and clear flags of the inner subcon. - - Parameters: - * name - the new name - * subcon - the subcon to reconfigure - * setflags - the flags to set (default is 0) - * clearflags - the flags to clear (default is 0) - - Example: - Reconfig("foo", UBInt8("bar")) - """ - __slots__ = [] - def __init__(self, name, subcon, setflags = 0, clearflags = 0): - Construct.__init__(self, name, subcon.conflags) - self.subcon = subcon - self._set_flag(setflags) - self._clear_flag(clearflags) - -class Anchor(Construct): - """ - Returns the "anchor" (stream position) at the point where it's inserted. - Useful for adjusting relative offsets to absolute positions, or to measure - sizes of constructs. - absolute pointer = anchor + relative offset - size = anchor_after - anchor_before - See also Pointer. - - Notes: - * requires a seekable stream. - - Parameters: - * name - the name of the anchor - - Example: - Struct("foo", - Anchor("base"), - UBInt8("relative_offset"), - Pointer(lambda ctx: ctx.relative_offset + ctx.base, - UBInt8("data") - ) - ) - """ - __slots__ = [] - def _parse(self, stream, context): - return stream.tell() - def _build(self, obj, stream, context): - context[self.name] = stream.tell() - def _sizeof(self, context): - return 0 - -class Value(Construct): - """ - A computed value. - - Parameters: - * name - the name of the value - * func - a function that takes the context and return the computed value - - Example: - Struct("foo", - UBInt8("width"), - UBInt8("height"), - Value("total_pixels", lambda ctx: ctx.width * ctx.height), - ) - """ - __slots__ = ["func"] - def __init__(self, name, func): - Construct.__init__(self, name) - self.func = func - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - return self.func(context) - def _build(self, obj, stream, context): - context[self.name] = self.func(context) - def _sizeof(self, context): - return 0 - -#class Dynamic(Construct): -# """ -# Dynamically creates a construct and uses it for parsing and building. -# This allows you to create change the construction tree on the fly. -# Deprecated. -# -# Parameters: -# * name - the name of the construct -# * factoryfunc - a function that takes the context and returns a new -# construct object which will be used for parsing and building. -# -# Example: -# def factory(ctx): -# if ctx.bar == 8: -# return UBInt8("spam") -# if ctx.bar == 9: -# return String("spam", 9) -# -# Struct("foo", -# UBInt8("bar"), -# Dynamic("spam", factory), -# ) -# """ -# __slots__ = ["factoryfunc"] -# def __init__(self, name, factoryfunc): -# Construct.__init__(self, name, self.FLAG_COPY_CONTEXT) -# self.factoryfunc = factoryfunc -# self._set_flag(self.FLAG_DYNAMIC) -# def _parse(self, stream, context): -# return self.factoryfunc(context)._parse(stream, context) -# def _build(self, obj, stream, context): -# return self.factoryfunc(context)._build(obj, stream, context) -# def _sizeof(self, context): -# return self.factoryfunc(context)._sizeof(context) - -class LazyBound(Construct): - """ - Lazily bound construct, useful for constructs that need to make cyclic - references (linked-lists, expression trees, etc.). - - Parameters: - - - Example: - foo = Struct("foo", - UBInt8("bar"), - LazyBound("next", lambda: foo), - ) - """ - __slots__ = ["bindfunc", "bound"] - def __init__(self, name, bindfunc): - Construct.__init__(self, name) - self.bound = None - self.bindfunc = bindfunc - def _parse(self, stream, context): - if self.bound is None: - self.bound = self.bindfunc() - return self.bound._parse(stream, context) - def _build(self, obj, stream, context): - if self.bound is None: - self.bound = self.bindfunc() - self.bound._build(obj, stream, context) - def _sizeof(self, context): - if self.bound is None: - self.bound = self.bindfunc() - return self.bound._sizeof(context) - -class Pass(Construct): - """ - A do-nothing construct, useful as the default case for Switch, or - to indicate Enums. - See also Switch and Enum. - - Notes: - * this construct is a singleton. do not try to instatiate it, as it - will not work :) - - Example: - Pass - """ - __slots__ = [] - def _parse(self, stream, context): - pass - def _build(self, obj, stream, context): - assert obj is None - def _sizeof(self, context): - return 0 -Pass = Pass(None) - -class Terminator(Construct): - """ - Asserts the end of the stream has been reached at the point it's placed. - You can use this to ensure no more unparsed data follows. - - Notes: - * this construct is a singleton. do not try to instatiate it, as it - will not work :) - - Example: - Terminator - """ - __slots__ = [] - def _parse(self, stream, context): - if stream.read(1): - raise TerminatorError("expected end of stream") - def _build(self, obj, stream, context): - assert obj is None - def _sizeof(self, context): - return 0 -Terminator = Terminator(None) - - - - - - - - - - - - - - - - - - - +from lib import StringIO, Packer +from lib import Container, ListContainer, AttrDict, LazyContainer + + +#=============================================================================== +# exceptions +#=============================================================================== +class ConstructError(Exception): + __slots__ = [] +class FieldError(ConstructError): + __slots__ = [] +class SizeofError(ConstructError): + __slots__ = [] +class AdaptationError(ConstructError): + __slots__ = [] +class ArrayError(ConstructError): + __slots__ = [] +class RangeError(ConstructError): + __slots__ = [] +class SwitchError(ConstructError): + __slots__ = [] +class SelectError(ConstructError): + __slots__ = [] +class TerminatorError(ConstructError): + __slots__ = [] + +#=============================================================================== +# abstract constructs +#=============================================================================== +class Construct(object): + """ + The mother of all constructs! + + User API: + * parse(buf) - parses an in-memory buffer (usually a string) + * parse_stream(stream) - parses a stream (in-memory, file, pipe, ...) + * build(obj) - builds the object into an in-memory buffer (a string) + * build_stream(obj, stream) - builds the object into the given stream + * sizeof(context) - calculates the size of the construct, if possible, + based on the context + + Overriable methods for subclassing: + * _parse(stream, context) - low-level parse from stream + * _build(obj, stream, context) - low-level build to stream + * _sizeof(context) - low-level compute size + + Flags API: + * _set_flag(flag) - sets the given flag/flags + * _clear_flag(flag) - clears the given flag/flags + * _inherit_flags(*subcons) - inherits the flag of subcons + * _is_flag(flag) - is the flag set? (predicate) + + Overridable methods for the copy-API: + * __getstate__() - returns a dict of the attributes of self + * __setstate__(attrs) - sets the attrs to self + + Attributes: + All constructs have a name and flags. The name is used for naming + struct-members and context dicts. Note that the name must be a string or + None (if the name is not needed). A single underscore ("_") is a reserved + name, and so are names starting with a less-than character ("<"). The name + should be descriptive, short, and valid as a python identifier (although + these rules are not enforced). + + The flags specify additional behavioral information about this construct. + The flags are used by enclosing constructs to determine a proper course + of action. Usually, flags are "inherited", i.e., an enclosing construct + inherits the flags of its subconstruct. The enclosing construct may + set new flags or clear existing ones, as necessary. + + For example, if FLAG_COPY_CONTEXT is set, repeaters will pass a copy of + the context for each iteration, which is necessary for OnDemand parsing. + """ + FLAG_COPY_CONTEXT = 0x0001 + FLAG_DYNAMIC = 0x0002 + FLAG_EMBED = 0x0004 + FLAG_NESTING = 0x0008 + + __slots__ = ["name", "conflags"] + def __init__(self, name, flags = 0): + if name is not None: + if type(name) is not str: + raise TypeError("name must be a string or None", name) + if name == "_" or name.startswith("<"): + raise ValueError("reserved name", name) + self.name = name + self.conflags = flags + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.name) + + def _set_flag(self, flag): + self.conflags |= flag + def _clear_flag(self, flag): + self.conflags &= ~flag + def _inherit_flags(self, *subcons): + for sc in subcons: + self._set_flag(sc.conflags) + def _is_flag(self, flag): + return bool(self.conflags & flag) + + def __getstate__(self): + attrs = {} + if hasattr(self, "__dict__"): + attrs.update(self.__dict__) + slots = [] + c = self.__class__ + while c is not None: + if hasattr(c, "__slots__"): + slots.extend(c.__slots__) + c = c.__base__ + for name in slots: + if hasattr(self, name): + attrs[name] = getattr(self, name) + return attrs + def __setstate__(self, attrs): + for name, value in attrs.iteritems(): + setattr(self, name, value) + def __copy__(self): + """returns a copy of this construct""" + self2 = object.__new__(self.__class__) + self2.__setstate__(self.__getstate__()) + return self2 + + def parse(self, data): + """parses data given as a buffer or a string (in-memory)""" + return self.parse_stream(StringIO(data)) + def parse_stream(self, stream): + """parses data read directly from a stream""" + return self._parse(stream, AttrDict()) + def _parse(self, stream, context): + raise NotImplementedError() + + def build(self, obj): + """builds an object in a string (in memory)""" + stream = StringIO() + self.build_stream(obj, stream) + return stream.getvalue() + def build_stream(self, obj, stream): + """builds an object into a stream""" + self._build(obj, stream, AttrDict()) + def _build(self, obj, stream, context): + raise NotImplementedError() + + def sizeof(self, context = None): + """calculates the size of the construct (if possible) using the + given context""" + if context is None: + context = AttrDict() + return self._sizeof(context) + def _sizeof(self, context): + raise SizeofError("can't calculate size") + +class Subconstruct(Construct): + """ + Abstract subconstruct (wraps an inner construct, inheriting it's + name and flags). + + Parameters: + * subcon - the construct to wrap + """ + __slots__ = ["subcon"] + def __init__(self, subcon): + Construct.__init__(self, subcon.name, subcon.conflags) + self.subcon = subcon + def _parse(self, stream, context): + return self.subcon._parse(stream, context) + def _build(self, obj, stream, context): + self.subcon._build(obj, stream, context) + def _sizeof(self, context): + return self.subcon._sizeof(context) + +class Adapter(Subconstruct): + """ + Abstract adapter: calls _decode for parsing and _encode for building. + + Parameters: + * subcon - the construct to wrap + """ + __slots__ = [] + def _parse(self, stream, context): + return self._decode(self.subcon._parse(stream, context), context) + def _build(self, obj, stream, context): + self.subcon._build(self._encode(obj, context), stream, context) + def _decode(self, obj, context): + raise NotImplementedError() + def _encode(self, obj, context): + raise NotImplementedError() + + +#=============================================================================== +# primitives +#=============================================================================== +def _read_stream(stream, length): + if length < 0: + raise ValueError("length must be >= 0", length) + data = stream.read(length) + if len(data) != length: + raise FieldError("expected %d, found %d" % (length, len(data))) + return data + +def _write_stream(stream, length, data): + if length < 0: + raise ValueError("length must be >= 0", length) + if len(data) != length: + raise FieldError("expected %d, found %d" % (length, len(data))) + stream.write(data) + +class StaticField(Construct): + """ + A field of a fixed size + + Parameters: + * name - the name of the field + * length - the length (an integer) + + Example: + StaticField("foo", 5) + """ + __slots__ = ["length"] + def __init__(self, name, length): + Construct.__init__(self, name) + self.length = length + def _parse(self, stream, context): + return _read_stream(stream, self.length) + def _build(self, obj, stream, context): + _write_stream(stream, self.length, obj) + def _sizeof(self, context): + return self.length + +class FormatField(StaticField): + """ + A field that uses python's built-in struct module to pack/unpack data + according to a format string. + Note: this field has been originally implemented as an Adapter, but it + was made a construct for performance reasons. + + Parameters: + * name - the name + * endianity - "<" for little endian, ">" for big endian, or "=" for native + * format - a single format character + + Example: + FormatField("foo", ">", "L") + """ + __slots__ = ["packer"] + def __init__(self, name, endianity, format): + if endianity not in (">", "<", "="): + raise ValueError("endianity must be be '=', '<', or '>'", + endianity) + if len(format) != 1: + raise ValueError("must specify one and only one format char") + self.packer = Packer(endianity + format) + StaticField.__init__(self, name, self.packer.size) + def __getstate__(self): + attrs = StaticField.__getstate__(self) + attrs["packer"] = attrs["packer"].format + return attrs + def __setstate__(self, attrs): + attrs["packer"] = Packer(attrs["packer"]) + return StaticField.__setstate__(attrs) + def _parse(self, stream, context): + try: + return self.packer.unpack(_read_stream(stream, self.length))[0] + except Exception, ex: + raise FieldError(ex) + def _build(self, obj, stream, context): + try: + _write_stream(stream, self.length, self.packer.pack(obj)) + except Exception, ex: + raise FieldError(ex) + +class MetaField(Construct): + """ + A field of a meta-length. The length is computed at runtime based on + the context. + + Parameters: + * name - the name of the field + * lengthfunc - a function that takes the context as a parameter and return + the length of the field + + Example: + MetaField("foo", lambda ctx: 5) + """ + __slots__ = ["lengthfunc"] + def __init__(self, name, lengthfunc): + Construct.__init__(self, name) + self.lengthfunc = lengthfunc + self._set_flag(self.FLAG_DYNAMIC) + def _parse(self, stream, context): + return _read_stream(stream, self.lengthfunc(context)) + def _build(self, obj, stream, context): + _write_stream(stream, self.lengthfunc(context), obj) + def _sizeof(self, context): + return self.lengthfunc(context) + + +#=============================================================================== +# arrays and repeaters +#=============================================================================== +class MetaArray(Subconstruct): + """ + An array (repeater) of a meta-count. The array will iterate exactly + `countfunc()` times. Will raise ArrayError if less elements are found. + See also Array, Range and RepeatUntil. + + Parameters: + * countfunc - a function that takes the context as a parameter and returns + the number of elements of the array (count) + * subcon - the subcon to repeat `countfunc()` times + + Example: + MetaArray(lambda ctx: 5, UBInt8("foo")) + """ + __slots__ = ["countfunc"] + def __init__(self, countfunc, subcon): + Subconstruct.__init__(self, subcon) + self.countfunc = countfunc + self._clear_flag(self.FLAG_COPY_CONTEXT) + self._set_flag(self.FLAG_DYNAMIC) + def _parse(self, stream, context): + obj = ListContainer() + c = 0 + count = self.countfunc(context) + try: + if self.subcon.conflags & self.FLAG_COPY_CONTEXT: + while c < count: + obj.append(self.subcon._parse(stream, context.__copy__())) + c += 1 + else: + while c < count: + obj.append(self.subcon._parse(stream, context)) + c += 1 + except ConstructError, ex: + raise ArrayError("expected %d, found %d" % (count, c), ex) + return obj + def _build(self, obj, stream, context): + count = self.countfunc(context) + if len(obj) != count: + raise ArrayError("expected %d, found %d" % (count, len(obj))) + if self.subcon.conflags & self.FLAG_COPY_CONTEXT: + for subobj in obj: + self.subcon._build(subobj, stream, context.__copy__()) + else: + for subobj in obj: + self.subcon._build(subobj, stream, context) + def _sizeof(self, context): + return self.subcon._sizeof(context) * self.countfunc(context) + +class Range(Subconstruct): + """ + A range-array. The subcon will iterate between `mincount` to `maxcount` + times. If less than `mincount` elements are found, raises RangeError. + See also GreedyRange and OptionalGreedyRange. + + Notes: + * requires a seekable stream. + + Parameters: + * mincount - the minimal count (an integer) + * maxcount - the maximal count (an integer) + * subcon - the subcon to repeat + + Example: + Range(5, 8, UBInt8("foo")) + """ + __slots__ = ["mincount", "maxcout"] + def __init__(self, mincount, maxcout, subcon): + Subconstruct.__init__(self, subcon) + self.mincount = mincount + self.maxcout = maxcout + self._clear_flag(self.FLAG_COPY_CONTEXT) + self._set_flag(self.FLAG_DYNAMIC) + def _parse(self, stream, context): + obj = ListContainer() + c = 0 + try: + if self.subcon.conflags & self.FLAG_COPY_CONTEXT: + while c < self.maxcout: + pos = stream.tell() + obj.append(self.subcon._parse(stream, context.__copy__())) + c += 1 + else: + while c < self.maxcout: + pos = stream.tell() + obj.append(self.subcon._parse(stream, context)) + c += 1 + except ConstructError: + if c < self.mincount: + raise RangeError("expected %d to %d, found %d" % + (self.mincount, self.maxcout, c)) + stream.seek(pos) + return obj + def _build(self, obj, stream, context): + if len(obj) < self.mincount or len(obj) > self.maxcout: + raise RangeError("expected %d to %d, found %d" % + (self.mincount, self.maxcout, len(obj))) + cnt = 0 + try: + if self.subcon.conflags & self.FLAG_COPY_CONTEXT: + for subobj in obj: + self.subcon._build(subobj, stream, context.__copy__()) + cnt += 1 + else: + for subobj in obj: + self.subcon._build(subobj, stream, context) + cnt += 1 + except ConstructError: + if cnt < self.mincount: + raise RangeError("expected %d to %d, found %d" % + (self.mincount, self.maxcout, len(obj))) + def _sizeof(self, context): + raise SizeofError("can't calculate size") + +class RepeatUntil(Subconstruct): + """ + An array that repeat until the predicate indicates it to stop. Note that + the last element (which caused the repeat to exit) is included in the + return value. + + Parameters: + * predicate - a predicate function that takes (obj, context) and returns + True if the stop-condition is met, or False to continue. + * subcon - the subcon to repeat. + + Example: + # will read chars until \x00 (inclusive) + RepeatUntil(lambda obj, ctx: obj == "\x00", + Field("chars", 1) + ) + """ + __slots__ = ["predicate"] + def __init__(self, predicate, subcon): + Subconstruct.__init__(self, subcon) + self.predicate = predicate + self._clear_flag(self.FLAG_COPY_CONTEXT) + self._set_flag(self.FLAG_DYNAMIC) + def _parse(self, stream, context): + obj = [] + try: + if self.subcon.conflags & self.FLAG_COPY_CONTEXT: + while True: + subobj = self.subcon._parse(stream, context.__copy__()) + obj.append(subobj) + if self.predicate(subobj, context): + break + else: + while True: + subobj = self.subcon._parse(stream, context) + obj.append(subobj) + if self.predicate(subobj, context): + break + except ConstructError, ex: + raise ArrayError("missing terminator", ex) + return obj + def _build(self, obj, stream, context): + terminated = False + if self.subcon.conflags & self.FLAG_COPY_CONTEXT: + for subobj in obj: + self.subcon._build(subobj, stream, context.__copy__()) + if self.predicate(subobj, context): + terminated = True + break + else: + for subobj in obj: + self.subcon._build(subobj, stream, context.__copy__()) + if self.predicate(subobj, context): + terminated = True + break + if not terminated: + raise ArrayError("missing terminator") + def _sizeof(self, context): + raise SizeofError("can't calculate size") + + +#=============================================================================== +# structures and sequences +#=============================================================================== +class Struct(Construct): + """ + A sequence of named constructs, similar to structs in C. The elements are + parsed and built in the order they are defined. + See also Embedded. + + Parameters: + * name - the name of the structure + * subcons - a sequence of subconstructs that make up this structure. + * nested - a keyword-only argument that indicates whether this struct + creates a nested context. The default is True. This parameter is + considered "advanced usage", and may be removed in the future. + + Example: + Struct("foo", + UBInt8("first_element"), + UBInt16("second_element"), + Padding(2), + UBInt8("third_element"), + ) + """ + __slots__ = ["subcons", "nested"] + def __init__(self, name, *subcons, **kw): + self.nested = kw.pop("nested", True) + if kw: + raise TypeError("the only keyword argument accepted is 'nested'", kw) + Construct.__init__(self, name) + self.subcons = subcons + self._inherit_flags(*subcons) + self._clear_flag(self.FLAG_EMBED) + def _parse(self, stream, context): + if "" in context: + obj = context[""] + del context[""] + else: + obj = Container() + if self.nested: + context = AttrDict(_ = context) + for sc in self.subcons: + if sc.conflags & self.FLAG_EMBED: + context[""] = obj + sc._parse(stream, context) + else: + subobj = sc._parse(stream, context) + if sc.name is not None: + obj[sc.name] = subobj + context[sc.name] = subobj + return obj + def _build(self, obj, stream, context): + if "" in context: + del context[""] + elif self.nested: + context = AttrDict(_ = context) + for sc in self.subcons: + if sc.conflags & self.FLAG_EMBED: + context[""] = True + subobj = obj + elif sc.name is None: + subobj = None + else: + subobj = getattr(obj, sc.name) + context[sc.name] = subobj + sc._build(subobj, stream, context) + def _sizeof(self, context): + if self.nested: + context = AttrDict(_ = context) + return sum(sc._sizeof(context) for sc in self.subcons) + +class Sequence(Struct): + """ + A sequence of unnamed constructs. The elements are parsed and built in the + order they are defined. + See also Embedded. + + Parameters: + * name - the name of the structure + * subcons - a sequence of subconstructs that make up this structure. + * nested - a keyword-only argument that indicates whether this struct + creates a nested context. The default is True. This parameter is + considered "advanced usage", and may be removed in the future. + + Example: + Sequence("foo", + UBInt8("first_element"), + UBInt16("second_element"), + Padding(2), + UBInt8("third_element"), + ) + """ + __slots__ = [] + def _parse(self, stream, context): + if "" in context: + obj = context[""] + del context[""] + else: + obj = ListContainer() + if self.nested: + context = AttrDict(_ = context) + for sc in self.subcons: + if sc.conflags & self.FLAG_EMBED: + context[""] = obj + sc._parse(stream, context) + else: + subobj = sc._parse(stream, context) + if sc.name is not None: + obj.append(subobj) + context[sc.name] = subobj + return obj + def _build(self, obj, stream, context): + if "" in context: + del context[""] + elif self.nested: + context = AttrDict(_ = context) + objiter = iter(obj) + for sc in self.subcons: + if sc.conflags & self.FLAG_EMBED: + context[""] = True + subobj = objiter + elif sc.name is None: + subobj = None + else: + subobj = objiter.next() + context[sc.name] = subobj + sc._build(subobj, stream, context) + +class Union(Construct): + """ + a set of overlapping fields (like unions in C). when parsing, + all fields read the same data; when building, only the first subcon + (called "master") is used. + + Parameters: + * name - the name of the union + * master - the master subcon, i.e., the subcon used for building and + calculating the total size + * subcons - additional subcons + + Example: + Union("what_are_four_bytes", + UBInt32("one_dword"), + Struct("two_words", UBInt16("first"), UBInt16("second")), + Struct("four_bytes", + UBInt8("a"), + UBInt8("b"), + UBInt8("c"), + UBInt8("d") + ), + ) + """ + __slots__ = ["parser", "builder"] + def __init__(self, name, master, *subcons, **kw): + Construct.__init__(self, name) + args = [Peek(sc) for sc in subcons] + args.append(MetaField(None, lambda ctx: master._sizeof(ctx))) + self.parser = Struct(name, Peek(master, perform_build = True), *args) + self.builder = Struct(name, master) + def _parse(self, stream, context): + return self.parser._parse(stream, context) + def _build(self, obj, stream, context): + return self.builder._build(obj, stream, context) + def _sizeof(self, context): + return self.builder._sizeof(context) + +#=============================================================================== +# conditional +#=============================================================================== +class Switch(Construct): + """ + A conditional branch. Switch will choose the case to follow based on + the return value of keyfunc. If no case is matched, and no default value + is given, SwitchError will be raised. + See also Pass. + + Parameters: + * name - the name of the construct + * keyfunc - a function that takes the context and returns a key, which + will ne used to choose the relevant case. + * cases - a dictionary mapping keys to constructs. the keys can be any + values that may be returned by keyfunc. + * default - a default value to use when the key is not found in the cases. + if not supplied, an exception will be raised when the key is not found. + You can use the builtin construct Pass for 'do-nothing'. + * include_key - whether or not to include the key in the return value + of parsing. defualt is False. + + Example: + Struct("foo", + UBInt8("type"), + Switch("value", lambda ctx: ctx.type, { + 1 : UBInt8("spam"), + 2 : UBInt16("spam"), + 3 : UBInt32("spam"), + 4 : UBInt64("spam"), + } + ), + ) + """ + + class NoDefault(Construct): + def _parse(self, stream, context): + raise SwitchError("no default case defined") + def _build(self, obj, stream, context): + raise SwitchError("no default case defined") + def _sizeof(self, context): + raise SwitchError("no default case defined") + NoDefault = NoDefault("NoDefault") + + __slots__ = ["subcons", "keyfunc", "cases", "default", "include_key"] + + def __init__(self, name, keyfunc, cases, default = NoDefault, + include_key = False): + Construct.__init__(self, name) + self._inherit_flags(*cases.values()) + self.keyfunc = keyfunc + self.cases = cases + self.default = default + self.include_key = include_key + self._inherit_flags(*cases.values()) + self._set_flag(self.FLAG_DYNAMIC) + def _parse(self, stream, context): + key = self.keyfunc(context) + obj = self.cases.get(key, self.default)._parse(stream, context) + if self.include_key: + return key, obj + else: + return obj + def _build(self, obj, stream, context): + if self.include_key: + key, obj = obj + else: + key = self.keyfunc(context) + case = self.cases.get(key, self.default) + case._build(obj, stream, context) + def _sizeof(self, context): + case = self.cases.get(self.keyfunc(context), self.default) + return case._sizeof(context) + +class Select(Construct): + """ + Selects the first matching subconstruct. It will literally try each of + the subconstructs, until one matches. + + Notes: + * requires a seekable stream. + + Parameters: + * name - the name of the construct + * subcons - the subcons to try (order-sensitive) + * include_name - a keyword only argument, indicating whether to include + the name of the selected subcon in the return value of parsing. default + is false. + + Example: + Select("foo", + UBInt64("large"), + UBInt32("medium"), + UBInt16("small"), + UBInt8("tiny"), + ) + """ + __slots__ = ["subcons", "include_name"] + def __init__(self, name, *subcons, **kw): + include_name = kw.pop("include_name", False) + if kw: + raise TypeError("the only keyword argument accepted " + "is 'include_name'", kw) + Construct.__init__(self, name) + self.subcons = subcons + self.include_name = include_name + self._inherit_flags(*subcons) + self._set_flag(self.FLAG_DYNAMIC) + def _parse(self, stream, context): + for sc in self.subcons: + pos = stream.tell() + context2 = context.__copy__() + try: + obj = sc._parse(stream, context2) + except ConstructError: + stream.seek(pos) + else: + context.__update__(context2) + if self.include_name: + return sc.name, obj + else: + return obj + raise SelectError("no subconstruct matched") + def _build(self, obj, stream, context): + if self.include_name: + name, obj = obj + for sc in self.subcons: + if sc.name == name: + sc._build(obj, stream, context) + return + else: + for sc in self.subcons: + stream2 = StringIO() + context2 = context.__copy__() + try: + sc._build(obj, stream2, context2) + except Exception: + pass + else: + context.__update__(context2) + stream.write(stream2.getvalue()) + return + raise SelectError("no subconstruct matched", obj) + def _sizeof(self, context): + raise SizeofError("can't calculate size") + + +#=============================================================================== +# stream manipulation +#=============================================================================== +class Pointer(Subconstruct): + """ + Changes the stream position to a given offset, where the construction + should take place, and restores the stream position when finished. + See also Anchor, OnDemand and OnDemandPointer. + + Notes: + * requires a seekable stream. + + Parameters: + * offsetfunc: a function that takes the context and returns an absolute + stream position, where the construction would take place + * subcon - the subcon to use at `offsetfunc()` + + Example: + Struct("foo", + UBInt32("spam_pointer"), + Pointer(lambda ctx: ctx.spam_pointer, + Array(5, UBInt8("spam")) + ) + ) + """ + __slots__ = ["offsetfunc"] + def __init__(self, offsetfunc, subcon): + Subconstruct.__init__(self, subcon) + self.offsetfunc = offsetfunc + def _parse(self, stream, context): + newpos = self.offsetfunc(context) + origpos = stream.tell() + stream.seek(newpos) + obj = self.subcon._parse(stream, context) + stream.seek(origpos) + return obj + def _build(self, obj, stream, context): + newpos = self.offsetfunc(context) + origpos = stream.tell() + stream.seek(newpos) + self.subcon._build(obj, stream, context) + stream.seek(origpos) + def _sizeof(self, context): + return 0 + +class Peek(Subconstruct): + """ + Peeks at the stream: parses without changing the stream position. + See also Union. If the end of the stream is reached when peeking, + returns None. + + Notes: + * requires a seekable stream. + + Parameters: + * subcon - the subcon to peek at + * perform_build - whether or not to perform building. by default this + parameter is set to False, meaning building is a no-op. + + Example: + Peek(UBInt8("foo")) + """ + __slots__ = ["perform_build"] + def __init__(self, subcon, perform_build = False): + Subconstruct.__init__(self, subcon) + self.perform_build = perform_build + def _parse(self, stream, context): + pos = stream.tell() + try: + try: + return self.subcon._parse(stream, context) + except FieldError: + pass + finally: + stream.seek(pos) + def _build(self, obj, stream, context): + if self.perform_build: + self.subcon._build(obj, stream, context) + def _sizeof(self, context): + return 0 + +class OnDemand(Subconstruct): + """ + Allows for on-demand (lazy) parsing. When parsing, it will return a + LazyContainer that represents a pointer to the data, but does not actually + parses it from stream until it's "demanded". + By accessing the 'value' property of LazyContainers, you will demand the + data from the stream. The data will be parsed and cached for later use. + You can use the 'has_value' property to know whether the data has already + been demanded. + See also OnDemandPointer. + + Notes: + * requires a seekable stream. + + Parameters: + * subcon - + * advance_stream - whether or not to advance the stream position. by + default this is True, but if subcon is a pointer, this should be False. + * force_build - whether or not to force build. If set to False, and the + LazyContainer has not been demaned, building is a no-op. + + Example: + OnDemand(Array(10000, UBInt8("foo")) + """ + __slots__ = ["advance_stream", "force_build"] + def __init__(self, subcon, advance_stream = True, force_build = True): + Subconstruct.__init__(self, subcon) + self.advance_stream = advance_stream + self.force_build = force_build + def _parse(self, stream, context): + obj = LazyContainer(self.subcon, stream, stream.tell(), context) + if self.advance_stream: + stream.seek(self.subcon._sizeof(context), 1) + return obj + def _build(self, obj, stream, context): + if not isinstance(obj, LazyContainer): + self.subcon._build(obj, stream, context) + elif self.force_build or obj.has_value: + self.subcon._build(obj.value, stream, context) + elif self.advance_stream: + stream.seek(self.subcon._sizeof(context), 1) + +class Buffered(Subconstruct): + """ + Creates an in-memory buffered stream, which can undergo encoding and + decoding prior to being passed on to the subconstruct. + See also Bitwise. + + Note: + * Do not use pointers inside Buffered + + Parameters: + * subcon - the subcon which will operate on the buffer + * encoder - a function that takes a string and returns an encoded + string (used after building) + * decoder - a function that takes a string and returns a decoded + string (used before parsing) + * resizer - a function that takes the size of the subcon and "adjusts" + or "resizes" it according to the encoding/decoding process. + + Example: + Buffered(BitField("foo", 16), + encoder = decode_bin, + decoder = encode_bin, + resizer = lambda size: size / 8, + ) + """ + __slots__ = ["encoder", "decoder", "resizer"] + def __init__(self, subcon, decoder, encoder, resizer): + Subconstruct.__init__(self, subcon) + self.encoder = encoder + self.decoder = decoder + self.resizer = resizer + def _parse(self, stream, context): + data = _read_stream(stream, self._sizeof(context)) + stream2 = StringIO(self.decoder(data)) + return self.subcon._parse(stream2, context) + def _build(self, obj, stream, context): + size = self._sizeof(context) + stream2 = StringIO() + self.subcon._build(obj, stream2, context) + data = self.encoder(stream2.getvalue()) + assert len(data) == size + _write_stream(stream, self._sizeof(context), data) + def _sizeof(self, context): + return self.resizer(self.subcon._sizeof(context)) + +class Restream(Subconstruct): + """ + Wraps the stream with a read-wrapper (for parsing) or a + write-wrapper (for building). The stream wrapper can buffer the data + internally, reading it from- or writing it to the underlying stream + as needed. For example, BitStreamReader reads whole bytes from the + underlying stream, but returns them as individual bits. + See also Bitwise. + + When the parsing or building is done, the stream's close method + will be invoked. It can perform any finalization needed for the stream + wrapper, but it must not close the underlying stream. + + Note: + * Do not use pointers inside Restream + + Parameters: + * subcon - the subcon + * stream_reader - the read-wrapper + * stream_writer - the write wrapper + * resizer - a function that takes the size of the subcon and "adjusts" + or "resizes" it according to the encoding/decoding process. + + Example: + Restream(BitField("foo", 16), + stream_reader = BitStreamReader, + stream_writer = BitStreamWriter, + resizer = lambda size: size / 8, + ) + """ + __slots__ = ["stream_reader", "stream_writer", "resizer"] + def __init__(self, subcon, stream_reader, stream_writer, resizer): + Subconstruct.__init__(self, subcon) + self.stream_reader = stream_reader + self.stream_writer = stream_writer + self.resizer = resizer + def _parse(self, stream, context): + stream2 = self.stream_reader(stream) + obj = self.subcon._parse(stream2, context) + stream2.close() + return obj + def _build(self, obj, stream, context): + stream2 = self.stream_writer(stream) + self.subcon._build(obj, stream2, context) + stream2.close() + def _sizeof(self, context): + return self.resizer(self.subcon._sizeof(context)) + + +#=============================================================================== +# miscellaneous +#=============================================================================== +class Reconfig(Subconstruct): + """ + Reconfigures a subconstruct. Reconfig can be used to change the name and + set and clear flags of the inner subcon. + + Parameters: + * name - the new name + * subcon - the subcon to reconfigure + * setflags - the flags to set (default is 0) + * clearflags - the flags to clear (default is 0) + + Example: + Reconfig("foo", UBInt8("bar")) + """ + __slots__ = [] + def __init__(self, name, subcon, setflags = 0, clearflags = 0): + Construct.__init__(self, name, subcon.conflags) + self.subcon = subcon + self._set_flag(setflags) + self._clear_flag(clearflags) + +class Anchor(Construct): + """ + Returns the "anchor" (stream position) at the point where it's inserted. + Useful for adjusting relative offsets to absolute positions, or to measure + sizes of constructs. + absolute pointer = anchor + relative offset + size = anchor_after - anchor_before + See also Pointer. + + Notes: + * requires a seekable stream. + + Parameters: + * name - the name of the anchor + + Example: + Struct("foo", + Anchor("base"), + UBInt8("relative_offset"), + Pointer(lambda ctx: ctx.relative_offset + ctx.base, + UBInt8("data") + ) + ) + """ + __slots__ = [] + def _parse(self, stream, context): + return stream.tell() + def _build(self, obj, stream, context): + context[self.name] = stream.tell() + def _sizeof(self, context): + return 0 + +class Value(Construct): + """ + A computed value. + + Parameters: + * name - the name of the value + * func - a function that takes the context and return the computed value + + Example: + Struct("foo", + UBInt8("width"), + UBInt8("height"), + Value("total_pixels", lambda ctx: ctx.width * ctx.height), + ) + """ + __slots__ = ["func"] + def __init__(self, name, func): + Construct.__init__(self, name) + self.func = func + self._set_flag(self.FLAG_DYNAMIC) + def _parse(self, stream, context): + return self.func(context) + def _build(self, obj, stream, context): + context[self.name] = self.func(context) + def _sizeof(self, context): + return 0 + +#class Dynamic(Construct): +# """ +# Dynamically creates a construct and uses it for parsing and building. +# This allows you to create change the construction tree on the fly. +# Deprecated. +# +# Parameters: +# * name - the name of the construct +# * factoryfunc - a function that takes the context and returns a new +# construct object which will be used for parsing and building. +# +# Example: +# def factory(ctx): +# if ctx.bar == 8: +# return UBInt8("spam") +# if ctx.bar == 9: +# return String("spam", 9) +# +# Struct("foo", +# UBInt8("bar"), +# Dynamic("spam", factory), +# ) +# """ +# __slots__ = ["factoryfunc"] +# def __init__(self, name, factoryfunc): +# Construct.__init__(self, name, self.FLAG_COPY_CONTEXT) +# self.factoryfunc = factoryfunc +# self._set_flag(self.FLAG_DYNAMIC) +# def _parse(self, stream, context): +# return self.factoryfunc(context)._parse(stream, context) +# def _build(self, obj, stream, context): +# return self.factoryfunc(context)._build(obj, stream, context) +# def _sizeof(self, context): +# return self.factoryfunc(context)._sizeof(context) + +class LazyBound(Construct): + """ + Lazily bound construct, useful for constructs that need to make cyclic + references (linked-lists, expression trees, etc.). + + Parameters: + + + Example: + foo = Struct("foo", + UBInt8("bar"), + LazyBound("next", lambda: foo), + ) + """ + __slots__ = ["bindfunc", "bound"] + def __init__(self, name, bindfunc): + Construct.__init__(self, name) + self.bound = None + self.bindfunc = bindfunc + def _parse(self, stream, context): + if self.bound is None: + self.bound = self.bindfunc() + return self.bound._parse(stream, context) + def _build(self, obj, stream, context): + if self.bound is None: + self.bound = self.bindfunc() + self.bound._build(obj, stream, context) + def _sizeof(self, context): + if self.bound is None: + self.bound = self.bindfunc() + return self.bound._sizeof(context) + +class Pass(Construct): + """ + A do-nothing construct, useful as the default case for Switch, or + to indicate Enums. + See also Switch and Enum. + + Notes: + * this construct is a singleton. do not try to instatiate it, as it + will not work :) + + Example: + Pass + """ + __slots__ = [] + def _parse(self, stream, context): + pass + def _build(self, obj, stream, context): + assert obj is None + def _sizeof(self, context): + return 0 +Pass = Pass(None) + +class Terminator(Construct): + """ + Asserts the end of the stream has been reached at the point it's placed. + You can use this to ensure no more unparsed data follows. + + Notes: + * this construct is a singleton. do not try to instatiate it, as it + will not work :) + + Example: + Terminator + """ + __slots__ = [] + def _parse(self, stream, context): + if stream.read(1): + raise TerminatorError("expected end of stream") + def _build(self, obj, stream, context): + assert obj is None + def _sizeof(self, context): + return 0 +Terminator = Terminator(None) + + + + + + + + + + + + + + + + + + + diff --git a/elftools/construct/debug.py b/elftools/construct/debug.py index 47e47c6..b2df465 100644 --- a/elftools/construct/debug.py +++ b/elftools/construct/debug.py @@ -1,160 +1,160 @@ -""" -Debugging utilities for constructs -""" -import sys -import traceback -import pdb -import inspect -from core import Construct, Subconstruct -from lib import HexString, Container, ListContainer, AttrDict - - -class Probe(Construct): - """ - A probe: dumps the context, stack frames, and stream content to the screen - to aid the debugging process. - See also Debugger. - - Parameters: - * name - the display name - * show_stream - whether or not to show stream contents. default is True. - the stream must be seekable. - * show_context - whether or not to show the context. default is True. - * show_stack - whether or not to show the upper stack frames. default - is True. - * stream_lookahead - the number of bytes to dump when show_stack is set. - default is 100. - - Example: - Struct("foo", - UBInt8("a"), - Probe("between a and b"), - UBInt8("b"), - ) - """ - __slots__ = [ - "printname", "show_stream", "show_context", "show_stack", - "stream_lookahead" - ] - counter = 0 - - def __init__(self, name = None, show_stream = True, - show_context = True, show_stack = True, - stream_lookahead = 100): - Construct.__init__(self, None) - if name is None: - Probe.counter += 1 - name = "" % (Probe.counter,) - self.printname = name - self.show_stream = show_stream - self.show_context = show_context - self.show_stack = show_stack - self.stream_lookahead = stream_lookahead - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.printname) - def _parse(self, stream, context): - self.printout(stream, context) - def _build(self, obj, stream, context): - self.printout(stream, context) - def _sizeof(self, context): - return 0 - - def printout(self, stream, context): - obj = Container() - if self.show_stream: - obj.stream_position = stream.tell() - follows = stream.read(self.stream_lookahead) - if not follows: - obj.following_stream_data = "EOF reached" - else: - stream.seek(-len(follows), 1) - obj.following_stream_data = HexString(follows) - print - - if self.show_context: - obj.context = context - - if self.show_stack: - obj.stack = ListContainer() - frames = [s[0] for s in inspect.stack()][1:-1] - frames.reverse() - for f in frames: - a = AttrDict() - a.__update__(f.f_locals) - obj.stack.append(a) - - print "=" * 80 - print "Probe", self.printname - print obj - print "=" * 80 - -class Debugger(Subconstruct): - """ - A pdb-based debugger. When an exception occurs in the subcon, a debugger - will appear and allow you to debug the error (and even fix on-the-fly). - - Parameters: - * subcon - the subcon to debug - - Example: - Debugger( - Enum(UBInt8("foo"), - a = 1, - b = 2, - c = 3 - ) - ) - """ - __slots__ = ["retval"] - def _parse(self, stream, context): - try: - return self.subcon._parse(stream, context) - except Exception: - self.retval = NotImplemented - self.handle_exc("(you can set the value of 'self.retval', " - "which will be returned)") - if self.retval is NotImplemented: - raise - else: - return self.retval - def _build(self, obj, stream, context): - try: - self.subcon._build(obj, stream, context) - except Exception: - self.handle_exc() - def handle_exc(self, msg = None): - print "=" * 80 - print "Debugging exception of %s:" % (self.subcon,) - print "".join(traceback.format_exception(*sys.exc_info())[1:]) - if msg: - print msg - pdb.post_mortem(sys.exc_info()[2]) - print "=" * 80 - - - - - - - - - - - - - - - - - - - - - - - - - - - - +""" +Debugging utilities for constructs +""" +import sys +import traceback +import pdb +import inspect +from core import Construct, Subconstruct +from lib import HexString, Container, ListContainer, AttrDict + + +class Probe(Construct): + """ + A probe: dumps the context, stack frames, and stream content to the screen + to aid the debugging process. + See also Debugger. + + Parameters: + * name - the display name + * show_stream - whether or not to show stream contents. default is True. + the stream must be seekable. + * show_context - whether or not to show the context. default is True. + * show_stack - whether or not to show the upper stack frames. default + is True. + * stream_lookahead - the number of bytes to dump when show_stack is set. + default is 100. + + Example: + Struct("foo", + UBInt8("a"), + Probe("between a and b"), + UBInt8("b"), + ) + """ + __slots__ = [ + "printname", "show_stream", "show_context", "show_stack", + "stream_lookahead" + ] + counter = 0 + + def __init__(self, name = None, show_stream = True, + show_context = True, show_stack = True, + stream_lookahead = 100): + Construct.__init__(self, None) + if name is None: + Probe.counter += 1 + name = "" % (Probe.counter,) + self.printname = name + self.show_stream = show_stream + self.show_context = show_context + self.show_stack = show_stack + self.stream_lookahead = stream_lookahead + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.printname) + def _parse(self, stream, context): + self.printout(stream, context) + def _build(self, obj, stream, context): + self.printout(stream, context) + def _sizeof(self, context): + return 0 + + def printout(self, stream, context): + obj = Container() + if self.show_stream: + obj.stream_position = stream.tell() + follows = stream.read(self.stream_lookahead) + if not follows: + obj.following_stream_data = "EOF reached" + else: + stream.seek(-len(follows), 1) + obj.following_stream_data = HexString(follows) + print + + if self.show_context: + obj.context = context + + if self.show_stack: + obj.stack = ListContainer() + frames = [s[0] for s in inspect.stack()][1:-1] + frames.reverse() + for f in frames: + a = AttrDict() + a.__update__(f.f_locals) + obj.stack.append(a) + + print "=" * 80 + print "Probe", self.printname + print obj + print "=" * 80 + +class Debugger(Subconstruct): + """ + A pdb-based debugger. When an exception occurs in the subcon, a debugger + will appear and allow you to debug the error (and even fix on-the-fly). + + Parameters: + * subcon - the subcon to debug + + Example: + Debugger( + Enum(UBInt8("foo"), + a = 1, + b = 2, + c = 3 + ) + ) + """ + __slots__ = ["retval"] + def _parse(self, stream, context): + try: + return self.subcon._parse(stream, context) + except Exception: + self.retval = NotImplemented + self.handle_exc("(you can set the value of 'self.retval', " + "which will be returned)") + if self.retval is NotImplemented: + raise + else: + return self.retval + def _build(self, obj, stream, context): + try: + self.subcon._build(obj, stream, context) + except Exception: + self.handle_exc() + def handle_exc(self, msg = None): + print "=" * 80 + print "Debugging exception of %s:" % (self.subcon,) + print "".join(traceback.format_exception(*sys.exc_info())[1:]) + if msg: + print msg + pdb.post_mortem(sys.exc_info()[2]) + print "=" * 80 + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/elftools/construct/lib/__init__.py b/elftools/construct/lib/__init__.py index 23f0820..84d88db 100644 --- a/elftools/construct/lib/__init__.py +++ b/elftools/construct/lib/__init__.py @@ -1,10 +1,10 @@ -from binary import int_to_bin, bin_to_int, swap_bytes, encode_bin, decode_bin -from bitstream import BitStreamReader, BitStreamWriter -from container import (Container, AttrDict, FlagsContainer, - ListContainer, LazyContainer) -from hex import HexString, hexdump -from utils import Packer, StringIO - - - - +from binary import int_to_bin, bin_to_int, swap_bytes, encode_bin, decode_bin +from bitstream import BitStreamReader, BitStreamWriter +from container import (Container, AttrDict, FlagsContainer, + ListContainer, LazyContainer) +from hex import HexString, hexdump +from utils import Packer, StringIO + + + + diff --git a/elftools/construct/lib/binary.py b/elftools/construct/lib/binary.py index 971882e..b348da2 100644 --- a/elftools/construct/lib/binary.py +++ b/elftools/construct/lib/binary.py @@ -1,61 +1,61 @@ -def int_to_bin(number, width = 32): - if number < 0: - number += 1 << width - i = width - 1 - bits = ["\x00"] * width - while number and i >= 0: - bits[i] = "\x00\x01"[number & 1] - number >>= 1 - i -= 1 - return "".join(bits) - -_bit_values = {"\x00" : 0, "\x01" : 1, "0" : 0, "1" : 1} -def bin_to_int(bits, signed = False): - number = 0 - bias = 0 - if signed and _bit_values[bits[0]] == 1: - bits = bits[1:] - bias = 1 << len(bits) - for b in bits: - number <<= 1 - number |= _bit_values[b] - return number - bias - -def swap_bytes(bits, bytesize = 8): - i = 0 - l = len(bits) - output = [""] * ((l // bytesize) + 1) - j = len(output) - 1 - while i < l: - output[j] = bits[i : i + bytesize] - i += bytesize - j -= 1 - return "".join(output) - -_char_to_bin = {} -_bin_to_char = {} -for i in range(256): - ch = chr(i) - bin = int_to_bin(i, 8) - _char_to_bin[ch] = bin - _bin_to_char[bin] = ch - _bin_to_char[bin] = ch - -def encode_bin(data): - return "".join(_char_to_bin[ch] for ch in data) - -def decode_bin(data): - assert len(data) & 7 == 0, "data length must be a multiple of 8" - i = 0 - j = 0 - l = len(data) // 8 - chars = [""] * l - while j < l: - chars[j] = _bin_to_char[data[i:i+8]] - i += 8 - j += 1 - return "".join(chars) - - - - +def int_to_bin(number, width = 32): + if number < 0: + number += 1 << width + i = width - 1 + bits = ["\x00"] * width + while number and i >= 0: + bits[i] = "\x00\x01"[number & 1] + number >>= 1 + i -= 1 + return "".join(bits) + +_bit_values = {"\x00" : 0, "\x01" : 1, "0" : 0, "1" : 1} +def bin_to_int(bits, signed = False): + number = 0 + bias = 0 + if signed and _bit_values[bits[0]] == 1: + bits = bits[1:] + bias = 1 << len(bits) + for b in bits: + number <<= 1 + number |= _bit_values[b] + return number - bias + +def swap_bytes(bits, bytesize = 8): + i = 0 + l = len(bits) + output = [""] * ((l // bytesize) + 1) + j = len(output) - 1 + while i < l: + output[j] = bits[i : i + bytesize] + i += bytesize + j -= 1 + return "".join(output) + +_char_to_bin = {} +_bin_to_char = {} +for i in range(256): + ch = chr(i) + bin = int_to_bin(i, 8) + _char_to_bin[ch] = bin + _bin_to_char[bin] = ch + _bin_to_char[bin] = ch + +def encode_bin(data): + return "".join(_char_to_bin[ch] for ch in data) + +def decode_bin(data): + assert len(data) & 7 == 0, "data length must be a multiple of 8" + i = 0 + j = 0 + l = len(data) // 8 + chars = [""] * l + while j < l: + chars[j] = _bin_to_char[data[i:i+8]] + i += 8 + j += 1 + return "".join(chars) + + + + diff --git a/elftools/construct/lib/bitstream.py b/elftools/construct/lib/bitstream.py index ff56be6..e473864 100644 --- a/elftools/construct/lib/bitstream.py +++ b/elftools/construct/lib/bitstream.py @@ -1,80 +1,80 @@ -from binary import encode_bin, decode_bin - - -class BitStreamReader(object): - __slots__ = ["substream", "buffer", "total_size"] - def __init__(self, substream): - self.substream = substream - self.total_size = 0 - self.buffer = "" - def close(self): - if self.total_size % 8 != 0: - raise ValueError("total size of read data must be a multiple of 8", - self.total_size) - def tell(self): - return self.substream.tell() - def seek(self, pos, whence = 0): - self.buffer = "" - self.total_size = 0 - self.substream.seek(pos, whence) - def read(self, count): - assert count >= 0 - l = len(self.buffer) - if count == 0: - data = "" - elif count <= l: - data = self.buffer[:count] - self.buffer = self.buffer[count:] - else: - data = self.buffer - count -= l - bytes = count // 8 - if count & 7: - bytes += 1 - buf = encode_bin(self.substream.read(bytes)) - data += buf[:count] - self.buffer = buf[count:] - self.total_size += len(data) - return data - - -class BitStreamWriter(object): - __slots__ = ["substream", "buffer", "pos"] - def __init__(self, substream): - self.substream = substream - self.buffer = [] - self.pos = 0 - def close(self): - self.flush() - def flush(self): - bytes = decode_bin("".join(self.buffer)) - self.substream.write(bytes) - self.buffer = [] - self.pos = 0 - def tell(self): - return self.substream.tell() + self.pos // 8 - def seek(self, pos, whence = 0): - self.flush() - self.substream.seek(pos, whence) - def write(self, data): - if not data: - return - if type(data) is not str: - raise TypeError("data must be a string, not %r" % (type(data),)) - self.buffer.append(data) - - - - - - - - - - - - - - - - +from binary import encode_bin, decode_bin + + +class BitStreamReader(object): + __slots__ = ["substream", "buffer", "total_size"] + def __init__(self, substream): + self.substream = substream + self.total_size = 0 + self.buffer = "" + def close(self): + if self.total_size % 8 != 0: + raise ValueError("total size of read data must be a multiple of 8", + self.total_size) + def tell(self): + return self.substream.tell() + def seek(self, pos, whence = 0): + self.buffer = "" + self.total_size = 0 + self.substream.seek(pos, whence) + def read(self, count): + assert count >= 0 + l = len(self.buffer) + if count == 0: + data = "" + elif count <= l: + data = self.buffer[:count] + self.buffer = self.buffer[count:] + else: + data = self.buffer + count -= l + bytes = count // 8 + if count & 7: + bytes += 1 + buf = encode_bin(self.substream.read(bytes)) + data += buf[:count] + self.buffer = buf[count:] + self.total_size += len(data) + return data + + +class BitStreamWriter(object): + __slots__ = ["substream", "buffer", "pos"] + def __init__(self, substream): + self.substream = substream + self.buffer = [] + self.pos = 0 + def close(self): + self.flush() + def flush(self): + bytes = decode_bin("".join(self.buffer)) + self.substream.write(bytes) + self.buffer = [] + self.pos = 0 + def tell(self): + return self.substream.tell() + self.pos // 8 + def seek(self, pos, whence = 0): + self.flush() + self.substream.seek(pos, whence) + def write(self, data): + if not data: + return + if type(data) is not str: + raise TypeError("data must be a string, not %r" % (type(data),)) + self.buffer.append(data) + + + + + + + + + + + + + + + + diff --git a/elftools/construct/lib/container.py b/elftools/construct/lib/container.py index 66f8ab4..4b2f727 100644 --- a/elftools/construct/lib/container.py +++ b/elftools/construct/lib/container.py @@ -1,258 +1,258 @@ -def recursion_lock(retval, lock_name = "__recursion_lock__"): - def decorator(func): - def wrapper(self, *args, **kw): - if getattr(self, lock_name, False): - return retval - setattr(self, lock_name, True) - try: - return func(self, *args, **kw) - finally: - setattr(self, lock_name, False) - wrapper.__name__ = func.__name__ - return wrapper - return decorator - -class Container(object): - """ - A generic container of attributes - """ - __slots__ = ["__dict__", "__attrs__"] - def __init__(self, **kw): - self.__dict__.update(kw) - object.__setattr__(self, "__attrs__", kw.keys()) - - def __eq__(self, other): - try: - return self.__dict__ == other.__dict__ - except AttributeError: - return False - def __ne__(self, other): - return not (self == other) - - def __delattr__(self, name): - object.__delattr__(self, name) - self.__attrs__.remove(name) - def __setattr__(self, name, value): - d = self.__dict__ - if name not in d: - self.__attrs__.append(name) - d[name] = value - def __getitem__(self, name): - return self.__dict__[name] - def __delitem__(self, name): - self.__delattr__(name) - def __setitem__(self, name, value): - self.__setattr__(name, value) - def __update__(self, obj): - for name in obj.__attrs__: - self[name] = obj[name] - def __copy__(self): - new = self.__class__() - new.__attrs__ = self.__attrs__[:] - new.__dict__ = self.__dict__.copy() - return new - - @recursion_lock("<...>") - def __repr__(self): - attrs = sorted("%s = %r" % (k, v) - for k, v in self.__dict__.iteritems() - if not k.startswith("_")) - return "%s(%s)" % (self.__class__.__name__, ", ".join(attrs)) - def __str__(self): - return self.__pretty_str__() - @recursion_lock("<...>") - def __pretty_str__(self, nesting = 1, indentation = " "): - attrs = [] - ind = indentation * nesting - for k in self.__attrs__: - v = self.__dict__[k] - if not k.startswith("_"): - text = [ind, k, " = "] - if hasattr(v, "__pretty_str__"): - text.append(v.__pretty_str__(nesting + 1, indentation)) - else: - text.append(repr(v)) - attrs.append("".join(text)) - if not attrs: - return "%s()" % (self.__class__.__name__,) - attrs.insert(0, self.__class__.__name__ + ":") - return "\n".join(attrs) - -class FlagsContainer(Container): - """ - A container providing pretty-printing for flags. Only set flags are - displayed. - """ - def __pretty_str__(self, nesting = 1, indentation = " "): - attrs = [] - ind = indentation * nesting - for k in self.__attrs__: - v = self.__dict__[k] - if not k.startswith("_") and v: - attrs.append(ind + k) - if not attrs: - return "%s()" % (self.__class__.__name__,) - attrs.insert(0, self.__class__.__name__+ ":") - return "\n".join(attrs) - -class ListContainer(list): - """ - A container for lists - """ - __slots__ = ["__recursion_lock__"] - def __str__(self): - return self.__pretty_str__() - @recursion_lock("[...]") - def __pretty_str__(self, nesting = 1, indentation = " "): - if not self: - return "[]" - ind = indentation * nesting - lines = ["["] - for elem in self: - lines.append("\n") - lines.append(ind) - if hasattr(elem, "__pretty_str__"): - lines.append(elem.__pretty_str__(nesting + 1, indentation)) - else: - lines.append(repr(elem)) - lines.append("\n") - lines.append(indentation * (nesting - 1)) - lines.append("]") - return "".join(lines) - -class AttrDict(object): - """ - A dictionary that can be accessed both using indexing and attributes, - i.e., - x = AttrDict() - x.foo = 5 - print x["foo"] - """ - __slots__ = ["__dict__"] - def __init__(self, **kw): - self.__dict__ = kw - def __contains__(self, key): - return key in self.__dict__ - def __nonzero__(self): - return bool(self.__dict__) - def __repr__(self): - return repr(self.__dict__) - def __str__(self): - return self.__pretty_str__() - def __pretty_str__(self, nesting = 1, indentation = " "): - if not self: - return "{}" - text = ["{\n"] - ind = nesting * indentation - for k in sorted(self.__dict__.keys()): - v = self.__dict__[k] - text.append(ind) - text.append(repr(k)) - text.append(" : ") - if hasattr(v, "__pretty_str__"): - try: - text.append(v.__pretty_str__(nesting+1, indentation)) - except Exception: - text.append(repr(v)) - else: - text.append(repr(v)) - text.append("\n") - text.append((nesting-1) * indentation) - text.append("}") - return "".join(text) - def __delitem__(self, key): - del self.__dict__[key] - def __getitem__(self, key): - return self.__dict__[key] - def __setitem__(self, key, value): - self.__dict__[key] = value - def __copy__(self): - new = self.__class__() - new.__dict__ = self.__dict__.copy() - return new - def __update__(self, other): - if isinstance(other, dict): - self.__dict__.update(other) - else: - self.__dict__.update(other.__dict__) - -class LazyContainer(object): - __slots__ = ["subcon", "stream", "pos", "context", "_value"] - def __init__(self, subcon, stream, pos, context): - self.subcon = subcon - self.stream = stream - self.pos = pos - self.context = context - self._value = NotImplemented - def __eq__(self, other): - try: - return self._value == other._value - except AttributeError: - return False - def __ne__(self, other): - return not (self == other) - def __str__(self): - return self.__pretty_str__() - def __pretty_str__(self, nesting = 1, indentation = " "): - if self._value is NotImplemented: - text = "" - elif hasattr(self._value, "__pretty_str__"): - text = self._value.__pretty_str__(nesting, indentation) - else: - text = repr(self._value) - return "%s: %s" % (self.__class__.__name__, text) - def read(self): - self.stream.seek(self.pos) - return self.subcon._parse(self.stream, self.context) - def dispose(self): - self.subcon = None - self.stream = None - self.context = None - self.pos = None - def _get_value(self): - if self._value is NotImplemented: - self._value = self.read() - return self._value - value = property(_get_value) - has_value = property(lambda self: self._value is not NotImplemented) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +def recursion_lock(retval, lock_name = "__recursion_lock__"): + def decorator(func): + def wrapper(self, *args, **kw): + if getattr(self, lock_name, False): + return retval + setattr(self, lock_name, True) + try: + return func(self, *args, **kw) + finally: + setattr(self, lock_name, False) + wrapper.__name__ = func.__name__ + return wrapper + return decorator + +class Container(object): + """ + A generic container of attributes + """ + __slots__ = ["__dict__", "__attrs__"] + def __init__(self, **kw): + self.__dict__.update(kw) + object.__setattr__(self, "__attrs__", kw.keys()) + + def __eq__(self, other): + try: + return self.__dict__ == other.__dict__ + except AttributeError: + return False + def __ne__(self, other): + return not (self == other) + + def __delattr__(self, name): + object.__delattr__(self, name) + self.__attrs__.remove(name) + def __setattr__(self, name, value): + d = self.__dict__ + if name not in d: + self.__attrs__.append(name) + d[name] = value + def __getitem__(self, name): + return self.__dict__[name] + def __delitem__(self, name): + self.__delattr__(name) + def __setitem__(self, name, value): + self.__setattr__(name, value) + def __update__(self, obj): + for name in obj.__attrs__: + self[name] = obj[name] + def __copy__(self): + new = self.__class__() + new.__attrs__ = self.__attrs__[:] + new.__dict__ = self.__dict__.copy() + return new + + @recursion_lock("<...>") + def __repr__(self): + attrs = sorted("%s = %r" % (k, v) + for k, v in self.__dict__.iteritems() + if not k.startswith("_")) + return "%s(%s)" % (self.__class__.__name__, ", ".join(attrs)) + def __str__(self): + return self.__pretty_str__() + @recursion_lock("<...>") + def __pretty_str__(self, nesting = 1, indentation = " "): + attrs = [] + ind = indentation * nesting + for k in self.__attrs__: + v = self.__dict__[k] + if not k.startswith("_"): + text = [ind, k, " = "] + if hasattr(v, "__pretty_str__"): + text.append(v.__pretty_str__(nesting + 1, indentation)) + else: + text.append(repr(v)) + attrs.append("".join(text)) + if not attrs: + return "%s()" % (self.__class__.__name__,) + attrs.insert(0, self.__class__.__name__ + ":") + return "\n".join(attrs) + +class FlagsContainer(Container): + """ + A container providing pretty-printing for flags. Only set flags are + displayed. + """ + def __pretty_str__(self, nesting = 1, indentation = " "): + attrs = [] + ind = indentation * nesting + for k in self.__attrs__: + v = self.__dict__[k] + if not k.startswith("_") and v: + attrs.append(ind + k) + if not attrs: + return "%s()" % (self.__class__.__name__,) + attrs.insert(0, self.__class__.__name__+ ":") + return "\n".join(attrs) + +class ListContainer(list): + """ + A container for lists + """ + __slots__ = ["__recursion_lock__"] + def __str__(self): + return self.__pretty_str__() + @recursion_lock("[...]") + def __pretty_str__(self, nesting = 1, indentation = " "): + if not self: + return "[]" + ind = indentation * nesting + lines = ["["] + for elem in self: + lines.append("\n") + lines.append(ind) + if hasattr(elem, "__pretty_str__"): + lines.append(elem.__pretty_str__(nesting + 1, indentation)) + else: + lines.append(repr(elem)) + lines.append("\n") + lines.append(indentation * (nesting - 1)) + lines.append("]") + return "".join(lines) + +class AttrDict(object): + """ + A dictionary that can be accessed both using indexing and attributes, + i.e., + x = AttrDict() + x.foo = 5 + print x["foo"] + """ + __slots__ = ["__dict__"] + def __init__(self, **kw): + self.__dict__ = kw + def __contains__(self, key): + return key in self.__dict__ + def __nonzero__(self): + return bool(self.__dict__) + def __repr__(self): + return repr(self.__dict__) + def __str__(self): + return self.__pretty_str__() + def __pretty_str__(self, nesting = 1, indentation = " "): + if not self: + return "{}" + text = ["{\n"] + ind = nesting * indentation + for k in sorted(self.__dict__.keys()): + v = self.__dict__[k] + text.append(ind) + text.append(repr(k)) + text.append(" : ") + if hasattr(v, "__pretty_str__"): + try: + text.append(v.__pretty_str__(nesting+1, indentation)) + except Exception: + text.append(repr(v)) + else: + text.append(repr(v)) + text.append("\n") + text.append((nesting-1) * indentation) + text.append("}") + return "".join(text) + def __delitem__(self, key): + del self.__dict__[key] + def __getitem__(self, key): + return self.__dict__[key] + def __setitem__(self, key, value): + self.__dict__[key] = value + def __copy__(self): + new = self.__class__() + new.__dict__ = self.__dict__.copy() + return new + def __update__(self, other): + if isinstance(other, dict): + self.__dict__.update(other) + else: + self.__dict__.update(other.__dict__) + +class LazyContainer(object): + __slots__ = ["subcon", "stream", "pos", "context", "_value"] + def __init__(self, subcon, stream, pos, context): + self.subcon = subcon + self.stream = stream + self.pos = pos + self.context = context + self._value = NotImplemented + def __eq__(self, other): + try: + return self._value == other._value + except AttributeError: + return False + def __ne__(self, other): + return not (self == other) + def __str__(self): + return self.__pretty_str__() + def __pretty_str__(self, nesting = 1, indentation = " "): + if self._value is NotImplemented: + text = "" + elif hasattr(self._value, "__pretty_str__"): + text = self._value.__pretty_str__(nesting, indentation) + else: + text = repr(self._value) + return "%s: %s" % (self.__class__.__name__, text) + def read(self): + self.stream.seek(self.pos) + return self.subcon._parse(self.stream, self.context) + def dispose(self): + self.subcon = None + self.stream = None + self.context = None + self.pos = None + def _get_value(self): + if self._value is NotImplemented: + self._value = self.read() + return self._value + value = property(_get_value) + has_value = property(lambda self: self._value is not NotImplemented) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/elftools/construct/lib/hex.py b/elftools/construct/lib/hex.py index e392bd3..0bb2f02 100644 --- a/elftools/construct/lib/hex.py +++ b/elftools/construct/lib/hex.py @@ -1,34 +1,34 @@ -_printable = dict((chr(i), ".") for i in range(256)) -_printable.update((chr(i), chr(i)) for i in range(32, 128)) - -def hexdump(data, linesize = 16): - prettylines = [] - if len(data) < 65536: - fmt = "%%04X %%-%ds %%s" - else: - fmt = "%%08X %%-%ds %%s" - fmt = fmt % (3 * linesize - 1,) - for i in xrange(0, len(data), linesize): - line = data[i : i + linesize] - hextext = " ".join(b.encode("hex") for b in line) - rawtext = "".join(_printable[b] for b in line) - prettylines.append(fmt % (i, hextext, rawtext)) - return prettylines - -class HexString(str): - """ - represents a string that will be hex-dumped (only via __pretty_str__). - this class derives of str, and behaves just like a normal string in all - other contexts. - """ - def __init__(self, data, linesize = 16): - str.__init__(self, data) - self.linesize = linesize - def __new__(cls, data, *args, **kwargs): - return str.__new__(cls, data) - def __pretty_str__(self, nesting = 1, indentation = " "): - sep = "\n" + indentation * nesting - return sep + sep.join(hexdump(self)) - - - +_printable = dict((chr(i), ".") for i in range(256)) +_printable.update((chr(i), chr(i)) for i in range(32, 128)) + +def hexdump(data, linesize = 16): + prettylines = [] + if len(data) < 65536: + fmt = "%%04X %%-%ds %%s" + else: + fmt = "%%08X %%-%ds %%s" + fmt = fmt % (3 * linesize - 1,) + for i in xrange(0, len(data), linesize): + line = data[i : i + linesize] + hextext = " ".join(b.encode("hex") for b in line) + rawtext = "".join(_printable[b] for b in line) + prettylines.append(fmt % (i, hextext, rawtext)) + return prettylines + +class HexString(str): + """ + represents a string that will be hex-dumped (only via __pretty_str__). + this class derives of str, and behaves just like a normal string in all + other contexts. + """ + def __init__(self, data, linesize = 16): + str.__init__(self, data) + self.linesize = linesize + def __new__(cls, data, *args, **kwargs): + return str.__new__(cls, data) + def __pretty_str__(self, nesting = 1, indentation = " "): + sep = "\n" + indentation * nesting + return sep + sep.join(hexdump(self)) + + + diff --git a/elftools/construct/lib/utils.py b/elftools/construct/lib/utils.py index 968dc26..86d8b03 100644 --- a/elftools/construct/lib/utils.py +++ b/elftools/construct/lib/utils.py @@ -1,22 +1,22 @@ -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - - -try: - from struct import Struct as Packer -except ImportError: - from struct import pack, unpack, calcsize - class Packer(object): - __slots__ = ["format", "size"] - def __init__(self, format): - self.format = format - self.size = calcsize(format) - def pack(self, *args): - return pack(self.format, *args) - def unpack(self, data): - return unpack(self.format, data) - - - +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + + +try: + from struct import Struct as Packer +except ImportError: + from struct import pack, unpack, calcsize + class Packer(object): + __slots__ = ["format", "size"] + def __init__(self, format): + self.format = format + self.size = calcsize(format) + def pack(self, *args): + return pack(self.format, *args) + def unpack(self, data): + return unpack(self.format, data) + + + diff --git a/elftools/construct/macros.py b/elftools/construct/macros.py index 35e689a..dcffb0e 100644 --- a/elftools/construct/macros.py +++ b/elftools/construct/macros.py @@ -1,514 +1,514 @@ -from lib import BitStreamReader, BitStreamWriter, encode_bin, decode_bin -from core import * -from adapters import * - - -#=============================================================================== -# fields -#=============================================================================== -def Field(name, length): - """a field - * name - the name of the field - * length - the length of the field. the length can be either an integer - (StaticField), or a function that takes the context as an argument and - returns the length (MetaField) - """ - if callable(length): - return MetaField(name, length) - else: - return StaticField(name, length) - -def BitField(name, length, swapped = False, signed = False, bytesize = 8): - """a bit field; must be enclosed in a BitStruct - * name - the name of the field - * length - the length of the field in bits. the length can be either - an integer, or a function that takes the context as an argument and - returns the length - * swapped - whether the value is byte-swapped (little endian). the - default is False. - * signed - whether the value of the bitfield is a signed integer. the - default is False. - * bytesize - the number of bits in a byte (used for byte-swapping). the - default is 8. - """ - return BitIntegerAdapter(Field(name, length), - length, - swapped = swapped, - signed = signed, - bytesize = bytesize - ) - -def Padding(length, pattern = "\x00", strict = False): - r"""a padding field (value is discarded) - * length - the length of the field. the length can be either an integer, - or a function that takes the context as an argument and returns the - length - * pattern - the padding pattern (character) to use. default is "\x00" - * strict - whether or not to raise an exception is the actual padding - pattern mismatches the desired pattern. default is False. - """ - return PaddingAdapter(Field(None, length), - pattern = pattern, - strict = strict, - ) - -def Flag(name, truth = 1, falsehood = 0, default = False): - """a flag field (True or False) - * name - the name of the field - * truth - the numeric value of truth. the default is 1. - * falsehood - the numeric value of falsehood. the default is 0. - * default - the default value to assume, when the value is neither - `truth` nor `falsehood`. the default is False. - """ - return SymmetricMapping(Field(name, 1), - {True : chr(truth), False : chr(falsehood)}, - default = default, - ) - -#=============================================================================== -# field shortcuts -#=============================================================================== -def Bit(name): - """a 1-bit BitField; must be enclosed in a BitStruct""" - return BitField(name, 1) -def Nibble(name): - """a 4-bit BitField; must be enclosed in a BitStruct""" - return BitField(name, 4) -def Octet(name): - """an 8-bit BitField; must be enclosed in a BitStruct""" - return BitField(name, 8) - -def UBInt8(name): - """unsigned, big endian 8-bit integer""" - return FormatField(name, ">", "B") -def UBInt16(name): - """unsigned, big endian 16-bit integer""" - return FormatField(name, ">", "H") -def UBInt32(name): - """unsigned, big endian 32-bit integer""" - return FormatField(name, ">", "L") -def UBInt64(name): - """unsigned, big endian 64-bit integer""" - return FormatField(name, ">", "Q") - -def SBInt8(name): - """signed, big endian 8-bit integer""" - return FormatField(name, ">", "b") -def SBInt16(name): - """signed, big endian 16-bit integer""" - return FormatField(name, ">", "h") -def SBInt32(name): - """signed, big endian 32-bit integer""" - return FormatField(name, ">", "l") -def SBInt64(name): - """signed, big endian 64-bit integer""" - return FormatField(name, ">", "q") - -def ULInt8(name): - """unsigned, little endian 8-bit integer""" - return FormatField(name, "<", "B") -def ULInt16(name): - """unsigned, little endian 16-bit integer""" - return FormatField(name, "<", "H") -def ULInt32(name): - """unsigned, little endian 32-bit integer""" - return FormatField(name, "<", "L") -def ULInt64(name): - """unsigned, little endian 64-bit integer""" - return FormatField(name, "<", "Q") - -def SLInt8(name): - """signed, little endian 8-bit integer""" - return FormatField(name, "<", "b") -def SLInt16(name): - """signed, little endian 16-bit integer""" - return FormatField(name, "<", "h") -def SLInt32(name): - """signed, little endian 32-bit integer""" - return FormatField(name, "<", "l") -def SLInt64(name): - """signed, little endian 64-bit integer""" - return FormatField(name, "<", "q") - -def UNInt8(name): - """unsigned, native endianity 8-bit integer""" - return FormatField(name, "=", "B") -def UNInt16(name): - """unsigned, native endianity 16-bit integer""" - return FormatField(name, "=", "H") -def UNInt32(name): - """unsigned, native endianity 32-bit integer""" - return FormatField(name, "=", "L") -def UNInt64(name): - """unsigned, native endianity 64-bit integer""" - return FormatField(name, "=", "Q") - -def SNInt8(name): - """signed, native endianity 8-bit integer""" - return FormatField(name, "=", "b") -def SNInt16(name): - """signed, native endianity 16-bit integer""" - return FormatField(name, "=", "h") -def SNInt32(name): - """signed, native endianity 32-bit integer""" - return FormatField(name, "=", "l") -def SNInt64(name): - """signed, native endianity 64-bit integer""" - return FormatField(name, "=", "q") - -def BFloat32(name): - """big endian, 32-bit IEEE floating point number""" - return FormatField(name, ">", "f") -def LFloat32(name): - """little endian, 32-bit IEEE floating point number""" - return FormatField(name, "<", "f") -def NFloat32(name): - """native endianity, 32-bit IEEE floating point number""" - return FormatField(name, "=", "f") - -def BFloat64(name): - """big endian, 64-bit IEEE floating point number""" - return FormatField(name, ">", "d") -def LFloat64(name): - """little endian, 64-bit IEEE floating point number""" - return FormatField(name, "<", "d") -def NFloat64(name): - """native endianity, 64-bit IEEE floating point number""" - return FormatField(name, "=", "d") - - -#=============================================================================== -# arrays -#=============================================================================== -def Array(count, subcon): - """array of subcon repeated count times. - * subcon - the subcon. - * count - an integer, or a function taking the context as an argument, - returning the count - """ - if callable(count): - con = MetaArray(count, subcon) - else: - con = MetaArray(lambda ctx: count, subcon) - con._clear_flag(con.FLAG_DYNAMIC) - return con - -def PrefixedArray(subcon, length_field = UBInt8("length")): - """an array prefixed by a length field. - * subcon - the subcon to be repeated - * length_field - an integer construct - """ - return LengthValueAdapter( - Sequence(subcon.name, - length_field, - Array(lambda ctx: ctx[length_field.name], subcon), - nested = False - ) - ) - -def OpenRange(mincount, subcon): - from sys import maxint - return Range(mincount, maxint, subcon) - -def GreedyRange(subcon): - """an open range (1 or more times) of repeated subcon. - * subcon - the subcon to repeat""" - return OpenRange(1, subcon) - -def OptionalGreedyRange(subcon): - """an open range (0 or more times) of repeated subcon. - * subcon - the subcon to repeat""" - return OpenRange(0, subcon) - - -#=============================================================================== -# subconstructs -#=============================================================================== -def Optional(subcon): - """an optional construct. if parsing fails, returns None. - * subcon - the subcon to optionally parse or build - """ - return Select(subcon.name, subcon, Pass) - -def Bitwise(subcon): - """converts the stream to bits, and passes the bitstream to subcon - * subcon - a bitwise construct (usually BitField) - """ - # subcons larger than MAX_BUFFER will be wrapped by Restream instead - # of Buffered. implementation details, don't stick your nose :) - MAX_BUFFER = 1024 * 8 - def resizer(length): - if length & 7: - raise SizeofError("size must be a multiple of 8", length) - return length >> 3 - if not subcon._is_flag(subcon.FLAG_DYNAMIC) and subcon.sizeof() < MAX_BUFFER: - con = Buffered(subcon, - encoder = decode_bin, - decoder = encode_bin, - resizer = resizer - ) - else: - con = Restream(subcon, - stream_reader = BitStreamReader, - stream_writer = BitStreamWriter, - resizer = resizer) - return con - -def Aligned(subcon, modulus = 4, pattern = "\x00"): - r"""aligns subcon to modulus boundary using padding pattern - * subcon - the subcon to align - * modulus - the modulus boundary (default is 4) - * pattern - the padding pattern (default is \x00) - """ - if modulus < 2: - raise ValueError("modulus must be >= 2", modulus) - if modulus in (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024): - def padlength(ctx): - m1 = modulus - 1 - return (modulus - (subcon._sizeof(ctx) & m1)) & m1 - else: - def padlength(ctx): - return (modulus - (subcon._sizeof(ctx) % modulus)) % modulus - return IndexingAdapter( - Sequence(subcon.name, - subcon, - Padding(padlength, pattern = pattern), - nested = False, - ), - 0 - ) - -def Embedded(subcon): - """embeds a struct into the enclosing struct. - * subcon - the struct to embed - """ - return Reconfig(subcon.name, subcon, subcon.FLAG_EMBED) - -def Rename(newname, subcon): - """renames an existing construct - * newname - the new name - * subcon - the subcon to rename - """ - return Reconfig(newname, subcon) - -def Alias(newname, oldname): - """creates an alias for an existing element in a struct - * newname - the new name - * oldname - the name of an existing element - """ - return Value(newname, lambda ctx: ctx[oldname]) - - -#=============================================================================== -# mapping -#=============================================================================== -def SymmetricMapping(subcon, mapping, default = NotImplemented): - """defines a symmetrical mapping: a->b, b->a. - * subcon - the subcon to map - * mapping - the encoding mapping (a dict); the decoding mapping is - achieved by reversing this mapping - * default - the default value to use when no mapping is found. if no - default value is given, and exception is raised. setting to Pass would - return the value "as is" (unmapped) - """ - reversed_mapping = dict((v, k) for k, v in mapping.iteritems()) - return MappingAdapter(subcon, - encoding = mapping, - decoding = reversed_mapping, - encdefault = default, - decdefault = default, - ) - -def Enum(subcon, **kw): - """a set of named values mapping. - * subcon - the subcon to map - * kw - keyword arguments which serve as the encoding mapping - * _default_ - an optional, keyword-only argument that specifies the - default value to use when the mapping is undefined. if not given, - and exception is raised when the mapping is undefined. use `Pass` to - pass the unmapped value as-is - """ - return SymmetricMapping(subcon, kw, kw.pop("_default_", NotImplemented)) - -def FlagsEnum(subcon, **kw): - """a set of flag values mapping. - * subcon - the subcon to map - * kw - keyword arguments which serve as the encoding mapping - """ - return FlagsAdapter(subcon, kw) - - -#=============================================================================== -# structs -#=============================================================================== -def AlignedStruct(name, *subcons, **kw): - """a struct of aligned fields - * name - the name of the struct - * subcons - the subcons that make up this structure - * kw - keyword arguments to pass to Aligned: 'modulus' and 'pattern' - """ - return Struct(name, *(Aligned(sc, **kw) for sc in subcons)) - -def BitStruct(name, *subcons): - """a struct of bitwise fields - * name - the name of the struct - * subcons - the subcons that make up this structure - """ - return Bitwise(Struct(name, *subcons)) - -def EmbeddedBitStruct(*subcons): - """an embedded BitStruct. no name is necessary. - * subcons - the subcons that make up this structure - """ - return Bitwise(Embedded(Struct(None, *subcons))) - -#=============================================================================== -# strings -#=============================================================================== -def String(name, length, encoding = None, padchar = None, - paddir = "right", trimdir = "right"): - """a fixed-length, optionally padded string of characters - * name - the name of the field - * length - the length (integer) - * encoding - the encoding to use (e.g., "utf8"), or None, for raw bytes. - default is None - * padchar - the padding character (commonly "\x00"), or None to - disable padding. default is None - * paddir - the direction where padding is placed ("right", "left", or - "center"). the default is "right". this argument is meaningless if - padchar is None. - * trimdir - the direction where trimming will take place ("right" or - "left"). the default is "right". trimming is only meaningful for - building, when the given string is too long. this argument is - meaningless if padchar is None. - """ - con = StringAdapter(Field(name, length), encoding = encoding) - if padchar is not None: - con = PaddedStringAdapter(con, - padchar = padchar, - paddir = paddir, - trimdir = trimdir - ) - return con - -def PascalString(name, length_field = UBInt8("length"), encoding = None): - """a string prefixed with a length field. the data must directly follow - the length field. - * name - the name of the - * length_field - a numeric construct (i.e., UBInt8) that holds the - length. default is an unsigned, 8-bit integer field. note that this - argument must pass an instance of a construct, not a class - (`UBInt8("length")` rather than `UBInt8`) - * encoding - the encoding to use (e.g., "utf8"), or None, for raw bytes. - default is None - """ - return StringAdapter( - LengthValueAdapter( - Sequence(name, - length_field, - Field("data", lambda ctx: ctx[length_field.name]), - ) - ), - encoding = encoding, - ) - -def CString(name, terminators = "\x00", encoding = None, - char_field = Field(None, 1)): - r"""a c-style string (string terminated by a terminator char) - * name - the name fo the string - * terminators - a sequence of terminator chars. default is "\x00". - * encoding - the encoding to use (e.g., "utf8"), or None, for raw bytes. - default is None - * char_field - the construct that represents a single character. default - is a one-byte character. note that this argument must be an instance - of a construct, not a construct class (`Field("char", 1)` rather than - `Field`) - """ - return Rename(name, - CStringAdapter( - RepeatUntil(lambda obj, ctx: obj in terminators, - char_field, - ), - terminators = terminators, - encoding = encoding, - ) - ) - - -#=============================================================================== -# conditional -#=============================================================================== -def IfThenElse(name, predicate, then_subcon, else_subcon): - """an if-then-else conditional construct: if the predicate indicates True, - `then_subcon` will be used; otherwise `else_subcon` - * name - the name of the construct - * predicate - a function taking the context as an argument and returning - True or False - * then_subcon - the subcon that will be used if the predicate returns True - * else_subcon - the subcon that will be used if the predicate returns False - """ - return Switch(name, lambda ctx: bool(predicate(ctx)), - { - True : then_subcon, - False : else_subcon, - } - ) - -def If(predicate, subcon, elsevalue = None): - """an if-then conditional construct: if the predicate indicates True, - subcon will be used; otherwise, `elsevalue` will be returned instead. - * predicate - a function taking the context as an argument and returning - True or False - * subcon - the subcon that will be used if the predicate returns True - * elsevalue - the value that will be used should the predicate return False. - by default this value is None. - """ - return IfThenElse(subcon.name, - predicate, - subcon, - Value("elsevalue", lambda ctx: elsevalue) - ) - - -#=============================================================================== -# misc -#=============================================================================== -def OnDemandPointer(offsetfunc, subcon, force_build = True): - """an on-demand pointer. - * offsetfunc - a function taking the context as an argument and returning - the absolute stream position - * subcon - the subcon that will be parsed from the `offsetfunc()` stream - position on demand - * force_build - see OnDemand. by default True. - """ - return OnDemand(Pointer(offsetfunc, subcon), - advance_stream = False, - force_build = force_build - ) - - - - - - - - - - - - - - - - - - - - - - - - - - - +from lib import BitStreamReader, BitStreamWriter, encode_bin, decode_bin +from core import * +from adapters import * + + +#=============================================================================== +# fields +#=============================================================================== +def Field(name, length): + """a field + * name - the name of the field + * length - the length of the field. the length can be either an integer + (StaticField), or a function that takes the context as an argument and + returns the length (MetaField) + """ + if callable(length): + return MetaField(name, length) + else: + return StaticField(name, length) + +def BitField(name, length, swapped = False, signed = False, bytesize = 8): + """a bit field; must be enclosed in a BitStruct + * name - the name of the field + * length - the length of the field in bits. the length can be either + an integer, or a function that takes the context as an argument and + returns the length + * swapped - whether the value is byte-swapped (little endian). the + default is False. + * signed - whether the value of the bitfield is a signed integer. the + default is False. + * bytesize - the number of bits in a byte (used for byte-swapping). the + default is 8. + """ + return BitIntegerAdapter(Field(name, length), + length, + swapped = swapped, + signed = signed, + bytesize = bytesize + ) + +def Padding(length, pattern = "\x00", strict = False): + r"""a padding field (value is discarded) + * length - the length of the field. the length can be either an integer, + or a function that takes the context as an argument and returns the + length + * pattern - the padding pattern (character) to use. default is "\x00" + * strict - whether or not to raise an exception is the actual padding + pattern mismatches the desired pattern. default is False. + """ + return PaddingAdapter(Field(None, length), + pattern = pattern, + strict = strict, + ) + +def Flag(name, truth = 1, falsehood = 0, default = False): + """a flag field (True or False) + * name - the name of the field + * truth - the numeric value of truth. the default is 1. + * falsehood - the numeric value of falsehood. the default is 0. + * default - the default value to assume, when the value is neither + `truth` nor `falsehood`. the default is False. + """ + return SymmetricMapping(Field(name, 1), + {True : chr(truth), False : chr(falsehood)}, + default = default, + ) + +#=============================================================================== +# field shortcuts +#=============================================================================== +def Bit(name): + """a 1-bit BitField; must be enclosed in a BitStruct""" + return BitField(name, 1) +def Nibble(name): + """a 4-bit BitField; must be enclosed in a BitStruct""" + return BitField(name, 4) +def Octet(name): + """an 8-bit BitField; must be enclosed in a BitStruct""" + return BitField(name, 8) + +def UBInt8(name): + """unsigned, big endian 8-bit integer""" + return FormatField(name, ">", "B") +def UBInt16(name): + """unsigned, big endian 16-bit integer""" + return FormatField(name, ">", "H") +def UBInt32(name): + """unsigned, big endian 32-bit integer""" + return FormatField(name, ">", "L") +def UBInt64(name): + """unsigned, big endian 64-bit integer""" + return FormatField(name, ">", "Q") + +def SBInt8(name): + """signed, big endian 8-bit integer""" + return FormatField(name, ">", "b") +def SBInt16(name): + """signed, big endian 16-bit integer""" + return FormatField(name, ">", "h") +def SBInt32(name): + """signed, big endian 32-bit integer""" + return FormatField(name, ">", "l") +def SBInt64(name): + """signed, big endian 64-bit integer""" + return FormatField(name, ">", "q") + +def ULInt8(name): + """unsigned, little endian 8-bit integer""" + return FormatField(name, "<", "B") +def ULInt16(name): + """unsigned, little endian 16-bit integer""" + return FormatField(name, "<", "H") +def ULInt32(name): + """unsigned, little endian 32-bit integer""" + return FormatField(name, "<", "L") +def ULInt64(name): + """unsigned, little endian 64-bit integer""" + return FormatField(name, "<", "Q") + +def SLInt8(name): + """signed, little endian 8-bit integer""" + return FormatField(name, "<", "b") +def SLInt16(name): + """signed, little endian 16-bit integer""" + return FormatField(name, "<", "h") +def SLInt32(name): + """signed, little endian 32-bit integer""" + return FormatField(name, "<", "l") +def SLInt64(name): + """signed, little endian 64-bit integer""" + return FormatField(name, "<", "q") + +def UNInt8(name): + """unsigned, native endianity 8-bit integer""" + return FormatField(name, "=", "B") +def UNInt16(name): + """unsigned, native endianity 16-bit integer""" + return FormatField(name, "=", "H") +def UNInt32(name): + """unsigned, native endianity 32-bit integer""" + return FormatField(name, "=", "L") +def UNInt64(name): + """unsigned, native endianity 64-bit integer""" + return FormatField(name, "=", "Q") + +def SNInt8(name): + """signed, native endianity 8-bit integer""" + return FormatField(name, "=", "b") +def SNInt16(name): + """signed, native endianity 16-bit integer""" + return FormatField(name, "=", "h") +def SNInt32(name): + """signed, native endianity 32-bit integer""" + return FormatField(name, "=", "l") +def SNInt64(name): + """signed, native endianity 64-bit integer""" + return FormatField(name, "=", "q") + +def BFloat32(name): + """big endian, 32-bit IEEE floating point number""" + return FormatField(name, ">", "f") +def LFloat32(name): + """little endian, 32-bit IEEE floating point number""" + return FormatField(name, "<", "f") +def NFloat32(name): + """native endianity, 32-bit IEEE floating point number""" + return FormatField(name, "=", "f") + +def BFloat64(name): + """big endian, 64-bit IEEE floating point number""" + return FormatField(name, ">", "d") +def LFloat64(name): + """little endian, 64-bit IEEE floating point number""" + return FormatField(name, "<", "d") +def NFloat64(name): + """native endianity, 64-bit IEEE floating point number""" + return FormatField(name, "=", "d") + + +#=============================================================================== +# arrays +#=============================================================================== +def Array(count, subcon): + """array of subcon repeated count times. + * subcon - the subcon. + * count - an integer, or a function taking the context as an argument, + returning the count + """ + if callable(count): + con = MetaArray(count, subcon) + else: + con = MetaArray(lambda ctx: count, subcon) + con._clear_flag(con.FLAG_DYNAMIC) + return con + +def PrefixedArray(subcon, length_field = UBInt8("length")): + """an array prefixed by a length field. + * subcon - the subcon to be repeated + * length_field - an integer construct + """ + return LengthValueAdapter( + Sequence(subcon.name, + length_field, + Array(lambda ctx: ctx[length_field.name], subcon), + nested = False + ) + ) + +def OpenRange(mincount, subcon): + from sys import maxint + return Range(mincount, maxint, subcon) + +def GreedyRange(subcon): + """an open range (1 or more times) of repeated subcon. + * subcon - the subcon to repeat""" + return OpenRange(1, subcon) + +def OptionalGreedyRange(subcon): + """an open range (0 or more times) of repeated subcon. + * subcon - the subcon to repeat""" + return OpenRange(0, subcon) + + +#=============================================================================== +# subconstructs +#=============================================================================== +def Optional(subcon): + """an optional construct. if parsing fails, returns None. + * subcon - the subcon to optionally parse or build + """ + return Select(subcon.name, subcon, Pass) + +def Bitwise(subcon): + """converts the stream to bits, and passes the bitstream to subcon + * subcon - a bitwise construct (usually BitField) + """ + # subcons larger than MAX_BUFFER will be wrapped by Restream instead + # of Buffered. implementation details, don't stick your nose :) + MAX_BUFFER = 1024 * 8 + def resizer(length): + if length & 7: + raise SizeofError("size must be a multiple of 8", length) + return length >> 3 + if not subcon._is_flag(subcon.FLAG_DYNAMIC) and subcon.sizeof() < MAX_BUFFER: + con = Buffered(subcon, + encoder = decode_bin, + decoder = encode_bin, + resizer = resizer + ) + else: + con = Restream(subcon, + stream_reader = BitStreamReader, + stream_writer = BitStreamWriter, + resizer = resizer) + return con + +def Aligned(subcon, modulus = 4, pattern = "\x00"): + r"""aligns subcon to modulus boundary using padding pattern + * subcon - the subcon to align + * modulus - the modulus boundary (default is 4) + * pattern - the padding pattern (default is \x00) + """ + if modulus < 2: + raise ValueError("modulus must be >= 2", modulus) + if modulus in (2, 4, 8, 16, 32, 64, 128, 256, 512, 1024): + def padlength(ctx): + m1 = modulus - 1 + return (modulus - (subcon._sizeof(ctx) & m1)) & m1 + else: + def padlength(ctx): + return (modulus - (subcon._sizeof(ctx) % modulus)) % modulus + return IndexingAdapter( + Sequence(subcon.name, + subcon, + Padding(padlength, pattern = pattern), + nested = False, + ), + 0 + ) + +def Embedded(subcon): + """embeds a struct into the enclosing struct. + * subcon - the struct to embed + """ + return Reconfig(subcon.name, subcon, subcon.FLAG_EMBED) + +def Rename(newname, subcon): + """renames an existing construct + * newname - the new name + * subcon - the subcon to rename + """ + return Reconfig(newname, subcon) + +def Alias(newname, oldname): + """creates an alias for an existing element in a struct + * newname - the new name + * oldname - the name of an existing element + """ + return Value(newname, lambda ctx: ctx[oldname]) + + +#=============================================================================== +# mapping +#=============================================================================== +def SymmetricMapping(subcon, mapping, default = NotImplemented): + """defines a symmetrical mapping: a->b, b->a. + * subcon - the subcon to map + * mapping - the encoding mapping (a dict); the decoding mapping is + achieved by reversing this mapping + * default - the default value to use when no mapping is found. if no + default value is given, and exception is raised. setting to Pass would + return the value "as is" (unmapped) + """ + reversed_mapping = dict((v, k) for k, v in mapping.iteritems()) + return MappingAdapter(subcon, + encoding = mapping, + decoding = reversed_mapping, + encdefault = default, + decdefault = default, + ) + +def Enum(subcon, **kw): + """a set of named values mapping. + * subcon - the subcon to map + * kw - keyword arguments which serve as the encoding mapping + * _default_ - an optional, keyword-only argument that specifies the + default value to use when the mapping is undefined. if not given, + and exception is raised when the mapping is undefined. use `Pass` to + pass the unmapped value as-is + """ + return SymmetricMapping(subcon, kw, kw.pop("_default_", NotImplemented)) + +def FlagsEnum(subcon, **kw): + """a set of flag values mapping. + * subcon - the subcon to map + * kw - keyword arguments which serve as the encoding mapping + """ + return FlagsAdapter(subcon, kw) + + +#=============================================================================== +# structs +#=============================================================================== +def AlignedStruct(name, *subcons, **kw): + """a struct of aligned fields + * name - the name of the struct + * subcons - the subcons that make up this structure + * kw - keyword arguments to pass to Aligned: 'modulus' and 'pattern' + """ + return Struct(name, *(Aligned(sc, **kw) for sc in subcons)) + +def BitStruct(name, *subcons): + """a struct of bitwise fields + * name - the name of the struct + * subcons - the subcons that make up this structure + """ + return Bitwise(Struct(name, *subcons)) + +def EmbeddedBitStruct(*subcons): + """an embedded BitStruct. no name is necessary. + * subcons - the subcons that make up this structure + """ + return Bitwise(Embedded(Struct(None, *subcons))) + +#=============================================================================== +# strings +#=============================================================================== +def String(name, length, encoding = None, padchar = None, + paddir = "right", trimdir = "right"): + """a fixed-length, optionally padded string of characters + * name - the name of the field + * length - the length (integer) + * encoding - the encoding to use (e.g., "utf8"), or None, for raw bytes. + default is None + * padchar - the padding character (commonly "\x00"), or None to + disable padding. default is None + * paddir - the direction where padding is placed ("right", "left", or + "center"). the default is "right". this argument is meaningless if + padchar is None. + * trimdir - the direction where trimming will take place ("right" or + "left"). the default is "right". trimming is only meaningful for + building, when the given string is too long. this argument is + meaningless if padchar is None. + """ + con = StringAdapter(Field(name, length), encoding = encoding) + if padchar is not None: + con = PaddedStringAdapter(con, + padchar = padchar, + paddir = paddir, + trimdir = trimdir + ) + return con + +def PascalString(name, length_field = UBInt8("length"), encoding = None): + """a string prefixed with a length field. the data must directly follow + the length field. + * name - the name of the + * length_field - a numeric construct (i.e., UBInt8) that holds the + length. default is an unsigned, 8-bit integer field. note that this + argument must pass an instance of a construct, not a class + (`UBInt8("length")` rather than `UBInt8`) + * encoding - the encoding to use (e.g., "utf8"), or None, for raw bytes. + default is None + """ + return StringAdapter( + LengthValueAdapter( + Sequence(name, + length_field, + Field("data", lambda ctx: ctx[length_field.name]), + ) + ), + encoding = encoding, + ) + +def CString(name, terminators = "\x00", encoding = None, + char_field = Field(None, 1)): + r"""a c-style string (string terminated by a terminator char) + * name - the name fo the string + * terminators - a sequence of terminator chars. default is "\x00". + * encoding - the encoding to use (e.g., "utf8"), or None, for raw bytes. + default is None + * char_field - the construct that represents a single character. default + is a one-byte character. note that this argument must be an instance + of a construct, not a construct class (`Field("char", 1)` rather than + `Field`) + """ + return Rename(name, + CStringAdapter( + RepeatUntil(lambda obj, ctx: obj in terminators, + char_field, + ), + terminators = terminators, + encoding = encoding, + ) + ) + + +#=============================================================================== +# conditional +#=============================================================================== +def IfThenElse(name, predicate, then_subcon, else_subcon): + """an if-then-else conditional construct: if the predicate indicates True, + `then_subcon` will be used; otherwise `else_subcon` + * name - the name of the construct + * predicate - a function taking the context as an argument and returning + True or False + * then_subcon - the subcon that will be used if the predicate returns True + * else_subcon - the subcon that will be used if the predicate returns False + """ + return Switch(name, lambda ctx: bool(predicate(ctx)), + { + True : then_subcon, + False : else_subcon, + } + ) + +def If(predicate, subcon, elsevalue = None): + """an if-then conditional construct: if the predicate indicates True, + subcon will be used; otherwise, `elsevalue` will be returned instead. + * predicate - a function taking the context as an argument and returning + True or False + * subcon - the subcon that will be used if the predicate returns True + * elsevalue - the value that will be used should the predicate return False. + by default this value is None. + """ + return IfThenElse(subcon.name, + predicate, + subcon, + Value("elsevalue", lambda ctx: elsevalue) + ) + + +#=============================================================================== +# misc +#=============================================================================== +def OnDemandPointer(offsetfunc, subcon, force_build = True): + """an on-demand pointer. + * offsetfunc - a function taking the context as an argument and returning + the absolute stream position + * subcon - the subcon that will be parsed from the `offsetfunc()` stream + position on demand + * force_build - see OnDemand. by default True. + """ + return OnDemand(Pointer(offsetfunc, subcon), + advance_stream = False, + force_build = force_build + ) + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/elftools/construct/text.py b/elftools/construct/text.py index dee47a1..e461dff 100644 --- a/elftools/construct/text.py +++ b/elftools/construct/text.py @@ -1,286 +1,286 @@ -from core import * -from adapters import * -from macros import * - - -#=============================================================================== -# exceptions -#=============================================================================== -class QuotedStringError(ConstructError): - __slots__ = [] - - -#=============================================================================== -# constructs -#=============================================================================== -class QuotedString(Construct): - r""" - A quoted string (begins with an opening-quote, terminated by a - closing-quote, which may be escaped by an escape character) - - Parameters: - * name - the name of the field - * start_quote - the opening quote character. default is '"' - * end_quote - the closing quote character. default is '"' - * esc_char - the escape character, or None to disable escaping. defualt - is "\" (backslash) - * encoding - the character encoding (e.g., "utf8"), or None to return - raw bytes. defualt is None. - * allow_eof - whether to allow EOF before the closing quote is matched. - if False, an exception will be raised when EOF is reached by the closing - quote is missing. default is False. - - Example: - QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None) - """ - __slots__ = [ - "start_quote", "end_quote", "char", "esc_char", "encoding", - "allow_eof" - ] - def __init__(self, name, start_quote = '"', end_quote = None, - esc_char = '\\', encoding = None, allow_eof = False): - Construct.__init__(self, name) - if end_quote is None: - end_quote = start_quote - self.start_quote = Literal(start_quote) - self.char = Char("char") - self.end_quote = end_quote - self.esc_char = esc_char - self.encoding = encoding - self.allow_eof = allow_eof - - def _parse(self, stream, context): - self.start_quote._parse(stream, context) - text = [] - escaped = False - try: - while True: - ch = self.char._parse(stream, context) - if ch == self.esc_char: - if escaped: - text.append(ch) - escaped = False - else: - escaped = True - elif ch == self.end_quote and not escaped: - break - else: - text.append(ch) - escaped = False - except FieldError: - if not self.allow_eof: - raise - text = "".join(text) - if self.encoding is not None: - text = text.decode(self.encoding) - return text - - def _build(self, obj, stream, context): - self.start_quote._build(None, stream, context) - if self.encoding: - obj = obj.encode(self.encoding) - for ch in obj: - if ch == self.esc_char: - self.char._build(self.esc_char, stream, context) - elif ch == self.end_quote: - if self.esc_char is None: - raise QuotedStringError("found ending quote in data, " - "but no escape char defined", ch) - else: - self.char._build(self.esc_char, stream, context) - self.char._build(ch, stream, context) - self.char._build(self.end_quote, stream, context) - - def _sizeof(self, context): - raise SizeofError("can't calculate size") - - -#=============================================================================== -# macros -#=============================================================================== -class WhitespaceAdapter(Adapter): - """ - Adapter for whitespace sequences; do not use directly. - See Whitespace. - - Parameters: - * subcon - the subcon to adapt - * build_char - the character used for encoding (building) - """ - __slots__ = ["build_char"] - def __init__(self, subcon, build_char): - Adapter.__init__(self, subcon) - self.build_char = build_char - def _encode(self, obj, context): - return self.build_char - def _decode(self, obj, context): - return None - -def Whitespace(charset = " \t", optional = True): - """whitespace (space that is ignored between tokens). when building, the - first character of the charset is used. - * charset - the set of characters that are considered whitespace. default - is space and tab. - * optional - whether or not whitespace is optional. default is True. - """ - con = CharOf(None, charset) - if optional: - con = OptionalGreedyRange(con) - else: - con = GreedyRange(con) - return WhitespaceAdapter(con, build_char = charset[0]) - -def Literal(text): - """matches a literal string in the text - * text - the text (string) to match - """ - return ConstAdapter(Field(None, len(text)), text) - -def Char(name): - """a one-byte character""" - return Field(name, 1) - -def CharOf(name, charset): - """matches only characters of a given charset - * name - the name of the field - * charset - the set of valid characters - """ - return OneOf(Char(name), charset) - -def CharNoneOf(name, charset): - """matches only characters that do not belong to a given charset - * name - the name of the field - * charset - the set of invalid characters - """ - return NoneOf(Char(name), charset) - -def Alpha(name): - """a letter character (A-Z, a-z)""" - return CharOf(name, set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')) - -def Digit(name): - """a digit character (0-9)""" - return CharOf(name, set('0123456789')) - -def AlphaDigit(name): - """an alphanumeric character (A-Z, a-z, 0-9)""" - return CharOf(name, set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")) - -def BinDigit(name): - """a binary digit (0-1)""" - return CharOf(name, set('01')) - -def HexDigit(name): - """a hexadecimal digit (0-9, A-F, a-f)""" - return CharOf(name, set('0123456789abcdefABCDEF')) - -def Word(name): - """a sequence of letters""" - return StringAdapter(GreedyRange(Alpha(name))) - -class TextualIntAdapter(Adapter): - """ - Adapter for textual integers - - Parameters: - * subcon - the subcon to adapt - * radix - the base of the integer (decimal, hexadecimal, binary, ...) - * digits - the sequence of digits of that radix - """ - __slots__ = ["radix", "digits"] - def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"): - Adapter.__init__(self, subcon) - if radix > len(digits): - raise ValueError("not enough digits for radix %d" % (radix,)) - self.radix = radix - self.digits = digits - def _encode(self, obj, context): - chars = [] - if obj < 0: - chars.append("-") - n = -obj - else: - n = obj - r = self.radix - digs = self.digits - while n > 0: - n, d = divmod(n, r) - chars.append(digs[d]) - # obj2 = "".join(reversed(chars)) - # filler = digs[0] * (self._sizeof(context) - len(obj2)) - # return filler + obj2 - return "".join(reversed(chars)) - def _decode(self, obj, context): - return int("".join(obj), self.radix) - -def DecNumber(name): - """decimal number""" - return TextualIntAdapter(GreedyRange(Digit(name))) - -def BinNumber(name): - """binary number""" - return TextualIntAdapter(GreedyRange(Digit(name)), 2) - -def HexNumber(name): - """hexadecimal number""" - return TextualIntAdapter(GreedyRange(Digit(name)), 16) - -def StringUpto(name, charset): - """a string that stretches up to a terminator, or EOF. unlike CString, - StringUpto will no consume the terminator char. - * name - the name of the field - * charset - the set of terminator characters""" - return StringAdapter(OptionalGreedyRange(CharNoneOf(name, charset))) - -def Line(name): - r"""a textual line (up to "\n")""" - return StringUpto(name, "\n") - -class IdentifierAdapter(Adapter): - """ - Adapter for programmatic identifiers - - Parameters: - * subcon - the subcon to adapt - """ - def _encode(self, obj, context): - return obj[0], obj[1:] - def _decode(self, obj, context): - return obj[0] + "".join(obj[1]) - -def Identifier(name, - headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"), - tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_") - ): - """a programmatic identifier (symbol). must start with a char of headset, - followed by a sequence of tailset characters - * name - the name of the field - * headset - charset for the first character. default is A-Z, a-z, and _ - * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _ - """ - return IdentifierAdapter( - Sequence(name, - CharOf("head", headset), - OptionalGreedyRange(CharOf("tail", tailset)), - ) - ) - - - - - - - - - - - - - - - - - - - - +from core import * +from adapters import * +from macros import * + + +#=============================================================================== +# exceptions +#=============================================================================== +class QuotedStringError(ConstructError): + __slots__ = [] + + +#=============================================================================== +# constructs +#=============================================================================== +class QuotedString(Construct): + r""" + A quoted string (begins with an opening-quote, terminated by a + closing-quote, which may be escaped by an escape character) + + Parameters: + * name - the name of the field + * start_quote - the opening quote character. default is '"' + * end_quote - the closing quote character. default is '"' + * esc_char - the escape character, or None to disable escaping. defualt + is "\" (backslash) + * encoding - the character encoding (e.g., "utf8"), or None to return + raw bytes. defualt is None. + * allow_eof - whether to allow EOF before the closing quote is matched. + if False, an exception will be raised when EOF is reached by the closing + quote is missing. default is False. + + Example: + QuotedString("foo", start_quote = "{", end_quote = "}", esc_char = None) + """ + __slots__ = [ + "start_quote", "end_quote", "char", "esc_char", "encoding", + "allow_eof" + ] + def __init__(self, name, start_quote = '"', end_quote = None, + esc_char = '\\', encoding = None, allow_eof = False): + Construct.__init__(self, name) + if end_quote is None: + end_quote = start_quote + self.start_quote = Literal(start_quote) + self.char = Char("char") + self.end_quote = end_quote + self.esc_char = esc_char + self.encoding = encoding + self.allow_eof = allow_eof + + def _parse(self, stream, context): + self.start_quote._parse(stream, context) + text = [] + escaped = False + try: + while True: + ch = self.char._parse(stream, context) + if ch == self.esc_char: + if escaped: + text.append(ch) + escaped = False + else: + escaped = True + elif ch == self.end_quote and not escaped: + break + else: + text.append(ch) + escaped = False + except FieldError: + if not self.allow_eof: + raise + text = "".join(text) + if self.encoding is not None: + text = text.decode(self.encoding) + return text + + def _build(self, obj, stream, context): + self.start_quote._build(None, stream, context) + if self.encoding: + obj = obj.encode(self.encoding) + for ch in obj: + if ch == self.esc_char: + self.char._build(self.esc_char, stream, context) + elif ch == self.end_quote: + if self.esc_char is None: + raise QuotedStringError("found ending quote in data, " + "but no escape char defined", ch) + else: + self.char._build(self.esc_char, stream, context) + self.char._build(ch, stream, context) + self.char._build(self.end_quote, stream, context) + + def _sizeof(self, context): + raise SizeofError("can't calculate size") + + +#=============================================================================== +# macros +#=============================================================================== +class WhitespaceAdapter(Adapter): + """ + Adapter for whitespace sequences; do not use directly. + See Whitespace. + + Parameters: + * subcon - the subcon to adapt + * build_char - the character used for encoding (building) + """ + __slots__ = ["build_char"] + def __init__(self, subcon, build_char): + Adapter.__init__(self, subcon) + self.build_char = build_char + def _encode(self, obj, context): + return self.build_char + def _decode(self, obj, context): + return None + +def Whitespace(charset = " \t", optional = True): + """whitespace (space that is ignored between tokens). when building, the + first character of the charset is used. + * charset - the set of characters that are considered whitespace. default + is space and tab. + * optional - whether or not whitespace is optional. default is True. + """ + con = CharOf(None, charset) + if optional: + con = OptionalGreedyRange(con) + else: + con = GreedyRange(con) + return WhitespaceAdapter(con, build_char = charset[0]) + +def Literal(text): + """matches a literal string in the text + * text - the text (string) to match + """ + return ConstAdapter(Field(None, len(text)), text) + +def Char(name): + """a one-byte character""" + return Field(name, 1) + +def CharOf(name, charset): + """matches only characters of a given charset + * name - the name of the field + * charset - the set of valid characters + """ + return OneOf(Char(name), charset) + +def CharNoneOf(name, charset): + """matches only characters that do not belong to a given charset + * name - the name of the field + * charset - the set of invalid characters + """ + return NoneOf(Char(name), charset) + +def Alpha(name): + """a letter character (A-Z, a-z)""" + return CharOf(name, set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')) + +def Digit(name): + """a digit character (0-9)""" + return CharOf(name, set('0123456789')) + +def AlphaDigit(name): + """an alphanumeric character (A-Z, a-z, 0-9)""" + return CharOf(name, set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")) + +def BinDigit(name): + """a binary digit (0-1)""" + return CharOf(name, set('01')) + +def HexDigit(name): + """a hexadecimal digit (0-9, A-F, a-f)""" + return CharOf(name, set('0123456789abcdefABCDEF')) + +def Word(name): + """a sequence of letters""" + return StringAdapter(GreedyRange(Alpha(name))) + +class TextualIntAdapter(Adapter): + """ + Adapter for textual integers + + Parameters: + * subcon - the subcon to adapt + * radix - the base of the integer (decimal, hexadecimal, binary, ...) + * digits - the sequence of digits of that radix + """ + __slots__ = ["radix", "digits"] + def __init__(self, subcon, radix = 10, digits = "0123456789abcdef"): + Adapter.__init__(self, subcon) + if radix > len(digits): + raise ValueError("not enough digits for radix %d" % (radix,)) + self.radix = radix + self.digits = digits + def _encode(self, obj, context): + chars = [] + if obj < 0: + chars.append("-") + n = -obj + else: + n = obj + r = self.radix + digs = self.digits + while n > 0: + n, d = divmod(n, r) + chars.append(digs[d]) + # obj2 = "".join(reversed(chars)) + # filler = digs[0] * (self._sizeof(context) - len(obj2)) + # return filler + obj2 + return "".join(reversed(chars)) + def _decode(self, obj, context): + return int("".join(obj), self.radix) + +def DecNumber(name): + """decimal number""" + return TextualIntAdapter(GreedyRange(Digit(name))) + +def BinNumber(name): + """binary number""" + return TextualIntAdapter(GreedyRange(Digit(name)), 2) + +def HexNumber(name): + """hexadecimal number""" + return TextualIntAdapter(GreedyRange(Digit(name)), 16) + +def StringUpto(name, charset): + """a string that stretches up to a terminator, or EOF. unlike CString, + StringUpto will no consume the terminator char. + * name - the name of the field + * charset - the set of terminator characters""" + return StringAdapter(OptionalGreedyRange(CharNoneOf(name, charset))) + +def Line(name): + r"""a textual line (up to "\n")""" + return StringUpto(name, "\n") + +class IdentifierAdapter(Adapter): + """ + Adapter for programmatic identifiers + + Parameters: + * subcon - the subcon to adapt + """ + def _encode(self, obj, context): + return obj[0], obj[1:] + def _decode(self, obj, context): + return obj[0] + "".join(obj[1]) + +def Identifier(name, + headset = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"), + tailset = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_") + ): + """a programmatic identifier (symbol). must start with a char of headset, + followed by a sequence of tailset characters + * name - the name of the field + * headset - charset for the first character. default is A-Z, a-z, and _ + * tailset - charset for the tail. default is A-Z, a-z, 0-9 and _ + """ + return IdentifierAdapter( + Sequence(name, + CharOf("head", headset), + OptionalGreedyRange(CharOf("tail", tailset)), + ) + ) + + + + + + + + + + + + + + + + + + + + diff --git a/elftools/elf/constants.py b/elftools/elf/constants.py index cd21e09..1d49d2b 100644 --- a/elftools/elf/constants.py +++ b/elftools/elf/constants.py @@ -1,46 +1,46 @@ -#------------------------------------------------------------------------------- -# elftools: elf/constants.py -# -# Constants and flags, placed into classes for namespacing -# -# Eli Bendersky (eliben@gmail.com) -# This code is in the public domain -#------------------------------------------------------------------------------- -class SHN_INDICES(object): +#------------------------------------------------------------------------------- +# elftools: elf/constants.py +# +# Constants and flags, placed into classes for namespacing +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +class SHN_INDICES(object): """ Special section indices - """ - SHN_UNDEF=0 - SHN_LORESERVE=0xff00 - SHN_LOPROC=0xff00 - SHN_HIPROC=0xff1f - SHN_ABS=0xfff1 - SHN_COMMON=0xfff2 + """ + SHN_UNDEF=0 + SHN_LORESERVE=0xff00 + SHN_LOPROC=0xff00 + SHN_HIPROC=0xff1f + SHN_ABS=0xfff1 + SHN_COMMON=0xfff2 SHN_HIRESERVE=0xffff - - -class SH_FLAGS(object): + + +class SH_FLAGS(object): """ Flag values for the sh_flags field of section headers - """ - SHF_WRITE=0x1 - SHF_ALLOC=0x2 - SHF_EXECINSTR=0x4 - SHF_MERGE=0x10 - SHF_STRINGS=0x20 - SHF_INFO_LINK=0x40 - SHF_LINK_ORDER=0x80 - SHF_OS_NONCONFORMING=0x100 - SHF_GROUP=0x200 - SHF_TLS=0x400 - SHF_MASKOS=0x0ff00000 - SHF_MASKPROC=0xf0000000 - - -class P_FLAGS(object): + """ + SHF_WRITE=0x1 + SHF_ALLOC=0x2 + SHF_EXECINSTR=0x4 + SHF_MERGE=0x10 + SHF_STRINGS=0x20 + SHF_INFO_LINK=0x40 + SHF_LINK_ORDER=0x80 + SHF_OS_NONCONFORMING=0x100 + SHF_GROUP=0x200 + SHF_TLS=0x400 + SHF_MASKOS=0x0ff00000 + SHF_MASKPROC=0xf0000000 + + +class P_FLAGS(object): """ Flag values for the p_flags field of program headers - """ - PF_X=0x1 - PF_W=0x2 - PF_R=0x4 - PF_MASKOS=0x00FF0000 - PF_MASKPROC=0xFF000000 - + """ + PF_X=0x1 + PF_W=0x2 + PF_R=0x4 + PF_MASKOS=0x00FF0000 + PF_MASKPROC=0xFF000000 + diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 6f2e723..dc1b246 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -1,177 +1,177 @@ -#------------------------------------------------------------------------------- -# elftools: elf/elffile.py -# -# ELFFile - main class for accessing ELF files -# -# Eli Bendersky (eliben@gmail.com) -# This code is in the public domain -#------------------------------------------------------------------------------- -from ..common.exceptions import ELFError -from ..common.utils import struct_parse, elf_assert -from ..construct import ConstructError -from .structs import ELFStructs -from .sections import Section, StringTableSection, SymbolTableSection -from .segments import Segment - - -class ELFFile(object): - """ Accessible attributes: - - elfclass: - 32 or 64 - specifies the word size of the target machine - - little_endian: - boolean - specifies the target machine's endianness - - header: - the complete ELF file header +#------------------------------------------------------------------------------- +# elftools: elf/elffile.py +# +# ELFFile - main class for accessing ELF files +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from ..common.exceptions import ELFError +from ..common.utils import struct_parse, elf_assert +from ..construct import ConstructError +from .structs import ELFStructs +from .sections import Section, StringTableSection, SymbolTableSection +from .segments import Segment + + +class ELFFile(object): + """ Accessible attributes: + + elfclass: + 32 or 64 - specifies the word size of the target machine + + little_endian: + boolean - specifies the target machine's endianness + + header: + the complete ELF file header """ def __init__(self, stream): - self.stream = stream - self._identify_file() - self.structs = ELFStructs( - little_endian=self.little_endian, - elfclass=self.elfclass) - self.header = self._parse_elf_header() - - self._file_stringtable_section = self._get_file_stringtable() - + self.stream = stream + self._identify_file() + self.structs = ELFStructs( + little_endian=self.little_endian, + elfclass=self.elfclass) + self.header = self._parse_elf_header() + + self._file_stringtable_section = self._get_file_stringtable() + def num_sections(self): """ Number of sections in the file - """ - return self['e_shnum'] - + """ + return self['e_shnum'] + def get_section(self, n): - """ Get the section at index #n from the file (Section object or a + """ Get the section at index #n from the file (Section object or a subclass) - """ - section_header = self._get_section_header(n) - return self._make_section(section_header) - + """ + section_header = self._get_section_header(n) + return self._make_section(section_header) + def iter_sections(self): """ Yield all the sections in the file - """ - for i in range(self.num_sections()): - yield self.get_section(i) - + """ + for i in range(self.num_sections()): + yield self.get_section(i) + def num_segments(self): """ Number of segments in the file - """ - return self['e_phnum'] - + """ + return self['e_phnum'] + def get_segment(self, n): """ Get the segment at index #n from the file (Segment object) - """ - segment_header = self._get_segment_header(n) - return Segment(segment_header, self.stream) - + """ + segment_header = self._get_segment_header(n) + return Segment(segment_header, self.stream) + def iter_segments(self): """ Yield all the segments in the file - """ - for i in range(self.num_segments()): - yield self.get_segment(i) - - #-------------------------------- PRIVATE --------------------------------# - + """ + for i in range(self.num_segments()): + yield self.get_segment(i) + + #-------------------------------- PRIVATE --------------------------------# + def __getitem__(self, name): """ Implement dict-like access to header entries - """ - return self.header[name] - + """ + return self.header[name] + def _identify_file(self): """ Verify the ELF file and identify its class and endianness. - """ - # Note: this code reads the stream directly, without using ELFStructs, - # since we don't yet know its exact format. ELF was designed to be - # read like this - its e_ident field is word-size and endian agnostic. - # - self.stream.seek(0) - magic = self.stream.read(4) - elf_assert(magic == '\x7fELF', 'Magic number does not match') - - ei_class = self.stream.read(1) - if ei_class == '\x01': - self.elfclass = 32 - elif ei_class == '\x02': - self.elfclass = 64 - else: - raise ELFError('Invalid EI_CLASS %s' % repr(ei_class)) - - ei_data = self.stream.read(1) - if ei_data == '\x01': - self.little_endian = True - elif ei_data == '\x02': - self.little_endian = False - else: - raise ELFError('Invalid EI_DATA %s' % repr(ei_data)) - + """ + # Note: this code reads the stream directly, without using ELFStructs, + # since we don't yet know its exact format. ELF was designed to be + # read like this - its e_ident field is word-size and endian agnostic. + # + self.stream.seek(0) + magic = self.stream.read(4) + elf_assert(magic == '\x7fELF', 'Magic number does not match') + + ei_class = self.stream.read(1) + if ei_class == '\x01': + self.elfclass = 32 + elif ei_class == '\x02': + self.elfclass = 64 + else: + raise ELFError('Invalid EI_CLASS %s' % repr(ei_class)) + + ei_data = self.stream.read(1) + if ei_data == '\x01': + self.little_endian = True + elif ei_data == '\x02': + self.little_endian = False + else: + raise ELFError('Invalid EI_DATA %s' % repr(ei_data)) + def _section_offset(self, n): """ Compute the offset of section #n in the file - """ - return self['e_shoff'] + n * self['e_shentsize'] - + """ + return self['e_shoff'] + n * self['e_shentsize'] + def _segment_offset(self, n): """ Compute the offset of segment #n in the file - """ - return self['e_phoff'] + n * self['e_phentsize'] - + """ + return self['e_phoff'] + n * self['e_phentsize'] + def _get_section_header(self, n): """ Find the header of section #n, parse it and return the struct - """ - return struct_parse( - self.structs.Elf_Shdr, - self.stream, - stream_pos=self._section_offset(n)) - - def _get_section_name(self, section_header): - """ Given a section header, find this section's name in the file's - string table - """ - name_offset = section_header['sh_name'] - return self._file_stringtable_section.get_string(name_offset) - + """ + return struct_parse( + self.structs.Elf_Shdr, + self.stream, + stream_pos=self._section_offset(n)) + + def _get_section_name(self, section_header): + """ Given a section header, find this section's name in the file's + string table + """ + name_offset = section_header['sh_name'] + return self._file_stringtable_section.get_string(name_offset) + def _make_section(self, section_header): """ Create a section object of the appropriate type - """ - name = self._get_section_name(section_header) - sectype = section_header['sh_type'] - - if sectype == 'SHT_STRTAB': - return StringTableSection(section_header, name, self.stream) - elif sectype in ('SHT_SYMTAB', 'SHT_DYNSYM'): - return self._make_symbol_table_section(section_header, name) - else: - return Section(section_header, name, self.stream) - + """ + name = self._get_section_name(section_header) + sectype = section_header['sh_type'] + + if sectype == 'SHT_STRTAB': + return StringTableSection(section_header, name, self.stream) + elif sectype in ('SHT_SYMTAB', 'SHT_DYNSYM'): + return self._make_symbol_table_section(section_header, name) + else: + return Section(section_header, name, self.stream) + def _make_symbol_table_section(self, section_header, name): """ Create a SymbolTableSection - """ - linked_strtab_index = section_header['sh_link'] - strtab_section = self.get_section(linked_strtab_index) - return SymbolTableSection( - section_header, name, self.stream, - stringtable=strtab_section) - + """ + linked_strtab_index = section_header['sh_link'] + strtab_section = self.get_section(linked_strtab_index) + return SymbolTableSection( + section_header, name, self.stream, + stringtable=strtab_section) + def _get_segment_header(self, n): """ Find the header of segment #n, parse it and return the struct - """ - return struct_parse( - self.structs.Elf_Phdr, - self.stream, - stream_pos=self._segment_offset(n)) - + """ + return struct_parse( + self.structs.Elf_Phdr, + self.stream, + stream_pos=self._segment_offset(n)) + def _get_file_stringtable(self): """ Find the file's string table section - """ - stringtable_section_num = self['e_shstrndx'] - return StringTableSection( - header=self._get_section_header(stringtable_section_num), - name='', - stream=self.stream) - + """ + stringtable_section_num = self['e_shstrndx'] + return StringTableSection( + header=self._get_section_header(stringtable_section_num), + name='', + stream=self.stream) + def _parse_elf_header(self): - """ Parses the ELF file header and assigns the result to attributes + """ Parses the ELF file header and assigns the result to attributes of this object. - """ - return struct_parse(self.structs.Elf_Ehdr, self.stream, stream_pos=0) - + """ + return struct_parse(self.structs.Elf_Ehdr, self.stream, stream_pos=0) + diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py index cdaf316..31647ac 100644 --- a/elftools/elf/enums.py +++ b/elftools/elf/enums.py @@ -1,107 +1,107 @@ -#------------------------------------------------------------------------------- -# elftools: elf/enums.py -# -# Mappings of enum names to values -# -# Eli Bendersky (eliben@gmail.com) -# This code is in the public domain -#------------------------------------------------------------------------------- - -# e_ident[EI_CLASS] in the ELF header -ENUM_EI_CLASS = dict( - ELFCLASSNONE=0, - ELFCLASS32=1, - ELFCLASS64=2 -) - -# e_ident[EI_DATA] in the ELF header -ENUM_EI_DATA = dict( - ELFDATANONE=0, - ELFDATA2LSB=1, - ELFDATA2MSB=2 -) - -# e_version in the ELF header -ENUM_E_VERSION = dict( - EV_NONE=0, - EV_CURRENT=1 -) - -# e_type in the ELF header -ENUM_E_TYPE = dict( - ET_NONE=0, - ET_REL=1, - ET_EXEC=2, - ET_DYN=3, - ET_CORE=4, - ET_LOPROC=0xff00, - ET_HIPROC=0xffff, - _default_='PROC_SPECIFIC', -) - -# e_machine in the ELF header -# (this list is currently somewhat partial...) -ENUM_E_MACHINE = dict( - EM_NONE=0, - EM_M32=1, - EM_SPARC=2, - EM_386=3, - EM_68K=4, - EM_88K=5, - EM_860=7, - EM_MIPS=8, - EM_S370=9, - EM_MIPS_RS4_BE=10, - EM_IA_64=50, - EM_X86_64=62, - EM_AVR=83, - _default_='RESERVED', -) - -# sh_type in the section header -ENUM_SH_TYPE = dict( - SHT_NULL=0, - SHT_PROGBITS=1, - SHT_SYMTAB=2, - SHT_STRTAB=3, - SHT_RELA=4, - SHT_HASH=5, - SHT_DYNAMIC=6, - SHT_NOTE=7, - SHT_NOBITS=8, - SHT_REL=9, - SHT_SHLIB=10, - SHT_DYNSYM=11, - SHT_INIT_ARRAY=14, - SHT_FINI_ARRAY=15, - SHT_PREINIT_ARRAY=16, - SHT_GROUP=17, - SHT_SYMTAB_SHNDX=18, - SHT_NUM=19, - SHT_LOOS=0x60000000, - SHT_HIOS=0x6fffffff, - SHT_LOPROC=0x70000000, - SHT_HIPROC=0x7fffffff, - SHT_LOUSER=0x80000000, - SHT_HIUSER=0xffffffff, - SHT_AMD64_UNWIND=0x70000001, - _default_='RESERVED', -) - -# p_type in the program header -# some values scavenged from the ELF headers in binutils-2.21 -ENUM_P_TYPE = dict( - PT_NULL=0, - PT_LOAD=1, - PT_DYNAMIC=2, - PT_INTERP=3, - PT_NOTE=4, - PT_SHLIB=5, - PT_PHDR=6, - PT_LOPROC=0x70000000, - PT_HIPROC=0x7fffffff, - PT_GNU_EH_FRAME=0x6474e550, - PT_GNU_STACK=0x6474e551, - PT_GNU_RELRO=0x6474e552, -) - +#------------------------------------------------------------------------------- +# elftools: elf/enums.py +# +# Mappings of enum names to values +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- + +# e_ident[EI_CLASS] in the ELF header +ENUM_EI_CLASS = dict( + ELFCLASSNONE=0, + ELFCLASS32=1, + ELFCLASS64=2 +) + +# e_ident[EI_DATA] in the ELF header +ENUM_EI_DATA = dict( + ELFDATANONE=0, + ELFDATA2LSB=1, + ELFDATA2MSB=2 +) + +# e_version in the ELF header +ENUM_E_VERSION = dict( + EV_NONE=0, + EV_CURRENT=1 +) + +# e_type in the ELF header +ENUM_E_TYPE = dict( + ET_NONE=0, + ET_REL=1, + ET_EXEC=2, + ET_DYN=3, + ET_CORE=4, + ET_LOPROC=0xff00, + ET_HIPROC=0xffff, + _default_='PROC_SPECIFIC', +) + +# e_machine in the ELF header +# (this list is currently somewhat partial...) +ENUM_E_MACHINE = dict( + EM_NONE=0, + EM_M32=1, + EM_SPARC=2, + EM_386=3, + EM_68K=4, + EM_88K=5, + EM_860=7, + EM_MIPS=8, + EM_S370=9, + EM_MIPS_RS4_BE=10, + EM_IA_64=50, + EM_X86_64=62, + EM_AVR=83, + _default_='RESERVED', +) + +# sh_type in the section header +ENUM_SH_TYPE = dict( + SHT_NULL=0, + SHT_PROGBITS=1, + SHT_SYMTAB=2, + SHT_STRTAB=3, + SHT_RELA=4, + SHT_HASH=5, + SHT_DYNAMIC=6, + SHT_NOTE=7, + SHT_NOBITS=8, + SHT_REL=9, + SHT_SHLIB=10, + SHT_DYNSYM=11, + SHT_INIT_ARRAY=14, + SHT_FINI_ARRAY=15, + SHT_PREINIT_ARRAY=16, + SHT_GROUP=17, + SHT_SYMTAB_SHNDX=18, + SHT_NUM=19, + SHT_LOOS=0x60000000, + SHT_HIOS=0x6fffffff, + SHT_LOPROC=0x70000000, + SHT_HIPROC=0x7fffffff, + SHT_LOUSER=0x80000000, + SHT_HIUSER=0xffffffff, + SHT_AMD64_UNWIND=0x70000001, + _default_='RESERVED', +) + +# p_type in the program header +# some values scavenged from the ELF headers in binutils-2.21 +ENUM_P_TYPE = dict( + PT_NULL=0, + PT_LOAD=1, + PT_DYNAMIC=2, + PT_INTERP=3, + PT_NOTE=4, + PT_SHLIB=5, + PT_PHDR=6, + PT_LOPROC=0x70000000, + PT_HIPROC=0x7fffffff, + PT_GNU_EH_FRAME=0x6474e550, + PT_GNU_STACK=0x6474e551, + PT_GNU_RELRO=0x6474e552, +) + diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index 6accf11..04410f3 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -1,60 +1,60 @@ -#------------------------------------------------------------------------------- -# elftools: elf/sections.py -# -# ELF sections -# -# Eli Bendersky (eliben@gmail.com) -# This code is in the public domain -#------------------------------------------------------------------------------- -from ..construct import CString -from ..common.utils import struct_parse, elf_assert - - -class Section(object): - """ Base class for ELF sections. Also used for all sections types that have - no special functionality. - - Allows dictionary-like access to the section header. For example: - > sec = Section(...) - > sec['sh_type'] # section type +#------------------------------------------------------------------------------- +# elftools: elf/sections.py +# +# ELF sections +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from ..construct import CString +from ..common.utils import struct_parse, elf_assert + + +class Section(object): + """ Base class for ELF sections. Also used for all sections types that have + no special functionality. + + Allows dictionary-like access to the section header. For example: + > sec = Section(...) + > sec['sh_type'] # section type """ def __init__(self, header, name, stream): - self.header = header - self.name = name - self.stream = stream - + self.header = header + self.name = name + self.stream = stream + def data(self): """ The section data from the file. - """ - self.stream.seek(self['sh_offset']) + """ + self.stream.seek(self['sh_offset']) return self.stream.read(self['sh_size']) - - def __getitem__(self, name): - """ Implement dict-like access to header entries - """ - return self.header[name] - - -class StringTableSection(Section): + + def __getitem__(self, name): + """ Implement dict-like access to header entries + """ + return self.header[name] + + +class StringTableSection(Section): """ ELF string table section. """ def __init__(self, header, name, stream): - super(StringTableSection, self).__init__(header, name, stream) - - def get_string(self, offset): + super(StringTableSection, self).__init__(header, name, stream) + + def get_string(self, offset): """ Get the string stored at the given offset in this string table. - """ - table_offset = self['sh_offset'] - return struct_parse( - CString(''), - self.stream, - stream_pos=table_offset + offset) - - -class SymbolTableSection(Section): - """ ELF symbol table section. Has an associated StringTableSection that's + """ + table_offset = self['sh_offset'] + return struct_parse( + CString(''), + self.stream, + stream_pos=table_offset + offset) + + +class SymbolTableSection(Section): + """ ELF symbol table section. Has an associated StringTableSection that's passed in the constructor. """ - def __init__(self, header, name, stream, stringtable): + def __init__(self, header, name, stream, stringtable): super(SymbolTableSection, self).__init__(header, name, stream) - self.stringtable = stringtable + self.stringtable = stringtable diff --git a/elftools/elf/segments.py b/elftools/elf/segments.py index 02a81ed..a583de0 100644 --- a/elftools/elf/segments.py +++ b/elftools/elf/segments.py @@ -1,25 +1,25 @@ -#------------------------------------------------------------------------------- -# elftools: elf/segments.py -# -# ELF segments -# -# Eli Bendersky (eliben@gmail.com) -# This code is in the public domain -#------------------------------------------------------------------------------- - -class Segment(object): - def __init__(self, header, stream): - self.header = header - self.stream = stream - - def data(self): - """ The segment data from the file. - """ - self.stream.seek(self['p_offset']) - return self.stream.read(self['p_filesz']) - - def __getitem__(self, name): - """ Implement dict-like access to header entries - """ - return self.header[name] - +#------------------------------------------------------------------------------- +# elftools: elf/segments.py +# +# ELF segments +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- + +class Segment(object): + def __init__(self, header, stream): + self.header = header + self.stream = stream + + def data(self): + """ The segment data from the file. + """ + self.stream.seek(self['p_offset']) + return self.stream.read(self['p_filesz']) + + def __getitem__(self, name): + """ Implement dict-like access to header entries + """ + return self.header[name] + diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index 465a9c8..c628c93 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -1,153 +1,153 @@ -#------------------------------------------------------------------------------- -# elftools: elf/structs.py -# -# Encapsulation of Construct structs for parsing an ELF file, adjusted for -# correct endianness and word-size. -# -# Eli Bendersky (eliben@gmail.com) -# This code is in the public domain -#------------------------------------------------------------------------------- -from ..construct import ( - UBInt8, UBInt16, UBInt32, UBInt64, - ULInt8, ULInt16, ULInt32, ULInt64, - SBInt32, SLInt32, SBInt64, SLInt64, - Struct, Array, Enum, Padding, - ) - -from .enums import * - - -class ELFStructs(object): - """ Accessible attributes: - - Elf_{byte|half|word|addr|offset|sword|xword|xsword}: - Data chunks, as specified by the ELF standard, adjusted for - correct endianness and word-size. - - Elf_Ehdr: - ELF file header - - Elf_Phdr: - Program header - - Elf_Shdr: - Section header - - Elf_Sym: +#------------------------------------------------------------------------------- +# elftools: elf/structs.py +# +# Encapsulation of Construct structs for parsing an ELF file, adjusted for +# correct endianness and word-size. +# +# Eli Bendersky (eliben@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from ..construct import ( + UBInt8, UBInt16, UBInt32, UBInt64, + ULInt8, ULInt16, ULInt32, ULInt64, + SBInt32, SLInt32, SBInt64, SLInt64, + Struct, Array, Enum, Padding, + ) + +from .enums import * + + +class ELFStructs(object): + """ Accessible attributes: + + Elf_{byte|half|word|addr|offset|sword|xword|xsword}: + Data chunks, as specified by the ELF standard, adjusted for + correct endianness and word-size. + + Elf_Ehdr: + ELF file header + + Elf_Phdr: + Program header + + Elf_Shdr: + Section header + + Elf_Sym: Symbol table entry """ - def __init__(self, little_endian=True, elfclass=32): - assert elfclass == 32 or elfclass == 64 - self.little_endian = little_endian - self.elfclass = elfclass - self._create_structs() - + def __init__(self, little_endian=True, elfclass=32): + assert elfclass == 32 or elfclass == 64 + self.little_endian = little_endian + self.elfclass = elfclass + self._create_structs() + def _create_structs(self): - if self.little_endian: - self.Elf_byte = ULInt8 - self.Elf_half = ULInt16 - self.Elf_word = ULInt32 - self.Elf_addr = ULInt32 if self.elfclass == 32 else ULInt64 - self.Elf_offset = self.Elf_addr - self.Elf_sword = SLInt32 - self.Elf_xword = ULInt32 if self.elfclass == 32 else ULInt64 - self.Elf_sxword = SLInt32 if self.elfclass == 32 else SLInt64 - else: - self.Elf_byte = UBInt8 - self.Elf_half = UBInt16 - self.Elf_word = UBInt32 - self.Elf_addr = UBInt32 if self.elfclass == 32 else UBInt64 - self.Elf_offset = self.Elf_addr - self.Elf_sword = SBInt32 - self.Elf_xword = UBInt32 if self.elfclass == 32 else UBInt64 - self.Elf_sxword = SBInt32 if self.elfclass == 32 else SBInt64 - - self._create_ehdr() - self._create_phdr() - self._create_shdr() - self._create_sym() - + if self.little_endian: + self.Elf_byte = ULInt8 + self.Elf_half = ULInt16 + self.Elf_word = ULInt32 + self.Elf_addr = ULInt32 if self.elfclass == 32 else ULInt64 + self.Elf_offset = self.Elf_addr + self.Elf_sword = SLInt32 + self.Elf_xword = ULInt32 if self.elfclass == 32 else ULInt64 + self.Elf_sxword = SLInt32 if self.elfclass == 32 else SLInt64 + else: + self.Elf_byte = UBInt8 + self.Elf_half = UBInt16 + self.Elf_word = UBInt32 + self.Elf_addr = UBInt32 if self.elfclass == 32 else UBInt64 + self.Elf_offset = self.Elf_addr + self.Elf_sword = SBInt32 + self.Elf_xword = UBInt32 if self.elfclass == 32 else UBInt64 + self.Elf_sxword = SBInt32 if self.elfclass == 32 else SBInt64 + + self._create_ehdr() + self._create_phdr() + self._create_shdr() + self._create_sym() + def _create_ehdr(self): - self.Elf_Ehdr = Struct('Elf_Ehdr', - Struct('e_ident', - Array(4, self.Elf_byte('EI_MAG')), - Enum(self.Elf_byte('EI_CLASS'), **ENUM_EI_CLASS), - Enum(self.Elf_byte('EI_DATA'), **ENUM_EI_DATA), - Enum(self.Elf_byte('EI_VERSION'), **ENUM_E_VERSION), - Padding(9) - ), - Enum(self.Elf_half('e_type'), **ENUM_E_TYPE), - Enum(self.Elf_half('e_machine'), **ENUM_E_MACHINE), - Enum(self.Elf_word('e_version'), **ENUM_E_VERSION), - self.Elf_addr('e_entry'), - self.Elf_offset('e_phoff'), - self.Elf_offset('e_shoff'), - self.Elf_word('e_flags'), - self.Elf_half('e_ehsize'), - self.Elf_half('e_phentsize'), - self.Elf_half('e_phnum'), - self.Elf_half('e_shentsize'), - self.Elf_half('e_shnum'), - self.Elf_half('e_shstrndx'), - ) - - def _create_phdr(self): - if self.elfclass == 32: - self.Elf_Phdr = Struct('Elf_Phdr', - Enum(self.Elf_word('p_type'), **ENUM_P_TYPE), - self.Elf_offset('p_offset'), - self.Elf_addr('p_vaddr'), - self.Elf_addr('p_paddr'), - self.Elf_word('p_filesz'), - self.Elf_word('p_memsz'), - self.Elf_word('p_flags'), - self.Elf_word('p_align'), - ) - else: - self.Elf_Phdr = Struct('Elf_Phdr', - Enum(self.Elf_word('p_type'), **ENUM_P_TYPE), - self.Elf_word('p_flags'), - self.Elf_offset('p_offset'), - self.Elf_addr('p_vaddr'), - self.Elf_addr('p_paddr'), - self.Elf_word('p_filesz'), - self.Elf_word('p_memsz'), - self.Elf_word('p_align'), - ) - + self.Elf_Ehdr = Struct('Elf_Ehdr', + Struct('e_ident', + Array(4, self.Elf_byte('EI_MAG')), + Enum(self.Elf_byte('EI_CLASS'), **ENUM_EI_CLASS), + Enum(self.Elf_byte('EI_DATA'), **ENUM_EI_DATA), + Enum(self.Elf_byte('EI_VERSION'), **ENUM_E_VERSION), + Padding(9) + ), + Enum(self.Elf_half('e_type'), **ENUM_E_TYPE), + Enum(self.Elf_half('e_machine'), **ENUM_E_MACHINE), + Enum(self.Elf_word('e_version'), **ENUM_E_VERSION), + self.Elf_addr('e_entry'), + self.Elf_offset('e_phoff'), + self.Elf_offset('e_shoff'), + self.Elf_word('e_flags'), + self.Elf_half('e_ehsize'), + self.Elf_half('e_phentsize'), + self.Elf_half('e_phnum'), + self.Elf_half('e_shentsize'), + self.Elf_half('e_shnum'), + self.Elf_half('e_shstrndx'), + ) + + def _create_phdr(self): + if self.elfclass == 32: + self.Elf_Phdr = Struct('Elf_Phdr', + Enum(self.Elf_word('p_type'), **ENUM_P_TYPE), + self.Elf_offset('p_offset'), + self.Elf_addr('p_vaddr'), + self.Elf_addr('p_paddr'), + self.Elf_word('p_filesz'), + self.Elf_word('p_memsz'), + self.Elf_word('p_flags'), + self.Elf_word('p_align'), + ) + else: + self.Elf_Phdr = Struct('Elf_Phdr', + Enum(self.Elf_word('p_type'), **ENUM_P_TYPE), + self.Elf_word('p_flags'), + self.Elf_offset('p_offset'), + self.Elf_addr('p_vaddr'), + self.Elf_addr('p_paddr'), + self.Elf_word('p_filesz'), + self.Elf_word('p_memsz'), + self.Elf_word('p_align'), + ) + def _create_shdr(self): - self.Elf_Shdr = Struct('Elf_Shdr', - self.Elf_word('sh_name'), - Enum(self.Elf_word('sh_type'), **ENUM_SH_TYPE), - self.Elf_xword('sh_flags'), - self.Elf_addr('sh_addr'), - self.Elf_offset('sh_offset'), - self.Elf_xword('sh_size'), - self.Elf_word('sh_link'), - self.Elf_word('sh_info'), - self.Elf_xword('sh_addralign'), - self.Elf_xword('sh_entsize'), - ) - + self.Elf_Shdr = Struct('Elf_Shdr', + self.Elf_word('sh_name'), + Enum(self.Elf_word('sh_type'), **ENUM_SH_TYPE), + self.Elf_xword('sh_flags'), + self.Elf_addr('sh_addr'), + self.Elf_offset('sh_offset'), + self.Elf_xword('sh_size'), + self.Elf_word('sh_link'), + self.Elf_word('sh_info'), + self.Elf_xword('sh_addralign'), + self.Elf_xword('sh_entsize'), + ) + def _create_sym(self): - if self.elfclass == 32: - self.Elf_Sym = Struct('Elf_Sym', - self.Elf_word('st_name'), - self.Elf_addr('st_value'), - self.Elf_word('st_size'), - self.Elf_byte('st_info'), - self.Elf_byte('st_other'), - self.Elf_half('st_shndx'), - ) - else: - self.Elf_Sym = Struct('Elf_Sym', - self.Elf_word('st_name'), - self.Elf_byte('st_info'), - self.Elf_byte('st_other'), - self.Elf_half('st_shndx'), - self.Elf_addr('st_value'), - self.Elf_xword('st_size'), - ) - - - + if self.elfclass == 32: + self.Elf_Sym = Struct('Elf_Sym', + self.Elf_word('st_name'), + self.Elf_addr('st_value'), + self.Elf_word('st_size'), + self.Elf_byte('st_info'), + self.Elf_byte('st_other'), + self.Elf_half('st_shndx'), + ) + else: + self.Elf_Sym = Struct('Elf_Sym', + self.Elf_word('st_name'), + self.Elf_byte('st_info'), + self.Elf_byte('st_other'), + self.Elf_half('st_shndx'), + self.Elf_addr('st_value'), + self.Elf_xword('st_size'), + ) + + + diff --git a/z.py b/z.py index 2a5485c..fc295e7 100644 --- a/z.py +++ b/z.py @@ -1,57 +1,58 @@ -import sys -from elftools.elf.structs import ELFStructs -from elftools.elf.elffile import ELFFile -from elftools.elf.sections import * - -# read a little-endian, 64-bit file -es = ELFStructs(True, 64) - -stream = open('binfiles/z.elf', 'rb') - -efile = ELFFile(stream) - -print '===> %s sections!' % efile.num_sections() -print '===> %s segments!' % efile.num_segments() - -for sec in efile.iter_sections(): - print type(sec), sec.name - if isinstance(sec, SymbolTableSection): - print ' linked string table:', sec.stringtable.name - -for seg in efile.iter_segments(): - print seg['p_type'], seg['p_offset'] - - -#~ print 'num', efile.num_sections() -#~ sec = efile.get_section(39) -#~ print sec.header -#~ print sec.name -#~ print sec['sh_type'] -#~ print map(ord, sec.data()) - -#~ print sec.stream -#~ print map(ord, efile._stringtable) - -#~ print efile.header -#~ print dir(efile) -#~ print efile['e_type'] - -#~ shtable_offset = efile['e_shoff'] -#~ strtable_section_offset = shtable_offset + efile['e_shstrndx'] * efile['e_shentsize'] - -#~ # get to the section header for the sh string table -#~ print strtable_section_offset -#~ stream.seek(strtable_section_offset) -#~ sheader = es.Elf_Shdr.parse_stream(stream) -#~ print sheader - -#~ # yay, looks correct!! -#~ stream.seek(sheader.sh_offset) -#~ buf = stream.read(sheader.sh_size) -#~ for c in buf: - #~ sys.stdout.write('%02X' % ord(c)) - - - - -#~ print es.Elf_Ehdr +import sys +from elftools.elf.structs import ELFStructs +from elftools.elf.elffile import ELFFile +from elftools.elf.sections import * + +# read a little-endian, 64-bit file +es = ELFStructs(True, 64) + +stream = open('binfiles/z.elf', 'rb') +#stream = open('binfiles/z32.elf', 'rb') + +efile = ELFFile(stream) + +print '===> %s sections!' % efile.num_sections() +print '===> %s segments!' % efile.num_segments() + +for sec in efile.iter_sections(): + print type(sec), sec.name + if isinstance(sec, SymbolTableSection): + print ' linked string table:', sec.stringtable.name + +for seg in efile.iter_segments(): + print seg['p_type'], seg['p_offset'] + + +#~ print 'num', efile.num_sections() +#~ sec = efile.get_section(39) +#~ print sec.header +#~ print sec.name +#~ print sec['sh_type'] +#~ print map(ord, sec.data()) + +#~ print sec.stream +#~ print map(ord, efile._stringtable) + +#~ print efile.header +#~ print dir(efile) +#~ print efile['e_type'] + +#~ shtable_offset = efile['e_shoff'] +#~ strtable_section_offset = shtable_offset + efile['e_shstrndx'] * efile['e_shentsize'] + +#~ # get to the section header for the sh string table +#~ print strtable_section_offset +#~ stream.seek(strtable_section_offset) +#~ sheader = es.Elf_Shdr.parse_stream(stream) +#~ print sheader + +#~ # yay, looks correct!! +#~ stream.seek(sheader.sh_offset) +#~ buf = stream.read(sheader.sh_size) +#~ for c in buf: + #~ sys.stdout.write('%02X' % ord(c)) + + + + +#~ print es.Elf_Ehdr -- 2.30.2