Improved plots

packing-box · Feb 1, 2025 · 219b75c · 219b75c
1 parent 292f8ea
commit 219b75c
Show file tree

Hide file tree

Showing 12 changed files with 456 additions and 146 deletions.
diff --git a/.gitignore b/.gitignore
@@ -76,4 +76,4 @@ TODO
 script.py
 tool.py
 /*.png
-*.exe
+/*.exe
diff --git a/src/exeplot/VERSION.txt b/src/exeplot/VERSION.txt
@@ -1 +1 @@
-0.1.0
+0.2.0
diff --git a/src/exeplot/__conf__.py b/src/exeplot/__conf__.py
@@ -13,12 +13,13 @@
     'font_family':    "serif",
     'font_size':      10,
     'img_format':     "png",
+    'shadow':         True,
     'style':          "default",
-#    'transparent':    False,
+    'transparent':    False,
 }
 
 
-def configure():
+def configure():  # pragma: no cover
     from configparser import ConfigParser
     from os.path import exists, expanduser
     path = expanduser("~/.exeplot.conf")
@@ -43,6 +44,9 @@ def configure_fonts(**kw):
     kw['suptitle-font'] = {'fontfamily': kw.pop('suptitle_font_family', config['font_family']),
                            'fontsize': kw.pop('suptitle_font_size', int(config['font_size'] * 1.2)),
                            'fontweight': kw.pop('suptitle_font_weight', "normal")}
+    # annotations get their own 'annotation_font_*' keyword arguments ; the 'suptitle_font_*' ones were already popped
+    #  for the suptitle font and would therefore always fall back to the defaults here
+    kw['annotation-font'] = {'fontfamily': kw.pop('annotation_font_family', config['font_family']),
+                             'fontsize': kw.pop('annotation_font_size', int(config['font_size'] * .5)),
+                             'fontweight': kw.pop('annotation_font_weight', "normal")}
     for p in "xy":
         kw[f'{p}label-font'] = {'fontfamily': kw.pop(f'{p}label_font_family', config['font_family']),
                                 'fontsize': kw.pop(f'{p}label_font_size', config['font_size']),
@@ -56,28 +60,32 @@ def save_figure(f):
          function ; put it in the "figures" subfolder of the current experiment's folder if relevant. """
     @wraps(f)
     def _wrapper(*a, **kw):
+        import matplotlib.pyplot as plt
         from os import makedirs
         from os.path import basename, dirname, splitext
-        logger.info("Preparing plot data...")
+        from .plots.__common__ import Binary
+        plot_type = f.__globals__['__name__'].split(".")[-1]
+        logger.info(f"Preparing {plot_type} plot data...")
         configure()
-        imgs = f(*a, **configure_fonts(**kw))
-        ext = "." + kw.get('img_format', config['img_format'])
-        kw_plot = {k: kw.get(k, config[k]) for k in ["bbox_inches", "dpi"]}
+        kw = configure_fonts(**kw)
+        imgs = f(*a, **kw)
+        r = []
+        kw_plot = {k: kw.get(k, config[k]) for k in ["bbox_inches", "dpi", "transparent"]}
         for img in (imgs if isinstance(imgs, (list, tuple, type(x for x in []))) else [imgs]):
-            if img is None:
-                img = kw.get('img_name') or splitext(basename(a[0]))[0]
-            if not img.endswith(ext):
+            img = img or kw.get('img_name') or f"{splitext(basename(a[0]))[0]}_{plot_type}"
+            if not img.endswith(ext := "." + kw.get('img_format', config['img_format'])):
                 img += ext
-            if d := dirname(img):
-                makedirs(d, exist_ok=True)
-            if kw.get('interactive_mode', False):
+            makedirs(dirname(img) or ".", exist_ok=True)
+            if kw.get('interactive_mode', False):  # pragma: no cover
                 from code import interact
+                logger.info(f"{img}: use 'plt.savefig(img, **kw_plot)' to save the figure")
                 ns = {k: v for k, v in globals().items()}
                 ns.update(locals())
-                l.info(f"{img}: use 'plt.savefig(img, **kw_plot)' to save the figure")
                 interact(local=ns)
             logger.info(f"Saving to {img}...")
             plt.savefig(img, **kw_plot)
             logger.debug(f"> saved to {img}...")
+            r.append(img)
+        return r
     return _wrapper
 
diff --git a/src/exeplot/__main__.py b/src/exeplot/__main__.py
@@ -13,7 +13,7 @@ def _parser(name, description, examples):
                           epilog="usage examples:\n  " + "\n  ".join(examples) if len(examples) > 0 else None)
 
 
-def _setup(parser):
+def _setup(parser):  # pragma: no cover
     args = parser.parse_args()
     if hasattr(args, "verbose"):
         import logging
@@ -31,18 +31,12 @@ def main():
     plots = parser.add_subparsers(dest="type", help="plot type")
     for plot in _plots:
         plot_func = globals()[plot]
-        plot_parser = plot_func.__args__(plots.add_parser(plot, help=plot_func.__doc__.strip()))
-        """
-        opt = plot_parser.add_argument_group("style arguments")
-        for a in ["title-font", "suptitle-font", "xlabel-font", "ylable-font"]:
-            for i in ["family", "size", "weight"]:
-                kw = {'help': "", 'metavar': i.upper()}
-                if i == "size":
-                    kw['type'] = int
-                elif i == "weight":
-                    kw['choices'] = ("normal", "bold", "italic")
-                opt.add_argument(f"--{a}-{i}", **kw)
-        """
+        plot_parser = plot_func.__args__(plots.add_parser(plot, help=plot_func.__doc__.strip(), add_help=False))
+        opt = plot_parser.add_argument_group("options")
+        opt.add_argument("--no-title", action="store_true", help="do not display the title (default: False)")
+        extra = plot_parser.add_argument_group("extra arguments")
+        extra.add_argument("-h", "--help", action="help", help="show this help message and exit")
+        extra.add_argument("-i", "--interactive-mode", action="store_true", help="open Python console to edit the plot")
     args = _setup(parser)
     exe = args.executable if isinstance(args.executable, list) else [args.executable]
     delattr(args, "executable")

diff --git a/src/exeplot/plots/__common__.py b/src/exeplot/plots/__common__.py
@@ -4,11 +4,15 @@
 from statistics import mean
 
 
+CACHE_DIR = os.path.expanduser("~/.exeplot")
 # https://matplotlib.org/2.0.2/examples/color/named_colors.html
 COLORS = {
     None:       ["salmon", "gold", "plum", "darkkhaki", "orchid", "sandybrown", "purple", "khaki", "peru", "thistle"],
+    'header':   "black",
     'headers':  "black",
     'overlay':  "lightgray",
+    'section header':  "black",
+    'section headers': "black",
     '<undef>':  "lightgray",
     # common
     'text':     "darkseagreen",   # code
@@ -41,14 +45,17 @@
 MIN_ZONE_WIDTH = 3  # minimum number of samples on the entropy plot for a section (so that it can still be visible even
                     #  if it is far smaller than the other sections)
 N_SAMPLES = 2048
+SHADOW = {'shade': .3, 'ox': .005, 'oy': -.005, 'linewidth': 0.}
 SUBLABELS = {
-    'ep':          lambda d: "EP at 0x%.8x in %s" % d['entrypoint'][1:],
+    'ep':          lambda d: "EP at 0x%.8x in %s" % d['ep'][1:],
     'size':        lambda d: "Size = %s" % _human_readable_size(d['size'], 1),
     'size-ep':     lambda d: "Size = %s\nEP at 0x%.8x in %s" % \
-                             (_human_readable_size(d['size'], 1), d['entrypoint'][1], d['entrypoint'][2]),
+                             (_human_readable_size(d['size'], 1), d['ep'][1], d['ep'][2]),
+    'size-ent':    lambda d: "Size = %s\nAverage entropy: %.2f\nOverall entropy: %.2f" % \
+                             (_human_readable_size(d['size'], 1), mean(d['entropy']) * 8, d['entropy*']),
     'size-ep-ent': lambda d: "Size = %s\nEP at 0x%.8x in %s\nAverage entropy: %.2f\nOverall entropy: %.2f" % \
-                             (_human_readable_size(d['size'], 1), d['entrypoint'][1], d['entrypoint'][2],
-                              mean(d['entropy']) * 8, d['entropy*']),
+                             (_human_readable_size(d['size'], 1), d['ep'][1], d['ep'][2], mean(d['entropy']) * 8,
+                              d['entropy*']),
 }
 
 
@@ -65,7 +72,7 @@ def _ensure_str(s, encoding='utf-8', errors='strict'):
 
 def _human_readable_size(size, precision=0):
     i, units = 0, ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
-    while size >= 1024 and i < len(units):
+    while size >= 1024 and i < len(units)-1:
         i += 1
         size /= 1024.0
     return "%.*f%s" % (precision, size, units[i])
@@ -74,7 +81,7 @@ def _human_readable_size(size, precision=0):
 class Binary:
     def __init__(self, path, **kwargs):
         from lief import logging, parse
-        self.path = str(path)
+        self.path = os.path.abspath(str(path))
         self.basename = os.path.basename(self.path)
         self.stem = os.path.splitext(os.path.basename(self.path))[0]
         l = kwargs.get('logger')
@@ -97,20 +104,132 @@ def __getattr__(self, name):
         except AttributeError:
             return getattr(self.__binary, name)
 
+    def __iter__(self):
+        for _ in self.__sections_data():
+            yield _
+
     def __str__(self):
         return self.path
 
     def __get_ep_and_section(self):
+        b = self.__binary
         try:
             if self.type in ["ELF", "MachO"]:
-                self.__ep = self.__binary.virtual_address_to_offset(self.__binary.entrypoint)
-                self.__ep_section = self.__binary.section_from_offset(self.__ep)
+                self.__ep = b.virtual_address_to_offset(b.entrypoint)
+                self.__ep_section = b.section_from_offset(self.__ep)
             elif self.type == "PE":
-                self.__ep = self.__binary.rva_to_offset(self.__binary.optional_header.addressof_entrypoint)
-                self.__ep_section = self.__binary.section_from_rva(self.__binary.optional_header.addressof_entrypoint)
+                self.__ep = b.rva_to_offset(b.optional_header.addressof_entrypoint)
+                self.__ep_section = b.section_from_rva(b.optional_header.addressof_entrypoint)
         except (AttributeError, TypeError):
             self.__ep, self.__ep_section = None, None
 
+    def __sections_data(self):
+        """ Generate the layout of the binary as (index, legend text, start offset, end offset, color) tuples, in this
+             order: the header, the sections sorted by file offset (sections with an empty name, a null size and no
+             content are skipped), the section header table (ELF only), the overlay and finally a "TOTAL" entry whose
+             start and end offsets are None.
+
+        :raises ValueError: when the binary type is not PE, ELF or MachO
+        """
+        b = self.__binary
+        # create a first section for the headers
+        if self.type == "PE":
+            h_len = b.sizeof_headers
+        elif self.type == "ELF":
+            h_len = b.header.header_size + b.header.program_header_size * b.header.numberof_segments
+        elif self.type == "MachO":
+            h_len = [28, 32][str(b.header.magic)[-3:] == "_64"] + b.header.sizeof_cmds
+        else:
+            # fail with an explicit message rather than with an unbound 'h_len' on the next line
+            raise ValueError(f"Unsupported binary type '{self.type}'")
+        yield 0, f"[0] Header ({_human_readable_size(h_len)})", 0, h_len, "black"
+        # then handle binary's sections ; 'end' starts at the end of the headers so that it remains defined for the
+        #  ELF section header entry below even when no section gets yielded (e.g. binary without any section)
+        color_cursor, i, end = 0, 1, h_len
+        for section in sorted(b.sections, key=lambda s: s.offset):
+            if section.name == "" and section.size == 0 and len(section.content) == 0:
+                continue
+            try:
+                # color bound to the normalized section name, if any
+                c = COLORS[self.section_names[section.name].lower().lstrip("._").strip("\x00\n ")]
+            except KeyError:
+                # otherwise, cycle through the default palette
+                co = COLORS[None]
+                c = co[color_cursor % len(co)]
+                color_cursor += 1
+            start, end = section.offset, section.offset + section.size
+            yield i, f"[{i}] {self.section_names[section.name]} ({_human_readable_size(end - start)})", start, end, c
+            i += 1
+        # sections header at the end for ELF files
+        # NOTE(review): this places the table right after the last yielded section ; confirm whether the offset from
+        #               the ELF header should be used instead
+        if self.type == "ELF":
+            start, end = end, end + b.header.section_header_size * b.header.numberof_sections
+            yield i, f"[{i}] Section Header ({_human_readable_size(end - start)})", start, end, "black"
+            i += 1
+        # finally, handle the overlay
+        start, end = self.size - b.overlay.nbytes, self.size
+        yield i, f"[{i}] Overlay ({_human_readable_size(end - start)})", start, self.size, "lightgray"
+        i += 1
+        yield i, f"TOTAL: {_human_readable_size(self.size)}", None, None, "white"
+
+
+    def __segments_data(self):
+        b = self.__binary
+        if self.type == "PE":
+            return  # segments only apply to ELF and MachO
+        elif self.type == "ELF":
+            for i, s in enumerate(sorted(b.segments, key=lambda x: (x.file_offset, x.physical_size))):
+                yield i, f"[{i}] {str(s.type).split('.')[1]} ({_human_readable_size(s.physical_size)})", \
+                      s.file_offset, s.file_offset+s.physical_size, "lightgray"
+        elif self.type == "MachO":
+            for i, s in enumerate(sorted(b.segments, key=lambda x: (x.file_offset, x.file_size))):
+                yield i, f"[{i}] {s.name} ({_human_readable_size(s.file_size)})", \
+                      s.file_offset, s.file_offset+s.file_size, "lightgray"
+
+    def _data(self, segments=False, overlap=False):
+        data = [self.__sections_data, self.__segments_data][segments]
+        # generator for getting next items, taking None value into account for the start offset
+        def _nexts(n):
+            for j, t, s, e, c in data():
+                if j <= n or s is None:
+                    continue
+                yield j, t, s, e, c
+        # collect data, including x positions, [w]idths, [t]exts and [c]olors
+        x, w, t, c, cursors, legend, layer = {0: []}, {0: []}, {0: []}, {0: []}, {0: 0}, {'colors': [], 'texts': []}, 0
+        for i, text, start, end, color in data():
+            legend['colors'].append(color), legend['texts'].append(text)
+            if start is None or end is None:
+                continue
+            end = min(self.size, end)
+            width = end - start
+            if overlap:
+                # set the layer first
+                for n in range(layer + 1):
+                    if start >= cursors[n]:
+                        layer = n
+                        break
+                if start < cursors[layer]:
+                    layer += 1
+                # create layer data if layer does not exist yet
+                if layer not in x:
+                    x[layer], w[layer], t[layer], c[layer], cursors[layer] = [], [], [], [], 0
+                # if not starting at layer's cursor, fill up to start index with a blank section
+                if start > cursors[layer]:
+                    x[layer].append(cursors[layer]), w[layer].append(start - cursors[layer])
+                    t[layer].append("_"), c[layer].append("white")
+                # then add the current section
+                cursors[layer] = end
+                x[layer].append(start), w[layer].append(width), t[layer].append(text), c[layer].append(color)
+            else:
+                # adjust "end" if section overlap
+                for j, _, start2, _, _ in _nexts(i):
+                    end = min(start2, end)
+                    width = end - start
+                    break
+                x[0].append(start), w[0].append(width), t[0].append(text), c[0].append(color)
+                # add a blank if the next section does not start from the end
+                for j, _, start2, _, _ in _nexts(i):
+                    if j <= i or start2 is None:
+                        continue
+                    if start2 > end:
+                        x[0].append(end), w[0].append(start2 - end), t[0].append("_"), c[0].append("white")
+                    break
+        for i in range(len(x)):
+            if len(x[i]) > 0:
+                end = x[i][-1] + w[i][-1]
+                if end < self.size:
+                    x[i].append(end), w[i].append(self.size-end), t[i].append("_"), c[i].append("white")
+                if sum(w[i]) != self.size:
+                    for start, width, section, color in zip(x[i], w[i], t[i], c[i]):
+                        print(f"LAYER {i}", section, color, start, width)
+                    raise ValueError(f"Sizes do not match at layer {i} ({sum(w[i])} != {self.size})")
+                yield i, x[i], w[i], t[i], c[i], legend
+
     @cached_property
     def entrypoint(self):
         self.__get_ep_and_section()
@@ -121,6 +240,13 @@ def entrypoint_section(self):
         self.__get_ep_and_section()
         return self.__ep_section
 
+    @cached_property
+    def hash(self):
+        from hashlib import sha256
+        m = sha256()
+        m.update(self.rawbytes)
+        return m.hexdigest()
+
     @property
     def rawbytes(self):
         with open(self.path, "rb") as f:
@@ -129,8 +255,7 @@ def rawbytes(self):
 
     @cached_property        
     def section_names(self):
-        __sn = lambda s: _ensure_str(s).strip("\x00") or _ensure_str(s) or "<empty>"
-        names = {s.name: __sn(s.name) for s in self.__binary.sections}
+        names = {s.name: _ensure_str(s.name).strip("\x00") or "<empty>" for s in self.__binary.sections}
         # names from string table only applies to PE
         if self.type != "PE":
             return names
@@ -139,10 +264,11 @@ def section_names(self):
         if all(match(r"/\d+$", n) is None for n in names.keys()):
             return names
         real_names = {}
+        str_table_offset = self.__binary.header.pointerto_symbol_table + self.__binary.header.numberof_symbols * 18
         with open(self.path, "rb") as f:
             for n in names:
                 if match(r"/\d+$", n):
-                    f.seek(string_table_offset + int(name[1:]))
+                    f.seek(str_table_offset + int(n[1:]))
                     n2 = b"".join(iter(lambda: f.read(1), b'\x00')).decode("utf-8", errors="ignore")
                 else:
                     n2 = n

diff --git a/src/exeplot/plots/__init__.py b/src/exeplot/plots/__init__.py
@@ -12,6 +12,7 @@
     name = f[:-3]
     module = importlib.import_module(f".{name}", package=__name__)
     if hasattr(module, "plot") and callable(getattr(module, "plot")):
-        globals()[f"{name}"] = getattr(module, "plot")
+        globals()[f"{name}"] = f = getattr(module, "plot")
+        f.__args__ = getattr(module, "arguments")
         __all__.append(name)
diff --git a/.gitignore b/.gitignore
@@ -76,4 +76,4 @@ TODO
 script.py
 tool.py
 /*.png
-*.exe
+/*.exe