Skip to content

Commit

Permalink
Improved plots
Browse files Browse the repository at this point in the history
  • Loading branch information
dhondta committed Feb 1, 2025
1 parent 292f8ea commit 219b75c
Show file tree
Hide file tree
Showing 12 changed files with 456 additions and 146 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,4 @@ TODO
script.py
tool.py
/*.png
*.exe
/*.exe
2 changes: 1 addition & 1 deletion src/exeplot/VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.0
0.2.0
34 changes: 21 additions & 13 deletions src/exeplot/__conf__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@
'font_family': "serif",
'font_size': 10,
'img_format': "png",
'shadow': True,
'style': "default",
# 'transparent': False,
'transparent': False,
}


def configure():
def configure(): # pragma: no cover
from configparser import ConfigParser
from os.path import exists, expanduser
path = expanduser("~/.exeplot.conf")
Expand All @@ -43,6 +44,9 @@ def configure_fonts(**kw):
kw['suptitle-font'] = {'fontfamily': kw.pop('suptitle_font_family', config['font_family']),
'fontsize': kw.pop('suptitle_font_size', int(config['font_size'] * 1.2)),
'fontweight': kw.pop('suptitle_font_weight', "normal")}
# the annotation font reads its own 'annotation_font_*' keys ; the 'suptitle_font_*' keys are already consumed by the
#  'suptitle-font' entry built just before, so popping them again could never see a caller-supplied value
kw['annotation-font'] = {'fontfamily': kw.pop('annotation_font_family', config['font_family']),
                         'fontsize': kw.pop('annotation_font_size', int(config['font_size'] * .5)),
                         'fontweight': kw.pop('annotation_font_weight', "normal")}
for p in "xy":
kw[f'{p}label-font'] = {'fontfamily': kw.pop(f'{p}label_font_family', config['font_family']),
'fontsize': kw.pop(f'{p}label_font_size', config['font_size']),
Expand All @@ -56,28 +60,32 @@ def save_figure(f):
function ; put it in the "figures" subfolder of the current experiment's folder if relevant. """
@wraps(f)
def _wrapper(*a, **kw):
import matplotlib.pyplot as plt
from os import makedirs
from os.path import basename, dirname, splitext
logger.info("Preparing plot data...")
from .plots.__common__ import Binary
plot_type = f.__globals__['__name__'].split(".")[-1]
logger.info(f"Preparing {plot_type} plot data...")
configure()
imgs = f(*a, **configure_fonts(**kw))
ext = "." + kw.get('img_format', config['img_format'])
kw_plot = {k: kw.get(k, config[k]) for k in ["bbox_inches", "dpi"]}
kw = configure_fonts(**kw)
imgs = f(*a, **kw)
r = []
kw_plot = {k: kw.get(k, config[k]) for k in ["bbox_inches", "dpi", "transparent"]}
for img in (imgs if isinstance(imgs, (list, tuple, type(x for x in []))) else [imgs]):
if img is None:
img = kw.get('img_name') or splitext(basename(a[0]))[0]
if not img.endswith(ext):
img = img or kw.get('img_name') or f"{splitext(basename(a[0]))[0]}_{plot_type}"
if not img.endswith(ext := "." + kw.get('img_format', config['img_format'])):
img += ext
if d := dirname(img):
makedirs(d, exist_ok=True)
if kw.get('interactive_mode', False):
makedirs(dirname(img) or ".", exist_ok=True)
if kw.get('interactive_mode', False): # pragma: no cover
from code import interact
logger.info(f"{img}: use 'plt.savefig(img, **kw_plot)' to save the figure")
ns = {k: v for k, v in globals().items()}
ns.update(locals())
l.info(f"{img}: use 'plt.savefig(img, **kw_plot)' to save the figure")
interact(local=ns)
logger.info(f"Saving to {img}...")
plt.savefig(img, **kw_plot)
logger.debug(f"> saved to {img}...")
r.append(img)
return r
return _wrapper

20 changes: 7 additions & 13 deletions src/exeplot/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def _parser(name, description, examples):
epilog="usage examples:\n " + "\n ".join(examples) if len(examples) > 0 else None)


def _setup(parser):
def _setup(parser): # pragma: no cover
args = parser.parse_args()
if hasattr(args, "verbose"):
import logging
Expand All @@ -31,18 +31,12 @@ def main():
plots = parser.add_subparsers(dest="type", help="plot type")
for plot in _plots:
plot_func = globals()[plot]
plot_parser = plot_func.__args__(plots.add_parser(plot, help=plot_func.__doc__.strip()))
"""
opt = plot_parser.add_argument_group("style arguments")
for a in ["title-font", "suptitle-font", "xlabel-font", "ylable-font"]:
for i in ["family", "size", "weight"]:
kw = {'help': "", 'metavar': i.upper()}
if i == "size":
kw['type'] = int
elif i == "weight":
kw['choices'] = ("normal", "bold", "italic")
opt.add_argument(f"--{a}-{i}", **kw)
"""
plot_parser = plot_func.__args__(plots.add_parser(plot, help=plot_func.__doc__.strip(), add_help=False))
opt = plot_parser.add_argument_group("options")
opt.add_argument("--no-title", action="store_true", help="do not display the title (default: False)")
extra = plot_parser.add_argument_group("extra arguments")
extra.add_argument("-h", "--help", action="help", help="show this help message and exit")
extra.add_argument("-i", "--interactive-mode", action="store_true", help="open Python console to edit the plot")
args = _setup(parser)
exe = args.executable if isinstance(args.executable, list) else [args.executable]
delattr(args, "executable")
Expand Down
152 changes: 139 additions & 13 deletions src/exeplot/plots/__common__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
from statistics import mean


CACHE_DIR = os.path.expanduser("~/.exeplot")
# https://matplotlib.org/2.0.2/examples/color/named_colors.html
COLORS = {
None: ["salmon", "gold", "plum", "darkkhaki", "orchid", "sandybrown", "purple", "khaki", "peru", "thistle"],
'header': "black",
'headers': "black",
'overlay': "lightgray",
'section header': "black",
'section headers': "black",
'<undef>': "lightgray",
# common
'text': "darkseagreen", # code
Expand Down Expand Up @@ -41,14 +45,17 @@
MIN_ZONE_WIDTH = 3 # minimum number of samples on the entropy plot for a section (so that it can still be visible even
# if it is far smaller than the other sections)
N_SAMPLES = 2048
SHADOW = {'shade': .3, 'ox': .005, 'oy': -.005, 'linewidth': 0.}
SUBLABELS = {
'ep': lambda d: "EP at 0x%.8x in %s" % d['entrypoint'][1:],
'ep': lambda d: "EP at 0x%.8x in %s" % d['ep'][1:],
'size': lambda d: "Size = %s" % _human_readable_size(d['size'], 1),
'size-ep': lambda d: "Size = %s\nEP at 0x%.8x in %s" % \
(_human_readable_size(d['size'], 1), d['entrypoint'][1], d['entrypoint'][2]),
(_human_readable_size(d['size'], 1), d['ep'][1], d['ep'][2]),
'size-ent': lambda d: "Size = %s\nAverage entropy: %.2f\nOverall entropy: %.2f" % \
(_human_readable_size(d['size'], 1), mean(d['entropy']) * 8, d['entropy*']),
'size-ep-ent': lambda d: "Size = %s\nEP at 0x%.8x in %s\nAverage entropy: %.2f\nOverall entropy: %.2f" % \
(_human_readable_size(d['size'], 1), d['entrypoint'][1], d['entrypoint'][2],
mean(d['entropy']) * 8, d['entropy*']),
(_human_readable_size(d['size'], 1), d['ep'][1], d['ep'][2], mean(d['entropy']) * 8,
d['entropy*']),
}


Expand All @@ -65,7 +72,7 @@ def _ensure_str(s, encoding='utf-8', errors='strict'):

def _human_readable_size(size, precision=0):
    """ Format a number of bytes with a 1024-based unit suffix (from "B" up to "YB").

    :param size:      number of bytes to format
    :param precision: number of decimals kept in the formatted value
    :return:          formatted string, e.g. "1023B" or "1.5KB"
    """
    i, units = 0, ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
    # stop on the last unit so that units[i] stays in range, even for sizes of 1024YB and beyond
    while size >= 1024 and i < len(units) - 1:
        i += 1
        size /= 1024.0
    return "%.*f%s" % (precision, size, units[i])
Expand All @@ -74,7 +81,7 @@ def _human_readable_size(size, precision=0):
class Binary:
def __init__(self, path, **kwargs):
from lief import logging, parse
self.path = str(path)
self.path = os.path.abspath(str(path))
self.basename = os.path.basename(self.path)
self.stem = os.path.splitext(os.path.basename(self.path))[0]
l = kwargs.get('logger')
Expand All @@ -97,20 +104,132 @@ def __getattr__(self, name):
except AttributeError:
return getattr(self.__binary, name)

def __iter__(self):
    """ Iterate over the items generated by the private sections data generator of this binary. """
    yield from self.__sections_data()

def __str__(self):
    """ Use the path of the binary as its string representation. """
    return self.path

def __get_ep_and_section(self):
    """ Resolve the file offset of the entry point and the section that contains it.

    The results are stored in the private __ep and __ep_section attributes ; both are set to None when the binary
     type is none of ELF, MachO and PE or when the lookup raises an AttributeError or a TypeError. """
    b = self.__binary
    # start from defined values so that both attributes exist whatever the binary type
    self.__ep, self.__ep_section = None, None
    try:
        if self.type in ["ELF", "MachO"]:
            self.__ep = b.virtual_address_to_offset(b.entrypoint)
            self.__ep_section = b.section_from_offset(self.__ep)
        elif self.type == "PE":
            self.__ep = b.rva_to_offset(b.optional_header.addressof_entrypoint)
            self.__ep_section = b.section_from_rva(b.optional_header.addressof_entrypoint)
    except (AttributeError, TypeError):
        # reset both, as the offset may already have been set before the section lookup failed
        self.__ep, self.__ep_section = None, None

def __sections_data(self):
    """ Describe the layout of the binary as (index, legend text, start offset, end offset, color) tuples.

    Items are yielded in this order: the headers, every section sorted by file offset (sections with no name, no size
     and no content are skipped), the section header table for ELF files, the overlay and a final "TOTAL" item whose
     offsets are None. """
    b = self.__binary
    # create a first section for the headers
    h_len = 0  # fallback for a type that is none of PE, ELF and MachO -- NOTE(review): confirm whether it can occur
    if self.type == "PE":
        h_len = b.sizeof_headers
    elif self.type == "ELF":
        h_len = b.header.header_size + b.header.program_header_size * b.header.numberof_segments
    elif self.type == "MachO":
        # fixed header size picked from the magic's "_64" suffix, followed by the load commands
        h_len = [28, 32][str(b.header.magic)[-3:] == "_64"] + b.header.sizeof_cmds
    yield 0, f"[0] Header ({_human_readable_size(h_len)})", 0, h_len, "black"
    # then handle binary's sections ; 'end' starts at the end of the headers so that it is bound even when no section
    #  is yielded (the ELF section header item below reads it)
    color_cursor, i, end = 0, 1, h_len
    for section in sorted(b.sections, key=lambda s: s.offset):
        if section.name == "" and section.size == 0 and len(section.content) == 0:
            continue
        try:
            c = COLORS[self.section_names[section.name].lower().lstrip("._").strip("\x00\n ")]
        except KeyError:
            # unknown section name: pick the next color of the default palette, cycling when exhausted
            co = COLORS[None]
            c = co[color_cursor % len(co)]
            color_cursor += 1
        start, end = section.offset, section.offset + section.size
        yield i, f"[{i}] {self.section_names[section.name]} ({_human_readable_size(end - start)})", start, end, c
        i += 1
    # sections header at the end for ELF files
    if self.type == "ELF":
        start, end = end, end + b.header.section_header_size * b.header.numberof_sections
        yield i, f"[{i}] Section Header ({_human_readable_size(end - start)})", start, end, "black"
        i += 1
    # finally, handle the overlay
    start, end = self.size - b.overlay.nbytes, self.size
    yield i, f"[{i}] Overlay ({_human_readable_size(end - start)})", start, self.size, "lightgray"
    i += 1
    yield i, f"TOTAL: {_human_readable_size(self.size)}", None, None, "white"

def __segments_data(self):
    """ Yield (index, legend text, start offset, end offset, color) tuples for the segments of an ELF or MachO binary,
         sorted by file offset then by size ; nothing is yielded for any other type (e.g. PE). """
    b = self.__binary
    kind = self.type
    # segments only apply to ELF and MachO
    if kind not in ("ELF", "MachO"):
        return
    elf = kind == "ELF"
    # each format exposes the on-disk size of a segment under a different attribute
    size_attr = "physical_size" if elf else "file_size"
    ordered = sorted(b.segments, key=lambda x: (x.file_offset, getattr(x, size_attr)))
    for i, seg in enumerate(ordered):
        # ELF segments are labelled with the member name of their type, MachO ones with their own name
        label = str(seg.type).split('.')[1] if elf else seg.name
        text = f"[{i}] {label} ({_human_readable_size(getattr(seg, size_attr))})"
        yield i, text, seg.file_offset, seg.file_offset+getattr(seg, size_attr), "lightgray"

def _data(self, segments=False, overlap=False):
data = [self.__sections_data, self.__segments_data][segments]
# generator for getting next items, taking None value into account for the start offset
def _nexts(n):
for j, t, s, e, c in data():
if j <= n or s is None:
continue
yield j, t, s, e, c
# collect data, including x positions, [w]idths, [t]exts and [c]olors
x, w, t, c, cursors, legend, layer = {0: []}, {0: []}, {0: []}, {0: []}, {0: 0}, {'colors': [], 'texts': []}, 0
for i, text, start, end, color in data():
legend['colors'].append(color), legend['texts'].append(text)
if start is None or end is None:
continue
end = min(self.size, end)
width = end - start
if overlap:
# set the layer first
for n in range(layer + 1):
if start >= cursors[n]:
layer = n
break
if start < cursors[layer]:
layer += 1
# create layer data if layer does not exist yet
if layer not in x:
x[layer], w[layer], t[layer], c[layer], cursors[layer] = [], [], [], [], 0
# if not starting at layer's cursor, fill up to start index with a blank section
if start > cursors[layer]:
x[layer].append(cursors[layer]), w[layer].append(start - cursors[layer])
t[layer].append("_"), c[layer].append("white")
# then add the current section
cursors[layer] = end
x[layer].append(start), w[layer].append(width), t[layer].append(text), c[layer].append(color)
else:
# adjust "end" if section overlap
for j, _, start2, _, _ in _nexts(i):
end = min(start2, end)
width = end - start
break
x[0].append(start), w[0].append(width), t[0].append(text), c[0].append(color)
# add a blank if the next section does not start from the end
for j, _, start2, _, _ in _nexts(i):
if j <= i or start2 is None:
continue
if start2 > end:
x[0].append(end), w[0].append(start2 - end), t[0].append("_"), c[0].append("white")
break
for i in range(len(x)):
if len(x[i]) > 0:
end = x[i][-1] + w[i][-1]
if end < self.size:
x[i].append(end), w[i].append(self.size-end), t[i].append("_"), c[i].append("white")
if sum(w[i]) != self.size:
for start, width, section, color in zip(x[i], w[i], t[i], c[i]):
print(f"LAYER {i}", section, color, start, width)
raise ValueError(f"Sizes do not match at layer {i} ({sum(w[i])} != {self.size})")
yield i, x[i], w[i], t[i], c[i], legend

@cached_property
def entrypoint(self):
self.__get_ep_and_section()
Expand All @@ -121,6 +240,13 @@ def entrypoint_section(self):
self.__get_ep_and_section()
return self.__ep_section

@cached_property
def hash(self):
    """ SHA-256 hexadecimal digest of the raw bytes of the binary. """
    from hashlib import sha256
    return sha256(self.rawbytes).hexdigest()

@property
def rawbytes(self):
with open(self.path, "rb") as f:
Expand All @@ -129,8 +255,7 @@ def rawbytes(self):

@cached_property
def section_names(self):
__sn = lambda s: _ensure_str(s).strip("\x00") or _ensure_str(s) or "<empty>"
names = {s.name: __sn(s.name) for s in self.__binary.sections}
names = {s.name: _ensure_str(s.name).strip("\x00") or "<empty>" for s in self.__binary.sections}
# names from string table only applies to PE
if self.type != "PE":
return names
Expand All @@ -139,10 +264,11 @@ def section_names(self):
if all(match(r"/\d+$", n) is None for n in names.keys()):
return names
real_names = {}
str_table_offset = self.__binary.header.pointerto_symbol_table + self.__binary.header.numberof_symbols * 18
with open(self.path, "rb") as f:
for n in names:
if match(r"/\d+$", n):
f.seek(string_table_offset + int(name[1:]))
f.seek(str_table_offset + int(n[1:]))
n2 = b"".join(iter(lambda: f.read(1), b'\x00')).decode("utf-8", errors="ignore")
else:
n2 = n
Expand Down
3 changes: 2 additions & 1 deletion src/exeplot/plots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
name = f[:-3]
module = importlib.import_module(f".{name}", package=__name__)
if hasattr(module, "plot") and callable(getattr(module, "plot")):
globals()[f"{name}"] = getattr(module, "plot")
globals()[f"{name}"] = f = getattr(module, "plot")
f.__args__ = getattr(module, "arguments")
__all__.append(name)

Loading

0 comments on commit 219b75c

Please sign in to comment.