diff --git a/dfint64_patch/strings_context/extract_strings_with_subs.py b/dfint64_patch/strings_context/extract_strings_with_subs.py index 979dd32..8cd7efc 100644 --- a/dfint64_patch/strings_context/extract_strings_with_subs.py +++ b/dfint64_patch/strings_context/extract_strings_with_subs.py @@ -4,13 +4,13 @@ from collections import defaultdict from dataclasses import dataclass -from io import BufferedReader from operator import itemgetter from pathlib import Path from typing import NamedTuple import lief from omegaconf import DictConfig +from tqdm import tqdm from dfint64_patch.config import with_config from dfint64_patch.cross_references.cross_references_relative import find_relative_cross_references @@ -24,9 +24,12 @@ def extract_strings_with_xrefs(pe: lief.PE.Binary) -> dict[ExtractedStringInfo, string_section = pe.sections[1] strings = list( - extract_strings_from_raw_bytes( - string_section.content, - base_address=Rva(string_section.virtual_address), + tqdm( + extract_strings_from_raw_bytes( + string_section.content, + base_address=Rva(string_section.virtual_address), + ), + desc="extract_strings_from_raw_bytes", ), ) @@ -48,34 +51,42 @@ class StringCrossReference(NamedTuple): cross_reference: Rva -def extract_strings_grouped_by_subs(pe_file: BufferedReader) -> dict[Rva, list[StringCrossReference]]: - pe = lief.PE.parse(pe_file) - assert pe is not None - code_section = pe.sections[0] +def group_by_subroutines( + strings_with_xrefs: dict[ExtractedStringInfo, list[Rva]], + subroutines: list[SubroutineInfo], +) -> dict[SubroutineInfo, list[StringCrossReference]]: + raw_result: dict[SubroutineInfo, list[StringCrossReference]] = defaultdict(list) + + for string_info, xrefs in tqdm(strings_with_xrefs.items(), desc="group_by_subroutines"): + for xref in xrefs: + subroutine = which_subroutine(subroutines, xref) + if not subroutine: + continue + raw_result[subroutine].append(StringCrossReference(string_info.string, xref)) + + return raw_result - image_base = pe.optional_header.imagebase +def extract_strings_grouped_by_subs(pe: lief.PE.Binary) -> dict[SubroutineInfo, list[StringCrossReference]]: + code_section = pe.sections[0] strings_with_xrefs = extract_strings_with_xrefs(pe) subroutines = list( - extract_subroutines( - code_section.content, - base_offset=code_section.virtual_address, + tqdm( + extract_subroutines( + code_section.content, + base_offset=code_section.virtual_address, + ), + desc="extract_subroutines", ) ) - raw_result: dict[SubroutineInfo, list[StringCrossReference]] = defaultdict(list) - for string_info, xrefs in strings_with_xrefs.items(): - for xref in xrefs: - subroutine = which_subroutine(subroutines, xref) - if not subroutine: - continue - raw_result[subroutine].append(StringCrossReference(string_info.string, xref)) + raw_result = group_by_subroutines(strings_with_xrefs, subroutines) - result: dict[Rva, list[StringCrossReference]] = {} + result: dict[SubroutineInfo, list[StringCrossReference]] = {} for subroutine, string_xrefs in sorted(raw_result.items(), key=itemgetter(0)): sorted_xrefs = sorted(string_xrefs, key=lambda x: x.cross_reference) - result[Rva(image_base + subroutine.start)] = sorted_xrefs + result[subroutine] = sorted_xrefs return result @@ -89,10 +100,14 @@ class ExtractConfig(DictConfig): @with_config(ExtractConfig, ".extract.yaml") def main(conf: ExtractConfig) -> None: with Path(conf.file_name).open("rb") as pe_file: - for subroutine, strings in extract_strings_grouped_by_subs(pe_file).items(): - print(f"sub_{subroutine:x}:") + pe = lief.PE.parse(pe_file) + assert pe is not None + + image_base = pe.optional_header.imagebase + for subroutine, strings in extract_strings_grouped_by_subs(pe).items(): + print(f"[sub_{image_base + subroutine.start:x}]") for string in strings: - print(f"\t{string.string}") + print(string.string) print() diff --git a/pyproject.toml b/pyproject.toml index bb7760e..19cfa7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ patch = "dfint64_patch.patch:main" [tool.poe.tasks] extract.script = "dfint64_patch.extract_strings.cli:main" +extract_with_subs.script = "dfint64_patch.strings_context.extract_strings_with_subs:main" [[tool.poetry.source]] name = "dfint"