Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 39 additions & 24 deletions dfint64_patch/strings_context/extract_strings_with_subs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

from collections import defaultdict
from dataclasses import dataclass
from io import BufferedReader
from operator import itemgetter
from pathlib import Path
from typing import NamedTuple

import lief
from omegaconf import DictConfig
from tqdm import tqdm

from dfint64_patch.config import with_config
from dfint64_patch.cross_references.cross_references_relative import find_relative_cross_references
Expand All @@ -24,9 +24,12 @@ def extract_strings_with_xrefs(pe: lief.PE.Binary) -> dict[ExtractedStringInfo,
string_section = pe.sections[1]

strings = list(
extract_strings_from_raw_bytes(
string_section.content,
base_address=Rva(string_section.virtual_address),
tqdm(
extract_strings_from_raw_bytes(
string_section.content,
base_address=Rva(string_section.virtual_address),
),
desc="extract_strings_from_raw_bytes",
),
)

Expand All @@ -48,34 +51,42 @@ class StringCrossReference(NamedTuple):
cross_reference: Rva


def extract_strings_grouped_by_subs(pe_file: BufferedReader) -> dict[Rva, list[StringCrossReference]]:
pe = lief.PE.parse(pe_file)
assert pe is not None
code_section = pe.sections[0]
def group_by_subroutines(
    strings_with_xrefs: dict[ExtractedStringInfo, list[Rva]],
    subroutines: list[SubroutineInfo],
) -> dict[SubroutineInfo, list[StringCrossReference]]:
    """
    Bucket string cross references by the subroutine ``which_subroutine`` reports for them.

    Every cross reference of every string is looked up with ``which_subroutine``;
    references for which that lookup returns a falsy value are dropped.

    :param strings_with_xrefs: extracted strings mapped to the addresses that reference them
    :param subroutines: subroutines handed to ``which_subroutine`` for the lookup
    :return: ``defaultdict(list)`` mapping each matched subroutine to its
        ``StringCrossReference`` entries, in the order they were encountered
    """
    grouped: dict[SubroutineInfo, list[StringCrossReference]] = defaultdict(list)

    # Progress is reported per string, not per individual cross reference.
    progress = tqdm(strings_with_xrefs.items(), desc="group_by_subroutines")
    for info, references in progress:
        for reference in references:
            owner = which_subroutine(subroutines, reference)
            if owner:
                grouped[owner].append(StringCrossReference(info.string, reference))

    return grouped

image_base = pe.optional_header.imagebase

def extract_strings_grouped_by_subs(pe: lief.PE.Binary) -> dict[SubroutineInfo, list[StringCrossReference]]:
    """
    Extract strings with their cross references and group them by subroutine.

    Subroutines are extracted from ``pe.sections[0]`` (NOTE(review): assumes the first
    section is the code section - confirm for the binaries this tool targets).

    :param pe: parsed PE binary
    :return: mapping from subroutine to its string cross references; subroutines are
        inserted in sorted order and each list is sorted by ``cross_reference``
    """
    code_section = pe.sections[0]
    strings_with_xrefs = extract_strings_with_xrefs(pe)

    subroutines = list(
        tqdm(
            extract_subroutines(
                code_section.content,
                base_offset=code_section.virtual_address,
            ),
            desc="extract_subroutines",
        )
    )

    raw_result = group_by_subroutines(strings_with_xrefs, subroutines)

    # Keys are SubroutineInfo objects; callers that need an absolute address add the
    # image base to ``subroutine.start`` themselves.
    result: dict[SubroutineInfo, list[StringCrossReference]] = {}
    for subroutine, string_xrefs in sorted(raw_result.items(), key=itemgetter(0)):
        result[subroutine] = sorted(string_xrefs, key=lambda x: x.cross_reference)

    return result

Expand All @@ -89,10 +100,14 @@ class ExtractConfig(DictConfig):
@with_config(ExtractConfig, ".extract.yaml")
def main(conf: ExtractConfig) -> None:
    """
    Print the strings referenced by each subroutine of the configured PE file.

    Output is one ``[sub_<hex address>]`` header per subroutine (image base plus the
    subroutine start), followed by one string per line and a blank separator line.

    :param conf: configuration object; ``conf.file_name`` is the path of the PE file
    :raises ValueError: if the file cannot be parsed as a PE binary
    """
    with Path(conf.file_name).open("rb") as pe_file:
        pe = lief.PE.parse(pe_file)
        # Explicit check instead of `assert`: asserts are stripped under `python -O`,
        # which would turn a parse failure into an AttributeError further down.
        if pe is None:
            msg = f"Failed to parse PE file: {conf.file_name}"
            raise ValueError(msg)

        image_base = pe.optional_header.imagebase
        for subroutine, strings in extract_strings_grouped_by_subs(pe).items():
            print(f"[sub_{image_base + subroutine.start:x}]")
            for string in strings:
                print(string.string)

            print()

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ patch = "dfint64_patch.patch:main"

[tool.poe.tasks]
extract.script = "dfint64_patch.extract_strings.cli:main"
extract_with_subs.script = "dfint64_patch.strings_context.extract_strings_with_subs:main"

[[tool.poetry.source]]
name = "dfint"
Expand Down