diff --git a/analyzer/windows/analyzer.py b/analyzer/windows/analyzer.py index 057f1c801a1..9778336417c 100644 --- a/analyzer/windows/analyzer.py +++ b/analyzer/windows/analyzer.py @@ -1408,10 +1408,11 @@ def _handle_process2(self, data): return self._inject_process(int(pid), int(tid), int(mode)) - def _handle_file_new(self, file_path): + def _handle_file_new(self, data): """Notification of a new dropped file.""" - if os.path.exists(file_path): - self.analyzer.files.add_file(file_path.decode(), self.pid) + pid, file_path = data.split(b",", 1) + if os.path.exists(file_path.decode()): + self.analyzer.files.add_file(file_path.decode(), pid.decode()) def _handle_file_cape(self, data): """Notification of a new dropped file.""" @@ -1432,9 +1433,9 @@ def _handle_file_cape(self, data): def _handle_file_del(self, data): """Notification of a file being removed (if it exists) - we have to dump it before it's being removed.""" - file_path = data.decode() - if os.path.exists(file_path): - self.analyzer.files.delete_file(file_path, self.pid) + pid, file_path = data.split(b",", 1) + if os.path.exists(file_path.decode()): + self.analyzer.files.delete_file(file_path.decode(), pid.decode()) def _handle_file_dump(self, file_path): # We extract the file path. 
@@ -1492,19 +1493,15 @@ def _handle_file_move(self, data): if b"::" not in data: log.warning("Received FILE_MOVE command from monitor with an incorrect argument") return - - old_filepath, new_filepath = data.split(b"::", 1) - new_filepath = new_filepath.decode() - self.analyzer.files.move_file(old_filepath.decode(), new_filepath, self.pid) + pid, paths = data.split(b",", 1) + old_filepath, new_filepath = paths.split(b"::", 1) + self.analyzer.files.move_file(old_filepath.decode(), new_filepath.decode(), pid.decode()) def dispatch(self, data): response = "NOPE" if not data or b":" not in data: log.critical("Unknown command received from the monitor: %s", data.strip()) else: - # Backwards compatibility (old syntax is, e.g., "FILE_NEW:" vs the - # new syntax, e.g., "1234:FILE_NEW:"). - # if data[0].isupper(): command, arguments = data.strip().split(b":", 1) # Uncomment to debug monitor commands # if command not in (b"DEBUG", b"INFO"): diff --git a/analyzer/windows/dll/capemon.dll b/analyzer/windows/dll/capemon.dll index b4a5f189da9..0fb1e7a838c 100755 Binary files a/analyzer/windows/dll/capemon.dll and b/analyzer/windows/dll/capemon.dll differ diff --git a/analyzer/windows/dll/capemon_x64.dll b/analyzer/windows/dll/capemon_x64.dll index 0759f13b2bd..b8a0186709f 100755 Binary files a/analyzer/windows/dll/capemon_x64.dll and b/analyzer/windows/dll/capemon_x64.dll differ diff --git a/changelog.md b/changelog.md index cdaa0359847..3b42169a879 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,13 @@ +### [04.02.2026] +* Network Analysis: + * Integrated process mapping directly into `network` processing module. + * Added ability to show network details (DNS, HTTP, TCP/UDP) captured from behavioral analysis in the network results. + * This allows recovery of network activity that might be missing from PCAP (e.g., due to capture evasion or failed interception). 
+ * Centralized network utility functions into `lib/cuckoo/common/network_utils.py` for better maintainability and performance. + * New configuration option `process_map` under `[network]` section in `processing.conf`. +* Web UI: + * Added Process Name and PID columns across all network analysis views (TCP, UDP, ICMP, DNS, HTTP, IRC, SMTP). + ### [28.01.2026] * CAPE Agent: * Ported to Golang for improved stealth, performance, and zero-dependency deployment. diff --git a/conf/default/processing.conf.default b/conf/default/processing.conf.default index 4ea1a3124af..686a3d17639 100644 --- a/conf/default/processing.conf.default +++ b/conf/default/processing.conf.default @@ -99,6 +99,10 @@ enabled = no [network] enabled = yes sort_pcap = no +# Enable mapping of network events to specific processes using behavioral analysis data +process_map = no +# Adds network connections seen in behavior but not in PCAP. Requires process_map = yes +merge_behavior_map = no # DNS whitelisting to ignore domains/IPs configured in network.py dnswhitelist = yes # additional entries @@ -324,5 +328,3 @@ enabled = no # plain-text TLS streams into the task PCAP. enabled = no -[network_process_map] -enabled = no diff --git a/lib/cuckoo/common/network_utils.py b/lib/cuckoo/common/network_utils.py new file mode 100644 index 00000000000..86302687f28 --- /dev/null +++ b/lib/cuckoo/common/network_utils.py @@ -0,0 +1,275 @@ +# Copyright (C) 2010-2015 Cuckoo Foundation. +# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org +# See the file 'docs/LICENSE' for copying permission. 
+ +import datetime +from contextlib import suppress +from urllib.parse import urlparse + +DNS_APIS = { + "getaddrinfo", + "getaddrinfow", + "getaddrinfoex", + "getaddrinfoexw", + "gethostbyname", + "gethostbynamew", + "dnsquery_a", + "dnsquery_w", + "dnsqueryex", + "dnsquery", +} + + +HTTP_HINT_APIS = { + "internetcrackurla", + "internetcrackurlw", + "httpsendrequesta", + "httpsendrequestw", + "internetsendrequesta", + "internetsendrequestw", + "internetconnecta", + "internetconnectw", + "winhttpopenrequest", + "winhttpsendrequest", + "winhttpconnect", + "winhttpopen", + "internetopenurla", + "internetopenurlw", + "httpopenrequesta", + "httpopenrequestw", + "isvalidurl", +} + + +TLS_HINT_APIS = { + "sslencryptpacket", + "ssldecryptpacket", + "initializesecuritycontexta", + "initializesecuritycontextw", + "initializesecuritycontextexa", + "initializesecuritycontextexw", + "acceptsecuritycontext", +} + + +def _norm_domain(d): + if not d or not isinstance(d, str): + return None + d = d.strip().strip(".").lower() + return d or None + + +def _parse_behavior_ts(ts_str): + """ + Parse behavior timestamp like: '2026-01-22 23:46:58,199' -> epoch float + Returns None if parsing fails. 
+ """ + if not ts_str or not isinstance(ts_str, str): + return None + try: + return datetime.datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S,%f").timestamp() + except ValueError: + return None + + +def _get_call_args_dict(call): + """Convert arguments list to a dictionary for O(1) access.""" + return {a["name"]: a["value"] for a in call.get("arguments", []) if "name" in a} + + +def _extract_domain_from_call(call, args_map): + # Check named arguments first + for name in ( + "hostname", + "host", + "node", + "nodename", + "name", + "domain", + "szName", + "pszName", + "lpName", + "query", + "queryname", + "dns_name", + "QueryName", + "lpstrName", + "pName", + ): + v = args_map.get(name) + if isinstance(v, str) and v.strip(): + return v + + # Heuristic scan of all string arguments + for v in args_map.values(): + if isinstance(v, str): + s = v.strip() + if "." in s and " " not in s and s.count(".") <= 10: + return s + + return None + + +def _get_arg_any(args_map, *names): + """Return the first matching argument value for any of the provided names.""" + for n in names: + if n in args_map: + return args_map[n] + return None + + +def _norm_ip(ip): + if ip is None: + return None + if not isinstance(ip, str): + ip = str(ip) + ip = ip.strip() + return ip or None + + +def _looks_like_http(buf): + if not buf or not isinstance(buf, str): + return False + + first = buf.splitlines()[0].strip() if buf else "" + if not first: + return False + + u = first.upper() + if u.startswith("HTTP/1.") or u.startswith("HTTP/2"): + return True + + methods = ("GET ", "POST ", "HEAD ", "PUT ", "DELETE ", "OPTIONS ", "PATCH ", "TRACE ") + if any(u.startswith(m) for m in methods) and " HTTP/1." in u: + return True + + if u.startswith("CONNECT ") and " HTTP/1." 
in u: + return True + + return False + + +def _http_host_from_buf(buf): + if not buf or not isinstance(buf, str): + return None + + lines = buf.splitlines() + if not lines: + return None + + for line in lines[1:50]: + if line.lower().startswith("host:"): + try: + return line.split(":", 1)[1].strip() + except IndexError: + continue + + with suppress(Exception): + first = lines[0].strip() + parts = first.split() + if len(parts) >= 2: + target = parts[1].strip() + url = _extract_first_url(target) + if url: + host = _host_from_url(url) + if host: + return host + + with suppress(Exception): + first = lines[0].strip() + parts = first.split() + if len(parts) >= 2 and parts[0].upper() == "CONNECT": + return parts[1].strip() + + return None + + +def _safe_int(x): + with suppress(Exception): + return int(x) + return None + + +def _host_from_url(url): + if not url or not isinstance(url, str): + return None + + with suppress(Exception): + u = urlparse(url) + return u.hostname + + return None + + +def _extract_first_url(text): + if not text or not isinstance(text, str): + return None + s = text.strip() + for scheme in ("http://", "https://"): + idx = s.lower().find(scheme) + if idx != -1: + return s[idx:].split()[0].strip('"\',') + return None + + +def _add_http_host(http_host_map, host, pinfo, sock=None): + """ + Store host keys in a stable way. + Adds: + - normalized host + - if host is host:port and port parses, also normalized host-only + """ + hk = _norm_domain(host) + if not hk: + return + + entry = dict(pinfo) + if sock is not None: + entry["socket"] = sock + + http_host_map[hk].append(entry) + + if ":" in hk: + h_only, p = hk.rsplit(":", 1) + if _safe_int(p) is not None and h_only: + http_host_map[h_only].append(entry) + + +def _extract_tls_server_name(call, args_map): + """ + Best-effort server name extraction for TLS/SChannel/SSPI. 
class NetworkMap:
    """
    Generates mappings between processes and network events (IPs, Hosts, DNS)
    from behavioral API calls.

    Calls are fed one at a time through ``event_apicall``; ``run`` returns the
    collected indexes under the ``network_map`` key:
      - ``endpoint_map``:  "ip:port" -> [process entry, ...]
      - ``http_host_map``: host      -> [process entry, ...]
      - ``dns_intents``:   domain    -> [{"process", "ts_epoch", "api"}, ...]

    A process entry is ``{"process_id", "process_name"}`` plus ``"socket"``
    when the call exposed one.
    """

    key = "network_map"

    # Socket-level APIs whose arguments carry the remote endpoint.
    _ENDPOINT_APIS = frozenset({"connect", "wsaconnect", "connectex", "sendto", "wsasendto", "recvfrom", "wsarecvfrom"})
    # Send APIs whose buffer may hold a plain-text HTTP request.
    _HTTP_SEND_APIS = frozenset({"send", "wsasend", "sendto", "wsasendto"})

    def __init__(self):
        self.endpoint_map = defaultdict(list)  # (ip, port) -> [pinfo]
        self.http_host_map = defaultdict(list)  # host -> [pinfo]
        self.dns_intents = defaultdict(list)  # domain -> [intent]

    def event_apicall(self, call, process):
        """Record endpoint, HTTP host and DNS information carried by one API call.

        Args:
            call: API call dict (``category``, ``api``, ``arguments``, ``timestamp``).
            process: dict describing the calling process (``process_id``, ``process_name``).
        """
        if call.get("category") != "network":
            return

        pid = process.get("process_id")
        if pid is None:
            # An entry without a PID cannot be attributed to any process.
            return

        api = (call.get("api") or "").lower()
        args_map = _get_call_args_dict(call)

        pinfo = {
            "process_id": pid,
            "process_name": process.get("process_name", ""),
        }

        sock = _get_arg_any(args_map, "socket", "sock", "fd", "handle")
        ip = _norm_ip(_get_arg_any(args_map, "ip", "dst", "dstip", "ip_address", "address", "remote_ip", "server"))
        port = _get_arg_any(args_map, "port", "dport", "dstport", "remote_port", "server_port")
        buf = _get_arg_any(args_map, "Buffer", "buffer", "buf", "data")

        # 1. Endpoint Map (Socket/IP/Port)
        if api in self._ENDPOINT_APIS:
            p_int = _safe_int(port)
            if ip and p_int is not None:
                entry = dict(pinfo)
                if sock is not None:
                    entry["socket"] = sock
                self.endpoint_map[(ip, p_int)].append(entry)

        # 2. HTTP Host Map
        if api in self._HTTP_SEND_APIS and _looks_like_http(buf):
            host = _http_host_from_buf(buf)
            if host:
                _add_http_host(self.http_host_map, host, pinfo, sock=sock)

        if api in HTTP_HINT_APIS:
            url = _get_arg_any(args_map, "URL", "url", "lpszUrl", "lpUrl", "uri", "pszUrl", "pUrl")
            if isinstance(url, str) and url.strip():
                # Fall back to the raw argument when no scheme-prefixed URL is embedded.
                host = _host_from_url(_extract_first_url(url) or url.strip())
                if host:
                    _add_http_host(self.http_host_map, host, pinfo, sock=sock)

            if isinstance(buf, str):
                embedded_url = _extract_first_url(buf)
                if embedded_url:
                    embedded_host = _host_from_url(embedded_url)
                    if embedded_host:
                        _add_http_host(self.http_host_map, embedded_host, pinfo, sock=sock)

        if api in TLS_HINT_APIS:
            sni = _extract_tls_server_name(call, args_map)
            if sni:
                _add_http_host(self.http_host_map, sni, pinfo, sock=sock)

            # Buffers seen by TLS hooks may hold the HTTP request in plain text.
            if isinstance(buf, str) and _looks_like_http(buf):
                plain_host = _http_host_from_buf(buf)
                if plain_host:
                    _add_http_host(self.http_host_map, plain_host, pinfo, sock=sock)

        # 3. DNS Intents
        if api in DNS_APIS:
            domain = _norm_domain(_extract_domain_from_call(call, args_map))
            if domain:
                self.dns_intents[domain].append(
                    {
                        "process": dict(pinfo),
                        "ts_epoch": _parse_behavior_ts(call.get("timestamp")),
                        "api": api,
                    }
                )

    def run(self):
        """Return the collected maps as plain dicts with string keys.

        DNS intents are sorted by timestamp, entries without one last.
        """
        for intents in self.dns_intents.values():
            intents.sort(key=lambda x: (x["ts_epoch"] is None, x["ts_epoch"] or 0.0))

        # Report keys must be strings, so (ip, port) tuples become "ip:port".
        # Plain dicts are returned so that later lookups on the stored result
        # cannot silently create keys the way a defaultdict would.
        # NOTE(review): keys contain dots (IPs, host names) - confirm every
        # report backend in use accepts dotted keys.
        return {
            "endpoint_map": {f"{ip}:{port}": entries for (ip, port), entries in self.endpoint_map.items()},
            "http_host_map": dict(self.http_host_map),
            "dns_intents": dict(self.dns_intents),
        }
or {} + return {} + + def _reconstruct_endpoint_map(self, raw_map: Dict[str, List[Dict]]) -> Dict[tuple, List[Dict]]: + """ + Convert JSON-friendly "ip:port" keys back to (ip, int(port)) tuples. + """ + endpoint_map = {} + for key, val in raw_map.items(): + try: + ip, port_str = key.rsplit(":", 1) + port = int(port_str) + endpoint_map[(ip, port)] = val + except (ValueError, IndexError): + continue + return endpoint_map + + def _pick_best(self, candidates: List[Dict]) -> Optional[Dict]: + if not candidates: + return None + + for c in candidates: + if c.get("process_name"): + return c + + return candidates[0] + + def _match_dns_process(self, dns_entry: Dict, dns_intents: Dict, max_skew_seconds: float = 10.0) -> Optional[Dict]: + """ + Match a network.dns entry to the closest behavior DNS intent by: + - same domain + - closest timestamp (if both sides have timestamps) + + Returns process dict or None. + """ + req = _norm_domain(dns_entry.get("request")) + if not req: + return None + + candidates = dns_intents.get(req) or [] + if not candidates: + return None + + net_ts = dns_entry.get("first_seen") + if not isinstance(net_ts, (int, float)): + return candidates[0].get("process") + + best = None + best_delta = None + + for c in candidates: + bts = c.get("ts_epoch") + if not isinstance(bts, (int, float)): + continue + + delta = abs(net_ts - bts) + if best is None or delta < best_delta: + best = c + best_delta = delta + + if best is not None and best_delta is not None and best_delta <= max_skew_seconds: + return best.get("process") + + return candidates[0].get("process") + + def _pcap_first_epoch(self, network: Dict) -> Optional[float]: + ts = [] + for k in ("dns", "http"): + for e in network.get(k) or []: + v = e.get("first_seen") + if isinstance(v, (int, float)): + ts.append(float(v)) + return min(ts) if ts else None + + def _build_dns_events_rel(self, network: Dict, dns_intents: Dict, max_skew_seconds: float = 10.0) -> List[Dict]: + """ + Returns a list of dns events: 
+ [{"t_rel": float, "process": {...}|None, "request": "example.com"}] + """ + out = [] + first_epoch = self._pcap_first_epoch(network) + if first_epoch is None: + return out + + for d in network.get("dns") or []: + first_seen = d.get("first_seen") + if not isinstance(first_seen, (int, float)): + continue + t_rel = float(first_seen) - float(first_epoch) + proc = self._match_dns_process(d, dns_intents, max_skew_seconds=max_skew_seconds) + out.append({"t_rel": t_rel, "process": proc, "request": d.get("request")}) + + out.sort(key=lambda x: x["t_rel"]) + return out + + def _nearest_dns_process_by_rel_time(self, dns_events_rel: List[Dict], t_rel: Any, max_skew: float = 5.0) -> Optional[Dict]: + if not dns_events_rel or not isinstance(t_rel, (int, float)): + return None + + best = None + best_delta = None + for e in dns_events_rel: + delta = abs(e["t_rel"] - float(t_rel)) + if best is None or delta < best_delta: + best = e + best_delta = delta + + if best is not None and best_delta is not None and best_delta <= max_skew: + return best.get("process") + return None + + def _set_proc_fields(self, obj: Dict, proc: Optional[Dict]): + """ + Add process_id/process_name onto an existing network entry. + If proc is None, sets them to None (keeps template stable). 
+ """ + if proc: + obj["process_id"] = proc.get("process_id") + obj["process_name"] = proc.get("process_name") + else: + obj["process_id"] = None + obj["process_name"] = None + + def _process_map(self, network: Dict): + net_map = self._load_network_map() + + if not network or not net_map: + return + + endpoint_map = self._reconstruct_endpoint_map(net_map.get("endpoint_map", {})) + http_host_map = net_map.get("http_host_map", {}) + dns_intents = net_map.get("dns_intents", {}) + + for flow in network.get("tcp") or []: + proc = None + if flow.get("dst") and flow.get("dport") is not None: + proc = self._pick_best(endpoint_map.get((flow["dst"], int(flow["dport"])), [])) + + self._set_proc_fields(flow, proc) + + dns_events_rel = self._build_dns_events_rel(network, dns_intents, max_skew_seconds=10.0) + for d in network.get("dns") or []: + proc = self._match_dns_process(d, dns_intents, max_skew_seconds=10.0) + self._set_proc_fields(d, proc) + + for flow in network.get("udp") or []: + proc = None + dst = flow.get("dst") + dport = flow.get("dport") + sport = flow.get("sport") + + if dst and dport is not None: + proc = self._pick_best(endpoint_map.get((dst, int(dport)), [])) + + if not proc and (dport == 53 or sport == 53): + t_rel = flow.get("time") + proc = self._nearest_dns_process_by_rel_time(dns_events_rel, t_rel, max_skew=5.0) + + self._set_proc_fields(flow, proc) + + for key in ("http", "http_ex", "https_ex"): + for h in network.get(key) or []: + proc = None + + host = h.get("host") + if isinstance(host, str) and host: + # Normalize key for lookup + norm_host = _norm_domain(host) + if norm_host: + proc = self._pick_best(http_host_map.get(norm_host, [])) + + # Try fallback to IP if host lookup failed or wasn't present, + # but only if original logic supported it. 
def _merge_behavior_network(self, results):
    """
    Merge network events found in behavior logs but missing in PCAP.
    Marks them with source='behavior'.

    Args:
        results: dict to merge into. When it holds a dict under the
            ``"network"`` key that nested dict is used; otherwise *results*
            itself is treated as the network results dict.
    """
    net_map = self._load_network_map()
    if not net_map:
        return

    # Falling back to `results` itself matters: looking up a missing "network"
    # key with a fresh {} default would send every merged entry into a
    # throwaway dict.
    # NOTE(review): the visible call site passes the same object that
    # _process_map reads tcp/udp/dns from - confirm against the full run().
    nested = results.get("network")
    network = nested if isinstance(nested, dict) else results

    def bucket(name):
        """Return network[name] as a list, creating it when missing or None."""
        items = network.get(name)
        if items is None:
            items = network[name] = []
        return items

    def as_port(value):
        """Coerce a port to int so string and int ports compare equal."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return value

    # 1. DNS
    dns_intents = net_map.get("dns_intents") or {}
    existing_dns = {_norm_domain(d.get("request")) for d in network.get("dns") or [] if d.get("request")}

    for domain, intents in dns_intents.items():
        if domain in existing_dns or not intents:
            continue
        first_intent = intents[0]
        proc = first_intent.get("process") or {}
        bucket("dns").append(
            {
                "request": domain,
                "answers": [],
                "type": "A",
                "source": "behavior",
                "process_id": proc.get("process_id"),
                "process_name": proc.get("process_name"),
                "time": first_intent.get("ts_epoch"),
            }
        )

    # 2. HTTP
    http_host_map = net_map.get("http_host_map") or {}
    http_events = (network.get("http") or []) + (network.get("http_ex") or []) + (network.get("https_ex") or [])
    existing_hosts = {_norm_domain(h.get("host")) for h in http_events if h.get("host")}

    for host, procs in http_host_map.items():
        if host in existing_hosts:
            continue
        proc = procs[0] if procs else {}
        # Only the host is known from behavior; port, uri and method are placeholders.
        bucket("http").append(
            {
                "host": host,
                "port": 80,
                "uri": "/",
                "method": "GET",
                "source": "behavior",
                "process_id": proc.get("process_id"),
                "process_name": proc.get("process_name"),
            }
        )

    # 3. Connections (TCP/UDP)
    endpoint_map = self._reconstruct_endpoint_map(net_map.get("endpoint_map") or {})

    existing_endpoints = {
        (flow.get("dst"), as_port(flow.get("dport")))
        for flow_type in ("tcp", "udp")
        for flow in network.get(flow_type) or []
    }

    for (ip, port), procs in endpoint_map.items():
        if (ip, port) in existing_endpoints:
            continue
        proc = procs[0] if procs else {}
        entry = {
            "src": "behavior",
            "sport": 0,
            "dst": ip,
            "dport": port,
            "source": "behavior",
            "process_id": proc.get("process_id"),
            "process_name": proc.get("process_name"),
        }
        # Heuristic: DNS is usually UDP, HTTP/others usually TCP
        bucket("udp" if port == 53 else "tcp").append(entry)
b/modules/processing/network_process_map.py deleted file mode 100644 index 4d692d1f82e..00000000000 --- a/modules/processing/network_process_map.py +++ /dev/null @@ -1,585 +0,0 @@ -# Copyright (C) 2010-2015 Cuckoo Foundation. -# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org -# See the file 'docs/LICENSE' for copying permission. - -import logging -from collections import defaultdict -from contextlib import suppress -from datetime import datetime -from urllib.parse import urlparse - -from lib.cuckoo.common.abstracts import Processing - -log = logging.getLogger(__name__) - - -DNS_APIS = { - "getaddrinfo", - "getaddrinfow", - "getaddrinfoex", - "getaddrinfoexw", - "gethostbyname", - "gethostbynamew", - "dnsquery_a", - "dnsquery_w", - "dnsqueryex", - "dnsquery", -} - - -HTTP_HINT_APIS = { - "internetcrackurla", - "internetcrackurlw", - "httpsendrequesta", - "httpsendrequestw", - "internetsendrequesta", - "internetsendrequestw", - "internetconnecta", - "internetconnectw", - "winhttpopenrequest", - "winhttpsendrequest", - "winhttpconnect", - "winhttpopen", - "internetopenurla", - "internetopenurlw", - "httpopenrequesta", - "httpopenrequestw", -} - - -TLS_HINT_APIS = { - "sslencryptpacket", - "ssldecryptpacket", - "initializesecuritycontexta", - "initializesecuritycontextw", - "initializesecuritycontextexa", - "initializesecuritycontextexw", - "acceptsecuritycontext", -} - - -def _norm_domain(d): - if not d or not isinstance(d, str): - return None - d = d.strip().strip(".").lower() - return d or None - - -def _parse_behavior_ts(ts_str): - """ - Parse behavior timestamp like: '2026-01-22 23:46:58,199' -> epoch float - Returns None if parsing fails. 
- """ - if not ts_str or not isinstance(ts_str, str): - return None - with suppress(Exception): - dt = datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S,%f") - return dt.timestamp() - return None - - -def _extract_domain_from_call(call): - for name in ( - "hostname", "host", "node", "nodename", "name", "domain", - "szName", "pszName", "lpName", "query", "queryname", "dns_name", - "QueryName", "lpstrName", "pName" - ): - v = _get_arg(call, name) - if isinstance(v, str) and v.strip(): - return v - - for a in call.get("arguments", []) or []: - v = a.get("value") - if isinstance(v, str): - s = v.strip() - if "." in s and " " not in s and s.count(".") <= 10: - return s - - return None - - -def _get_arg(call, name): - for a in call.get("arguments", []) or []: - if a.get("name") == name: - return a.get("value") - return None - - -def _get_arg_any(call, *names): - """Return the first matching argument value for any of the provided names.""" - for n in names: - v = _get_arg(call, n) - if v is not None: - return v - return None - - -def _norm_ip(ip): - if ip is None: - return None - if not isinstance(ip, str): - ip = str(ip) - ip = ip.strip() - return ip or None - - -def _looks_like_http(buf): - if not buf or not isinstance(buf, str): - return False - - first = buf.splitlines()[0].strip() if buf else "" - if not first: - return False - - u = first.upper() - if u.startswith("HTTP/1.") or u.startswith("HTTP/2"): - return True - - methods = ( - "GET ", "POST ", "HEAD ", "PUT ", "DELETE ", "OPTIONS ", "PATCH ", "TRACE " - ) - if any(u.startswith(m) for m in methods) and " HTTP/1." in u: - return True - - if u.startswith("CONNECT ") and " HTTP/1." 
in u: - return True - - return False - - -def _http_host_from_buf(buf): - if not buf or not isinstance(buf, str): - return None - - lines = buf.splitlines() - if not lines: - return None - - for line in lines[1:50]: - if line.lower().startswith("host:"): - return line.split(":", 1)[1].strip() - - with suppress(Exception): - first = lines[0].strip() - parts = first.split() - if len(parts) >= 2: - target = parts[1].strip() - url = _extract_first_url(target) - if url: - host = _host_from_url(url) - if host: - return host - - with suppress(Exception): - first = lines[0].strip() - parts = first.split() - if len(parts) >= 2 and parts[0].upper() == "CONNECT": - return parts[1].strip() - - return None - - -def _safe_int(x): - with suppress(Exception): - return int(x) - return None - - -def _host_from_url(url): - if not url or not isinstance(url, str): - return None - - with suppress(Exception): - u = urlparse(url) - return u.hostname - - return None - - -def _extract_first_url(text): - if not text or not isinstance(text, str): - return None - s = text.strip() - for scheme in ("http://", "https://"): - idx = s.lower().find(scheme) - if idx != -1: - return s[idx:].split()[0].strip('"\',') - return None - - -def _norm_hostkey(host): - if not host or not isinstance(host, str): - return None - h = host.strip().strip(".").lower() - return h or None - - -def _add_http_host(http_host_map, host, pinfo, sock=None): - """ - Store host keys in a stable way. - Adds: - - normalized host - - if host is host:port and port parses, also normalized host-only - """ - hk = _norm_hostkey(host) - if not hk: - return - - entry = dict(pinfo) - if sock is not None: - entry["socket"] = sock - - http_host_map[hk].append(entry) - - if ":" in hk: - h_only, p = hk.rsplit(":", 1) - if _safe_int(p) is not None and h_only: - http_host_map[h_only].append(entry) - - -def _extract_tls_server_name(call): - """ - Best-effort server name extraction for TLS/SChannel/SSPI. 
- Common arg names seen in hooks vary; keep it conservative. - """ - for name in ( - "sni", "SNI", - "ServerName", "servername", "server_name", - "TargetName", "targetname", - "Host", "host", "hostname", - "Url", "URL", "url", - ): - v = _get_arg(call, name) - if isinstance(v, str) and v.strip(): - s = v.strip() - u = _extract_first_url(s) - if u: - return _host_from_url(u) or s - if "." in s and " " not in s and len(s) < 260: - return s - - for a in call.get("arguments", []) or []: - v = a.get("value") - if isinstance(v, str): - s = v.strip() - if "." in s and " " not in s and len(s) < 260: - u = _extract_first_url(s) - if u: - return _host_from_url(u) or s - return s - - return None - - -class NetworkProcessMap(Processing): - """ - Augment existing results["network"] entries with process attribution fields. - - Adds (when available): - - process_id - - process_name - - No separate network_process_map output is produced. - """ - - order = 5 - - def _load_behavior(self): - with suppress(Exception): - b = self.results.get("behavior") - if b: - return b - - return None - - def _load_network(self): - with suppress(Exception): - return self.results.get("network") or {} - - return {} - - def _build_endpoint_to_process_map(self, behavior): - """ - Build: - - endpoint_map[(ip, port)] -> [{process_id, process_name, socket?}, ...] - - http_host_map[host] -> [{process_id, process_name, socket?}, ...] 
- """ - endpoint_map = defaultdict(list) - http_host_map = defaultdict(list) - - if not behavior: - return endpoint_map, http_host_map - - for p in (behavior.get("processes") or []): - pid = p.get("process_id") - if pid is None: - continue - - pinfo = { - "process_id": pid, - "process_name": p.get("process_name", ""), - } - - for c in p.get("calls", []): - if c.get("category") != "network": - continue - - api = (c.get("api") or "").lower() - sock = _get_arg_any(c, "socket", "sock", "fd", "handle") - ip = _norm_ip(_get_arg_any(c, "ip", "dst", "dstip", "ip_address", "address", "remote_ip", "server")) - port = _get_arg_any(c, "port", "dport", "dstport", "remote_port", "server_port") - buf = _get_arg_any(c, "Buffer", "buffer", "buf", "data") - - if api in ("connect", "wsaconnect", "connectex", "sendto", "wsasendto", "recvfrom", "wsarecvfrom"): - p_int = _safe_int(port) - if ip and p_int is not None: - entry = dict(pinfo) - if sock is not None: - entry["socket"] = sock - - endpoint_map[(ip, p_int)].append(entry) - - if api in ("connect", "wsaconnect", "connectex"): - continue - - if api in ("send", "wsasend", "sendto", "wsasendto") and _looks_like_http(buf): - host = _http_host_from_buf(buf) - if host: - _add_http_host(http_host_map, host, pinfo, sock=sock) - - if api in HTTP_HINT_APIS: - url = _get_arg_any(c, "url", "lpszUrl", "lpUrl", "uri", "pszUrl", "pUrl") - if isinstance(url, str) and url.strip(): - u = _extract_first_url(url) or url.strip() - host = _host_from_url(u) - if host: - _add_http_host(http_host_map, host, pinfo, sock=sock) - - if isinstance(buf, str): - u2 = _extract_first_url(buf) - if u2: - host2 = _host_from_url(u2) - if host2: - _add_http_host(http_host_map, host2, pinfo, sock=sock) - - if api in TLS_HINT_APIS: - sni = _extract_tls_server_name(c) - if sni: - _add_http_host(http_host_map, sni, pinfo, sock=sock) - - if isinstance(buf, str) and _looks_like_http(buf): - host3 = _http_host_from_buf(buf) - if host3: - _add_http_host(http_host_map, host3, 
pinfo, sock=sock) - - return endpoint_map, http_host_map - - def _pick_best(self, candidates): - if not candidates: - return None - - for c in candidates: - if c.get("process_name"): - return c - - return candidates[0] - - def _build_dns_intents(self, behavior): - """ - Build: domain -> list of {process info + ts_epoch} - """ - intents = defaultdict(list) - if not behavior: - return intents - - for p in (behavior.get("processes") or []): - pid = p.get("process_id") - if pid is None: - continue - - pinfo = { - "process_id": pid, - "process_name": p.get("process_name", ""), - } - - for c in p.get("calls", []): - if c.get("category") != "network": - continue - - api = (c.get("api") or "").lower() - if api not in DNS_APIS: - continue - - domain = _norm_domain(_extract_domain_from_call(c)) - if not domain: - continue - - ts_epoch = _parse_behavior_ts(c.get("timestamp")) - intents[domain].append( - { - "process": dict(pinfo), - "ts_epoch": ts_epoch, - "api": api, - } - ) - - for d in list(intents.keys()): - intents[d].sort(key=lambda x: (x["ts_epoch"] is None, x["ts_epoch"] or 0.0)) - - return intents - - def _match_dns_process(self, dns_entry, dns_intents, max_skew_seconds=10.0): - """ - Match a network.dns entry to the closest behavior DNS intent by: - - same domain - - closest timestamp (if both sides have timestamps) - - Returns process dict or None. 
- """ - req = _norm_domain(dns_entry.get("request")) - if not req: - return None - - candidates = dns_intents.get(req) or [] - if not candidates: - return None - - net_ts = dns_entry.get("first_seen") - if not isinstance(net_ts, (int, float)): - return candidates[0].get("process") - - best = None - best_delta = None - - for c in candidates: - bts = c.get("ts_epoch") - if not isinstance(bts, (int, float)): - continue - - delta = abs(net_ts - bts) - if best is None or delta < best_delta: - best = c - best_delta = delta - - if best is not None and best_delta is not None and best_delta <= max_skew_seconds: - return best.get("process") - - return candidates[0].get("process") - - def _pcap_first_epoch(self, network): - ts = [] - for k in ("dns", "http"): - for e in (network.get(k) or []): - v = e.get("first_seen") - if isinstance(v, (int, float)): - ts.append(float(v)) - return min(ts) if ts else None - - def _build_dns_events_rel(self, network, dns_intents, max_skew_seconds=10.0): - """ - Returns a list of dns events: - [{"t_rel": float, "process": {...}|None, "request": "example.com"}] - """ - out = [] - first_epoch = self._pcap_first_epoch(network) - if first_epoch is None: - return out - - for d in (network.get("dns") or []): - first_seen = d.get("first_seen") - if not isinstance(first_seen, (int, float)): - continue - t_rel = float(first_seen) - float(first_epoch) - proc = self._match_dns_process(d, dns_intents, max_skew_seconds=max_skew_seconds) - out.append({"t_rel": t_rel, "process": proc, "request": d.get("request")}) - - out.sort(key=lambda x: x["t_rel"]) - return out - - def _nearest_dns_process_by_rel_time(self, dns_events_rel, t_rel, max_skew=5.0): - if not dns_events_rel or not isinstance(t_rel, (int, float)): - return None - - best = None - best_delta = None - for e in dns_events_rel: - delta = abs(e["t_rel"] - float(t_rel)) - if best is None or delta < best_delta: - best = e - best_delta = delta - - if best is not None and best_delta is not None and 
best_delta <= max_skew: - return best.get("process") - return None - - def _set_proc_fields(self, obj, proc): - """ - Add process_id/process_name onto an existing network entry. - If proc is None, sets them to None (keeps template stable). - """ - if proc: - obj["process_id"] = proc.get("process_id") - obj["process_name"] = proc.get("process_name") - else: - obj["process_id"] = None - obj["process_name"] = None - - def run(self): - behavior = self._load_behavior() - network = self._load_network() - - endpoint_map, http_host_map = self._build_endpoint_to_process_map(behavior) - - for flow in (network.get("tcp") or []): - proc = None - if flow.get("dst") and flow.get("dport") is not None: - proc = self._pick_best(endpoint_map.get((flow["dst"], int(flow["dport"])), [])) - - self._set_proc_fields(flow, proc) - - dns_intents = self._build_dns_intents(behavior) - dns_events_rel = self._build_dns_events_rel(network, dns_intents, max_skew_seconds=10.0) - for d in (network.get("dns") or []): - proc = self._match_dns_process(d, dns_intents, max_skew_seconds=10.0) - self._set_proc_fields(d, proc) - - for flow in (network.get("udp") or []): - proc = None - dst = flow.get("dst") - dport = flow.get("dport") - sport = flow.get("sport") - - if dst and dport is not None: - proc = self._pick_best(endpoint_map.get((dst, int(dport)), [])) - - if not proc and (dport == 53 or sport == 53): - t_rel = flow.get("time") - proc = self._nearest_dns_process_by_rel_time(dns_events_rel, t_rel, max_skew=5.0) - - self._set_proc_fields(flow, proc) - - for key in ("http", "http_ex", "https_ex"): - for h in (network.get(key) or []): - proc = None - - host = h.get("host") - if isinstance(host, str) and host: - proc = self._pick_best(http_host_map.get(host, [])) - - if not proc and ":" in host: - raw = host.rsplit(":", 1)[0].strip() - if raw: - proc = self._pick_best(http_host_map.get(raw, [])) - - if not proc: - dst = h.get("dst") - dport = h.get("dport") - if dst and dport is not None: - proc = 
self._pick_best(endpoint_map.get((dst, int(dport)), [])) - - self._set_proc_fields(h, proc) - - self.results.setdefault("network", {}) - self.results["network"] = network - - return {} diff --git a/web/templates/analysis/network/_dns.html b/web/templates/analysis/network/_dns.html index 0c914942d53..b9102616b5b 100644 --- a/web/templates/analysis/network/_dns.html +++ b/web/templates/analysis/network/_dns.html @@ -14,7 +14,7 @@
DNS Reque {% for p in network.dns %} - {{p.request}} [VT] + {{p.request}} {% if p.source == "behavior" %}behavior{% endif %} [VT] {% if config.display_et_portal %} [ET] {% endif %} diff --git a/web/templates/analysis/network/_dns_not_ajax.html b/web/templates/analysis/network/_dns_not_ajax.html index 6935b613021..c0ae1d94788 100644 --- a/web/templates/analysis/network/_dns_not_ajax.html +++ b/web/templates/analysis/network/_dns_not_ajax.html @@ -5,10 +5,11 @@ Name Response Post-Analysis Lookup + {% if settings.NETWORK_PROC_MAP %} Process Name (PID) {% endif %} {% for p in analysis.network.dns %} - {{p.request}} [VT] + {{p.request}} {% if p.source == "behavior" %}behavior{% endif %} [VT] {% if config.display_et_portal %} [ET] {% endif %} @@ -48,6 +49,15 @@ {% endif %} {% endif %} + {% if settings.NETWORK_PROC_MAP %} + + {% if p.process_name %} + {{ p.process_name }}{% if p.process_id %} ({{ p.process_id }}){% endif %} + {% else %} + - + {% endif %} + + {% endif %} {% endfor %} diff --git a/web/templates/analysis/network/_hosts.html b/web/templates/analysis/network/_hosts.html index 31d687d1154..c1eb25adcca 100644 --- a/web/templates/analysis/network/_hosts.html +++ b/web/templates/analysis/network/_hosts.html @@ -11,6 +11,7 @@
HostsIP Country Name ASN + {% if settings.NETWORK_PROC_MAP %} Process Name (PID) {% endif %} {% for host in network.hosts %} @@ -34,6 +35,15 @@
Hosts{{host.asn}}{% if host.asn_name %} - {{host.asn_name}}{% endif %} {% endif %} + {% if settings.NETWORK_PROC_MAP %} + + {% if host.process_name %} + {{ host.process_name }}{% if host.process_id %} ({{ host.process_id }}){% endif %} + {% else %} + - + {% endif %} + + {% endif %} {% endif %} {% endfor %} diff --git a/web/templates/analysis/network/_hosts_not_ajax.html b/web/templates/analysis/network/_hosts_not_ajax.html index e5b81ff087e..fc6b177f6d0 100644 --- a/web/templates/analysis/network/_hosts_not_ajax.html +++ b/web/templates/analysis/network/_hosts_not_ajax.html @@ -6,6 +6,7 @@ IP Country Name ASN + {% if settings.NETWORK_PROC_MAP %} Process Name (PID) {% endif %} {% for host in analysis.network.hosts %} {% if host.ip|slice:":7" != "192.168" %} @@ -29,6 +30,15 @@ {% if host.asn %} {{host.asn}} {% endif %} + {% if settings.NETWORK_PROC_MAP %} + + {% if host.process_name %} + {{ host.process_name }}{% if host.process_id %} ({{ host.process_id }}){% endif %} + {% else %} + - + {% endif %} + + {% endif %} {% endif %} {% endfor %} diff --git a/web/templates/analysis/network/_http.html b/web/templates/analysis/network/_http.html index 922bfe1297c..af715355e64 100644 --- a/web/templates/analysis/network/_http.html +++ b/web/templates/analysis/network/_http.html @@ -72,7 +72,7 @@
HTTP Re {% for request in network.http %} - {{request.uri}} + {{request.uri}} {% if request.source == "behavior" %}behavior{% endif %}
{{request.data}}
{% if settings.NETWORK_PROC_MAP %} diff --git a/web/templates/analysis/network/_icmp.html b/web/templates/analysis/network/_icmp.html index d4bba980723..526cf7db7af 100644 --- a/web/templates/analysis/network/_icmp.html +++ b/web/templates/analysis/network/_icmp.html @@ -10,13 +10,23 @@
ICMP Tra Destination ICMP Type Data + {% if settings.NETWORK_PROC_MAP %} Process Name (PID) {% endif %} {% for packet in network.icmp %} - {{packet.src}} + {{packet.src}} {% if packet.source == "behavior" %}behavior{% endif %} {{packet.dst}} {{packet.type}} {{packet.data}} + {% if settings.NETWORK_PROC_MAP %} + + {% if packet.process_name %} + {{ packet.process_name }}{% if packet.process_id %} ({{ packet.process_id }}){% endif %} + {% else %} + - + {% endif %} + + {% endif %} {% endfor %} diff --git a/web/templates/analysis/network/_irc.html b/web/templates/analysis/network/_irc.html index 41631dfdc6a..e6d2961f209 100644 --- a/web/templates/analysis/network/_irc.html +++ b/web/templates/analysis/network/_irc.html @@ -9,12 +9,22 @@
IRC Tr Command Params Type + {% if settings.NETWORK_PROC_MAP %} Process Name (PID) {% endif %} {% for irc in network.irc %} - {{irc.command}} + {{irc.command}} {% if irc.source == "behavior" %}behavior{% endif %} {{irc.params}} {{irc.type}} + {% if settings.NETWORK_PROC_MAP %} + + {% if irc.process_name %} + {{ irc.process_name }}{% if irc.process_id %} ({{ irc.process_id }}){% endif %} + {% else %} + - + {% endif %} + + {% endif %} {% endfor %} diff --git a/web/templates/analysis/network/_smtp.html b/web/templates/analysis/network/_smtp.html index d99fed0f263..a0971a15ee4 100644 --- a/web/templates/analysis/network/_smtp.html +++ b/web/templates/analysis/network/_smtp.html @@ -8,10 +8,11 @@
SMTP T Destination Data + {% if settings.NETWORK_PROC_MAP %} Process Name (PID) {% endif %} {% for req in network.smtp_ex %} - {{req.dst}}:{{req.dport}} + {{req.dst}}:{{req.dport}} {% if req.source == "behavior" %}behavior{% endif %} {% if req %} @@ -42,6 +43,15 @@
SMTP T
+ {% if settings.NETWORK_PROC_MAP %} + + {% if req.process_name %} + {{ req.process_name }}{% if req.process_id %} ({{ req.process_id }}){% endif %} + {% else %} + - + {% endif %} + + {% endif %} {% endfor %} @@ -52,11 +62,21 @@
SMTP T Destination Data + {% if settings.NETWORK_PROC_MAP %} Process Name (PID) {% endif %} {% for req in network.smtp %} - {{req.dst}} + {{req.dst}} {% if req.source == "behavior" %}behavior{% endif %}
{{req.raw}}
+ {% if settings.NETWORK_PROC_MAP %} + + {% if req.process_name %} + {{ req.process_name }}{% if req.process_id %} ({{ req.process_id }}){% endif %} + {% else %} + - + {% endif %} + + {% endif %} {% endfor %} diff --git a/web/templates/analysis/network/_tcp.html b/web/templates/analysis/network/_tcp.html index ed256c02757..54f03cf8832 100644 --- a/web/templates/analysis/network/_tcp.html +++ b/web/templates/analysis/network/_tcp.html @@ -18,7 +18,7 @@
T {% for p in network.tcp %} - {{p.src}} + {{p.src}} {% if p.source == "behavior" %}behavior{% endif %} {{p.sport}} {{p.dst}} {% if network.iplookups %} diff --git a/web/templates/analysis/network/_udp.html b/web/templates/analysis/network/_udp.html index d8a55c606fd..1824f182956 100644 --- a/web/templates/analysis/network/_udp.html +++ b/web/templates/analysis/network/_udp.html @@ -18,7 +18,7 @@
U {% for p in network.udp %} - {{p.src}} + {{p.src}} {% if p.source == "behavior" %}behavior{% endif %} {{p.sport}} {{p.dst}} {% if network.iplookups %} diff --git a/web/web/settings.py b/web/web/settings.py index 78b48c40d4c..b285414c582 100644 --- a/web/web/settings.py +++ b/web/web/settings.py @@ -98,7 +98,7 @@ ADMIN = web_cfg.admin.enabled ANON_VIEW = web_cfg.general.anon_viewable ALLOW_DL_REPORTS_TO_ALL = web_cfg.general.reports_dl_allowed_to_all -NETWORK_PROC_MAP = pro_cfg.network_process_map.enabled +NETWORK_PROC_MAP = pro_cfg.network.process_map # If false run next command # python3 manage.py runserver_plus 0.0.0.0:8000 --traceback --keep-meta-shutdown