From 654729d23499385d3942c95aefc808d2f80736d2 Mon Sep 17 00:00:00 2001 From: doomedraven Date: Fri, 26 Sep 2025 16:56:17 +0200 Subject: [PATCH] GCS (#2712) * GCS --- conf/default/reporting.conf.default | 18 +++ docs/book/src/installation/host/gcs.rst | 46 ++++++++ docs/book/src/installation/host/index.rst | 1 + modules/reporting/gcs.py | 121 ++++++++++++++++++++ web/web/middleware/__init__.py | 1 + web/web/middleware/disable_auth_in_local.py | 26 +++++ 6 files changed, 213 insertions(+) create mode 100644 docs/book/src/installation/host/gcs.rst create mode 100644 modules/reporting/gcs.py create mode 100644 web/web/middleware/disable_auth_in_local.py diff --git a/conf/default/reporting.conf.default b/conf/default/reporting.conf.default index b40edc5ce0f..5797fba99da 100644 --- a/conf/default/reporting.conf.default +++ b/conf/default/reporting.conf.default @@ -216,3 +216,21 @@ enabled = no [browserext] enabled = no + +# Google Cloud Storage - Store all copy of analysis foldr in GCS +[gcs] +enabled = no +# The name of your Google Cloud Storage bucket where files will be uploaded. +bucket_name = your-gcs-bucket-name + +# Comma-separated list of DIRECTORY names to exclude from the upload. +# Good examples are 'shots' (contains all screenshots) or 'memory' (for full memory dumps). +exclude_dirs = logs, shots + +# Comma-separated list of exact FILENAMES to exclude from the upload. +# Good examples are large report formats you don't need in GCS. +exclude_files = + +# The absolute path to your Google Cloud service account JSON key file. +# This file is required for authentication. +credentials_path = data/gcp-credentials.json diff --git a/docs/book/src/installation/host/gcs.rst b/docs/book/src/installation/host/gcs.rst new file mode 100644 index 00000000000..6921640bed7 --- /dev/null +++ b/docs/book/src/installation/host/gcs.rst @@ -0,0 +1,46 @@ +.. 
_installation-and-setup: + +Installation and Setup +---------------------- + +Follow these steps to install and configure the GCS reporting module in your CAPE Sandbox environment. + +Prerequisites: Google Cloud Setup +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Before installing the module, you need to prepare your Google Cloud environment. + +1. **Create a GCS Bucket:** If you don't already have one, create a new bucket in the `Google Cloud Console `_. + +2. **Create a Service Account:** + * Go to **IAM & Admin** > **Service Accounts** in the Google Cloud Console. + * Click **Create Service Account** and give it a name (e.g., ``cape-sandbox-uploader``). + * Grant it the **Storage Object Creator** or **Storage Object Admin** role. This permission is necessary to write files to the bucket. + +3. **Download JSON Key:** + * After creating the service account, go to its **Keys** tab. + * Click **Add Key** > **Create new key**. + * Select ``JSON`` as the key type and click **Create**. A JSON file will be downloaded. + * **Securely move this JSON file to your CAPE server**, for example, to ``/opt/CAPEv2/data/gcp-credentials.json``. + + .. warning:: + Do not place the credentials file in a publicly accessible directory. + + +Module Installation and Configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. **Install the Python Library:** + The module depends on the official Google Cloud Storage library. Install it within your CAPE virtual environment. + + .. note:: + Install dependency ``poetry run pip install google-cloud-storage``. + +2. **Update Configuration:** + * Edit ``/opt/CAPEv2/conf/reporting.conf``. + * ``[gcs]`` section, enable ``enabled=yes``. + * Set ``bucket_name`` to the name of your GCS bucket. + * Set ``credentials_path`` to the **absolute path** where you saved your service account JSON key file. + +3. **Restart CAPE-processor:** + Restart the CAPE service: ``systemctl restart cape-processor`` for the changes to take effect. 
import logging
import os

from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.constants import CUCKOO_ROOT
from lib.cuckoo.common.exceptions import CuckooReportError

# Set up a logger for this module
log = logging.getLogger(__name__)

try:
    # Import the Google Cloud Storage client library
    from google.cloud import storage
    from google.oauth2 import service_account

    HAVE_GCS = True
except ImportError:
    HAVE_GCS = False


class GCS(Report):
    """Upload a copy of the analysis folder to a Google Cloud Storage bucket.

    Configuration comes from the ``[gcs]`` section of reporting.conf:
    ``bucket_name``, ``credentials_path`` (service-account JSON key, relative
    paths are resolved against CUCKOO_ROOT), and the optional comma-separated
    ``exclude_dirs`` / ``exclude_files`` lists.
    """

    # Run after every other reporting module so the analysis folder is complete.
    order = 9999

    def _parse_csv_option(self, name):
        """Return the comma-separated option *name* as a set of stripped tokens.

        Empty entries (e.g. from trailing commas) are discarded; a set gives
        O(1) membership tests during the directory walk.
        """
        raw = self.options.get(name, "")
        return {item.strip() for item in raw.split(",") if item.strip()}

    def _upload_tree(self, bucket, analysis_id, exclude_dirs, exclude_files):
        """Walk ``self.analysis_path`` and upload every non-excluded file.

        Files are stored under a ``<analysis_id>/`` prefix in the bucket,
        mirroring the relative layout of the analysis folder.
        """
        # self.analysis_path is the analysis results directory,
        # e.g. /opt/cape/storage/analyses/123/
        source_directory = self.analysis_path

        for root, dirs, files in os.walk(source_directory):
            # Prune excluded directories in-place so os.walk never descends
            # into them -- the most efficient way to skip whole subtrees.
            dirs[:] = [d for d in dirs if d not in exclude_dirs]

            for filename in files:
                if filename in exclude_files:
                    log.debug("Skipping excluded file: %s", os.path.join(root, filename))
                    continue

                local_path = os.path.join(root, filename)
                relative_path = os.path.relpath(local_path, source_directory)
                blob_name = f"{analysis_id}/{relative_path}"

                log.debug("Uploading '%s' to '%s'", local_path, blob_name)
                bucket.blob(blob_name).upload_from_filename(local_path)

    def run(self, results):
        """Run the Report module.

        Args:
            results (dict): The analysis results dictionary.

        Raises:
            CuckooReportError: If the module is misconfigured, the bucket is
                inaccessible, or any upload fails.
        """
        # Ensure the required library is installed.
        if not HAVE_GCS:
            log.error(
                "Failed to run GCS reporting module: the 'google-cloud-storage' "
                "library is not installed. Please run 'poetry run pip install google-cloud-storage'."
            )
            return

        # Read configuration options from reporting.conf and validate them.
        bucket_name = self.options.get("bucket_name")
        if not bucket_name:
            raise CuckooReportError("GCS bucket_name is not configured in reporting.conf -> gcs")

        credentials_path_str = self.options.get("credentials_path")
        if not credentials_path_str:
            raise CuckooReportError("GCS credentials_path is not configured in reporting.conf -> gcs")

        credentials_path = os.path.join(CUCKOO_ROOT, credentials_path_str)
        if not os.path.isfile(credentials_path):
            # BUGFIX: exception args are not %-formatted; build the message explicitly.
            raise CuckooReportError(
                f"GCS credentials_path '{credentials_path}' is invalid or file does not exist in reporting.conf -> gcs"
            )

        exclude_dirs = self._parse_csv_option("exclude_dirs")
        exclude_files = self._parse_csv_option("exclude_files")

        if exclude_dirs:
            log.debug("GCS reporting will exclude directories: %s", exclude_dirs)
        if exclude_files:
            log.debug("GCS reporting will exclude files: %s", exclude_files)

        # The analysis ID becomes the top-level "folder" in the bucket;
        # validate it before touching the network.
        analysis_id = results.get("info", {}).get("id")
        if not analysis_id:
            raise CuckooReportError("Could not get analysis ID from results.")

        try:
            # --- Authentication ---
            log.debug("Authenticating with Google Cloud Storage...")
            credentials = service_account.Credentials.from_service_account_file(credentials_path)
            storage_client = storage.Client(credentials=credentials)
            bucket = storage_client.bucket(bucket_name)

            # Check if the bucket exists and is accessible.
            if not bucket.exists():
                raise CuckooReportError(
                    f"The specified GCS bucket '{bucket_name}' does not exist or you don't have permission to access it."
                )

            log.debug("Uploading files for analysis ID %s to GCS bucket '%s'", analysis_id, bucket_name)
            self._upload_tree(bucket, analysis_id, exclude_dirs, exclude_files)
            log.info("Successfully uploaded files for analysis %s to GCS.", analysis_id)
        except CuckooReportError:
            # Our own errors are already descriptive; do not wrap them again.
            raise
        except Exception as e:
            # BUGFIX: format the message (exception args are not %-interpolated)
            # and chain the cause for a useful traceback.
            raise CuckooReportError(f"Failed to upload report to GCS: {e}") from e
class DisableAllauthMiddleware:
    """Wrap django-allauth's AccountMiddleware, bypassing it for local clients.

    Requests whose client address resolves to localhost skip the allauth
    account middleware entirely and continue straight down the middleware
    chain; all other requests are handed to the wrapped AccountMiddleware.
    """

    # Addresses treated as "local". NOTE(review): REMOTE_ADDR always holds an
    # IP, so the "localhost" entry only ever matches a forwarded header value.
    LOCAL_IPS = ("127.0.0.1", "::1", "localhost")

    def __init__(self, get_response):
        self.get_response = get_response
        # Instantiate the real AllAuth middleware that we will be wrapping.
        self.allauth_middleware = AccountMiddleware(get_response)

    def __call__(self, request):
        # SECURITY NOTE(review): X-Forwarded-For is client-controlled. Unless a
        # trusted reverse proxy strips or overwrites this header, any remote
        # client can send "X-Forwarded-For: 127.0.0.1" and bypass the auth
        # middleware. Only deploy this behind a proxy that sanitizes the header.
        forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR", request.META.get("REMOTE_ADDR", ""))
        remote_ip = forwarded_for.split(",")[0].strip()

        if remote_ip in self.LOCAL_IPS:
            # Local request: skip AllAuth and call the next middleware/view
            # in the chain directly. (Stray debug print() removed.)
            return self.get_response(request)

        # Non-local request: execute the wrapped AllAuth middleware as usual.
        return self.allauth_middleware(request)