diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index 5286c5252..e2c114252 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -53,7 +53,8 @@ libcuopt = "libcuopt" select = [ "distro-too-large-compressed", ] -max_allowed_size_compressed = '650M' + +max_allowed_size_compressed = '660M' [project.scripts] cuopt_cli = "libcuopt._cli_wrapper:main" diff --git a/regression/benchmark_scripts/benchmark.py b/regression/benchmark_scripts/benchmark.py new file mode 100644 index 000000000..ead8fe98a --- /dev/null +++ b/regression/benchmark_scripts/benchmark.py @@ -0,0 +1,382 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + + +import os +from multiprocessing import Process +import rmm +import time +import pandas as pd +import glob +import logging as log +from datetime import datetime +import argparse + +log.getLogger().setLevel(log.INFO) + + +def create_regression_markdown(data, regression_path, test_type_string): + regression_md_file = ( + regression_path + "/" + test_type_string + "_regressions.md" + ) + + md_data = "*No regressions*" + # This to reduce size of slack message + limit_no_of_regression_list = 5 + + if len(data) > 0: + status = "*!! Regressions found !!*" + end_msg = ( + "\n*Continues ...*" + if len(data) > limit_no_of_regression_list + else "" + ) + table = data[:limit_no_of_regression_list].to_string(index=False) + md_data = status + f"\n```\n{table}\n```" + end_msg + + with open(regression_md_file, "w") as fp: + fp.write(md_data) + + +def record_regressions( + test_name, data, req_metrics, regression_path, test_type_string +): + regression_file = ( + regression_path + "/" + test_type_string + "_regressions.csv" + ) + + regression_df = pd.DataFrame( + { + "Test Name": [], + "Metric Name": [], + "Value": [], + "Avg Value": [], + "Regression(%)": [], + } + ) + for name in req_metrics: + if name.startswith("bks_change_"): + pchange = data[name].iloc[-1].item() + metric_name = name.replace("bks_change_", "") + limit = req_metrics[metric_name]["bks"].get("threshold", 5) + prev_val_mean = pchange + latest_val = pchange + else: + limit = req_metrics[name].get("threshold", 5) + prev_val_mean = ( + data[name][:-1][-30:].mean().item() + if len(data) > 1 + else data[name].iloc[-1].item() + ) + latest_val = data[name].iloc[-1].item() + + if prev_val_mean == 0: + pchange = latest_val + else: + pchange = ((latest_val - prev_val_mean) / prev_val_mean) * 100 + + if abs(pchange) >= limit: + regression_df.loc[len(regression_df)] = [ + test_name, + name, + latest_val, + prev_val_mean, + pchange, + ] + + regression_df.to_csv(regression_file) + create_regression_markdown( + regression_df, regression_path, test_type_string + ) + + +def get_bks_change(metrics, required_metrics): + bks_metrics = {} + for metric, value in required_metrics.items(): + if "bks" in value.keys(): + bks = value["bks"]["value"] + if bks is None: + continue + current = metrics[metric] + if bks == 0: + bks_metrics["bks_change_" + metric] = abs(current) * 100 + elif current == 0: + bks_metrics["bks_change_" + metric] = abs(bks) * 100 + else: + bks_metrics["bks_change_" + metric] = abs( + ((current - bks) / bks) * 100 + ) + + return bks_metrics + + +def record_result( + test_name, metrics, required_metrics, csv_path, test_type_string +): + file_path = csv_path + "/" + if test_type_string == "lp" or test_type_string == "mip": + file_path += test_type_string + "_" + test_name + 
".csv" + else: + file_path += test_name + ".csv" + bks_metrics = get_bks_change(metrics, required_metrics) + + # Add default metrics to data + required_metrics.update(bks_metrics) + metrics.update(bks_metrics) + req_metrics = list(required_metrics.keys()) + ["date_time", "git_commit"] + + current_data = pd.DataFrame( + {key: [metrics[key]] for key in sorted(req_metrics)} + ) + if os.path.isfile(file_path): + previous_data = pd.read_csv(file_path, index_col=0) + updated_data = pd.concat( + [previous_data, current_data], ignore_index=True + ) + else: + updated_data = current_data + record_regressions( + test_name, updated_data, required_metrics, csv_path, test_type_string + ) + updated_data.to_csv(file_path) + + +def run_benchmark( + test_name, + data_model, + solver_settings, + required_metrics, + csv_path, + git_commit, + test_status_file, + d_type, +): + import rmm + + mr = rmm.mr.get_current_device_resource() + + from utils import LPMetrics, RoutingMetrics + from cuopt import linear_programming + from cuopt import routing + + start_time = time.time() + if d_type == "lp" or d_type == "mip": + metrics = LPMetrics()._asdict() + solver_settings.set_parameter("infeasibility_detection", False) + solver_settings.set_parameter("time_limit", 60) + solution = linear_programming.Solve(data_model, solver_settings) + else: + metrics = RoutingMetrics()._asdict() + solution = routing.Solve(data_model) + end_time = time.time() + + metrics["gpu_memory_usage"] = int( + mr.allocation_counts.peak_bytes / (1024 * 1024) + ) + metrics["date_time"] = datetime.now().strftime("%m_%d_%Y_%H_%M_%S") + metrics["git_commit"] = git_commit + + success_status = False + + if d_type == "lp" or d_type == "mip": + ## Optimal solution + acceptable_termination = ["Optimal", "TimeLimit", "FeasibleFound"] + if solution.get_termination_reason() in acceptable_termination: + success_status = True + metrics["solver_time"] = solution.get_solve_time() + metrics["primal_objective_value"] = solution.get_primal_objective() + if d_type == "lp": + lp_stats = solution.get_lp_stats() + metrics["nb_iterations"] = lp_stats["nb_iterations"] + else: + milp_stats = solution.get_milp_stats() + metrics["mip_gap"] = milp_stats["mip_gap"] + metrics["max_constraint_violation"] = milp_stats[ + "max_constraint_violation" + ] + metrics["max_int_violation"] = milp_stats["max_int_violation"] + metrics["max_variable_bound_violation"] = milp_stats[ + "max_variable_bound_violation" + ] + record_result( + test_name, metrics, required_metrics, csv_path, d_type + ) + else: + if solution.get_status() == 0: + success_status = True + metrics["solver_time"] = end_time - start_time + metrics["total_objective_value"] = solution.get_total_objective() + metrics["vehicle_count"] = solution.get_vehicle_count() + + objectives = solution.get_objective_values() + if "prize" in required_metrics: + metrics["prize"] = objectives[routing.Objective.PRIZE] + if "cost" in required_metrics: + metrics["cost"] = objectives[routing.Objective.COST] + if "travel_time" in required_metrics: + metrics["travel_time"] = objectives[ + routing.Objective.TRAVEL_TIME + ] + record_result( + test_name, metrics, required_metrics, csv_path, d_type + ) + return "SUCCESS" if success_status is True else "FAILED" + + +def reinitialize_rmm(): + pool_size = 2**30 + rmm.reinitialize(pool_allocator=True, initial_pool_size=pool_size) + + base_mr = rmm.mr.get_current_device_resource() + stats_mr = rmm.mr.StatisticsResourceAdaptor(base_mr) + rmm.mr.set_current_device_resource(stats_mr) + + return base_mr, 
stats_mr + + +def worker( + gpu_id, + dataset_file_path, + csv_path, + git_commit, + log_path, + test_status_file, + n_gpus, + d_type="routing", +): + import os + + os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id + + from utils import get_configuration + + data_files = [] + if d_type == "lp" or d_type == "mip": + data_files = glob.glob(dataset_file_path + "/*.mps") + else: + data_files = glob.glob(dataset_file_path + "/*_config.json") + idx = int(gpu_id) + n_files = 1 # len(data_files) + + while idx < n_files: + mr, stats_mr = reinitialize_rmm() + + data_file = data_files[idx] + test_name = str(data_file) + status = "FAILED" + try: + test_name, data_model, solver_settings, requested_metrics = ( + get_configuration(data_file, dataset_file_path, d_type) + ) + log.basicConfig( + level=log.INFO, + filename=log_path + "/" + test_name + "_log.txt", + filemode="a+", + format="%(asctime)-15s %(levelname)-8s %(message)s", + ) + log.getLogger().setLevel(log.INFO) + log.info( + f"------------- Test Start : {test_name} gpu id : {gpu_id} -------------------" + ) + status = run_benchmark( + test_name, + data_model, + solver_settings, + requested_metrics, + csv_path, + git_commit, + test_status_file, + d_type, + ) + except Exception as e: + log.error(str(e)) + + with open(test_status_file, "a") as f: + f.write("\n") + f.write(test_name + ": " + status) + + # Delete instance of rmm + del mr + del stats_mr + + log.info( + f"------------- Test End : {test_name} gpu id : {gpu_id} -------------------" + ) + idx = idx + n_gpus + + +def run( + dataset_file_path, + csv_path, + git_commit, + log_path, + test_status_file, + n_gpus, + d_type, +): + # Restricting n_gpus to one to avoid resource sharing + # n_gpus = 1 + procs = [] + for gpu_id in range(int(n_gpus)): + p = Process( + target=worker, + args=( + str(gpu_id), + dataset_file_path, + csv_path, + git_commit, + log_path, + test_status_file, + int(n_gpus), + d_type, + ), + ) + p.start() + procs.append(p) + + for p in procs: + p.join() + print("All processes finished.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "-c", "--config-path", type=str, help="Path to all configuration file" + ) + parser.add_argument( + "-r", + "--csv-path", + type=str, + help="Path to store result files, this would be for github where results gets stored", + ) + parser.add_argument( + "-g", + "--git-commit", + type=str, + help="git commit sha to keep track of runs", + ) + parser.add_argument("-l", "--log-path", type=str, help="Path to log files") + parser.add_argument( + "-s", + "--test-status-file", + type=str, + help="All test status will be stored in this file", + ) + parser.add_argument( + "-n", "--num-gpus", type=str, help="Number of GPUs available" + ) + parser.add_argument( + "-t", "--type", type=str, default="", help="Type of benchmark" + ) + args = parser.parse_args() + run( + args.config_path, + args.csv_path, + args.git_commit, + args.log_path, + args.test_status_file, + args.num_gpus, + args.type, + ) diff --git a/regression/benchmark_scripts/configs/README.md b/regression/benchmark_scripts/configs/README.md new file mode 100644 index 000000000..fb8283656 --- /dev/null +++ b/regression/benchmark_scripts/configs/README.md @@ -0,0 +1,19 @@ +# Creating configuration and data file for routing + +- For each test, create a configuration file and a corresponding data file. +- Refer `test_name_config.json` for the format of the configuration file. 
+- Supported metrics can be found in `cuopt/regression/benchmark_scripts/utils.py` +- File names should start with test names followed by `config` or data depending on type of it. +- Data file should be as per openapi spec of cuopt server +- These configuration and data files needs to be uploaded to `s3://cuopt-datasets/regression_datasets/` + + ``` + aws s3 cp /path/to/files s3://cuopt-datasets/regression_datasets/ + ``` + +# Creating configuration and data file for lp and milp + +- For each test, create a mps file +- Refer `lp_config.json` and `mip_config.json` for the format of the configuration file. +- Supported metrics can be found in `cuopt/regression/benchmark_scripts/utils.py` +- These configuration and data files needs to be in the LP_DATASETS_PATH set in config.sh diff --git a/regression/benchmark_scripts/configs/example_test_creation.py b/regression/benchmark_scripts/configs/example_test_creation.py new file mode 100644 index 000000000..0b376eb00 --- /dev/null +++ b/regression/benchmark_scripts/configs/example_test_creation.py @@ -0,0 +1,76 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# CONFIDENTIAL, provided under NDA. + +from cuopt.routing import utils +import json + +""" +This is an example of creating a modified test from Homberger dataset. +In this test, the RC2_10_5 test is modified so that the vehicle count is reduced to 12 and the order prizes are set. +The prizes are high enough so that prize always becomes the primary objective. +One can easily use an existing json file and modify the data as well by loading the json as a dictionary +""" +test_name = "prize_collection_vrp" +# test_name = "LC1_10_9" + +# base_file_name = "/home/nfs/rgandham/git-repos/reopt/datasets/pdptw/LC1_10_9.pdptw" +base_file_name = ( + "/home/nfs/rgandham/git-repos/reopt/datasets/cvrptw/RC2_10_5.TXT" +) + +# model_dict = utils.create_model_dictionary_from_file(base_file_name, is_pdp=True) +model_dict = utils.create_model_dictionary_from_file(base_file_name) + + +# Reduce the fleet size to 12 +num_vehicles = 12 +fleet_data = model_dict["fleet_data"] + +vehicle_locations = fleet_data["vehicle_locations"] +vehicle_tw = fleet_data["vehicle_time_windows"] +capacities = fleet_data["capacities"] + +new_locs = [vehicle_locations[i] for i in range(num_vehicles)] +new_tw = [vehicle_tw[i] for i in range(num_vehicles)] +new_cap = [[capacities[0][i] for i in range(num_vehicles)]] * 1 + +fleet_data["vehicle_locations"] = new_locs +fleet_data["vehicle_time_windows"] = new_tw +fleet_data["capacities"] = new_cap + +# Add prizes +task_data = model_dict["task_data"] + +n_tasks = len(task_data["demand"][0]) + +prizes = [10000.0] * n_tasks +task_data["prizes"] = prizes + + +# Set 10 min time limit +solver_config = {} +solver_config["time_limit"] = 600 + +model_dict["solver_config"] = solver_config + +test_config_file_name = test_name + "_config.json" +model_data_file_name = test_name + "_data.json" + +test_config = {} +test_config["test_name"] = test_name +test_config["file_name"] = model_data_file_name +test_config["metrics"] = [ + "vehicle_count", + "total_cost", + "prize", + "memory_usage", +] + +with open(test_config_file_name, "w") as fp: + json.dump(test_config, fp) + fp.close() + +with open(model_data_file_name, "w") as fp: + json.dump(model_dict, fp) + fp.close() diff --git a/regression/benchmark_scripts/configs/test_name_config.json b/regression/benchmark_scripts/configs/test_name_config.json new file mode 100644 index 000000000..d87b7bc84 
--- /dev/null +++ b/regression/benchmark_scripts/configs/test_name_config.json @@ -0,0 +1,23 @@ +{ + "test_name": "test_name", + "file_name": "test_name_data.json", + "metrics": { + "total_objective_value": { + "threshold": 5, + "unit": "total_objective_value" + }, + "vehicle_count": { + "threshold": 5, + "unit": "vehicle_count" + }, + "solver_time": { + "threshold": 5, + "unit": "seconds" + }, + "gpu_memory_usage": { + "threshold": 20, + "unit": "MB" + } + }, + "details": "Add details about you test" +} diff --git a/regression/benchmark_scripts/configs/test_name_data.json b/regression/benchmark_scripts/configs/test_name_data.json new file mode 100644 index 000000000..e6918ad58 --- /dev/null +++ b/regression/benchmark_scripts/configs/test_name_data.json @@ -0,0 +1,117 @@ +{ + "cost_waypoint_graph_data": { + "waypoint_graph": null + }, + "travel_time_waypoint_graph_data": { + "waypoint_graph": null + }, + "cost_matrix_data": { + "data": { + "0": [ + [ + 0, + 1, + 1 + ], + [ + 1, + 0, + 1 + ], + [ + 1, + 1, + 0 + ] + ] + } + }, + "travel_time_matrix_data": { + "data": null + }, + "task_data": { + "task_locations": [ + 0, + 1, + 2 + ], + "demand": [ + [ + 0, + 1, + 1 + ], + [ + 0, + 3, + 1 + ] + ], + "task_time_windows": [ + [ + 0, + 10 + ], + [ + 0, + 4 + ], + [ + 2, + 4 + ] + ], + "service_times": [ + 0, + 1, + 1 + ] + }, + "fleet_data": { + "vehicle_locations": [ + [ + 0, + 0 + ], + [ + 0, + 0 + ] + ], + "capacities": [ + [ + 2, + 2 + ], + [ + 4, + 1 + ] + ], + "vehicle_time_windows": [ + [ + 0, + 10 + ], + [ + 0, + 10 + ] + ], + "skip_first_trips": [ + false, + false + ], + "drop_return_trips": [ + false, + false + ], + "vehicle_max_costs": [ + 20, + 20 + ] + }, + "solver_config": { + "time_limit": 10 + } +} diff --git a/regression/benchmark_scripts/results/test_name.csv b/regression/benchmark_scripts/results/test_name.csv new file mode 100644 index 000000000..85bf3d976 --- /dev/null +++ b/regression/benchmark_scripts/results/test_name.csv @@ -0,0 +1,2 @@ +,solver_time,total_objective_value,vehicle_count +0,10.004132270812988,3.0,1 diff --git a/regression/benchmark_scripts/utils.py b/regression/benchmark_scripts/utils.py new file mode 100644 index 000000000..04b1cbb9f --- /dev/null +++ b/regression/benchmark_scripts/utils.py @@ -0,0 +1,70 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + + +from cuopt_server.utils.utils import build_routing_datamodel_from_json +from cuopt.linear_programming.solver_settings import SolverSettings +import cuopt_mps_parser +import os +import json +from typing import NamedTuple + + +def build_datamodel_from_mps(data): + """ + data: A file in mps format + """ + + if os.path.isfile(data): + data_model = cuopt_mps_parser.ParseMps(data) + else: + raise ValueError( + f"Invalid type : {type(data)} has been provided as input, " + "requires mps input" + ) + solver_settings = SolverSettings() + + return data_model, solver_settings + + +class RoutingMetrics(NamedTuple): + total_objective_value: float = -1 + vehicle_count: int = -1 + cost: float = -1 + prize: float = -1 + travel_time: float = -1 + solver_time: float = -1 + gpu_memory_usage: float = -1 + git_commit: str = "" + date_time: str = "" + + +class LPMetrics(NamedTuple): + primal_objective_value: float = -1 + solver_time: float = -1 + gpu_memory_usage: float = -1 + git_commit: str = "" + date_time: str = "" + + +def get_configuration(data_file, data_file_path, d_type): + data = {} + test_name = None + requested_metrics = {} + + if d_type == "lp" or d_type == "mip": + with open(data_file_path + "/" + d_type + "_config.json") as f: + data = json.load(f) + test_name = data_file.split("/")[-1].split(".")[0] + data_model, solver_settings = build_datamodel_from_mps(data_file) + requested_metrics = data["metrics"] + else: + with open(data_file) as f: + data = json.load(f) + test_name = data["test_name"] + data_model, solver_settings = build_routing_datamodel_from_json( + data_file_path + "/" + data["file_name"] + ) + requested_metrics = data["metrics"] + + return test_name, data_model, solver_settings, requested_metrics diff --git a/regression/config.sh b/regression/config.sh new file mode 100644 index 000000000..2b20597d0 --- /dev/null +++ b/regression/config.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# shellcheck disable=all +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + + +THIS_DIR=$(cd $(dirname ${BASH_SOURCE[0]}) && pwd) + +CUOPT_SCRIPTS_DIR=$THIS_DIR +OUTPUT_DIR=$SCRATCH_DIR/benchmark_runs/ + +ACCOUNT=datascience_rapids_testing +PARTITION="batch" +GPUS_PER_NODE=8 + +# Path to the squashs file containing the container image +IMAGE="nvidia/cuopt:26.2.0a-cuda12.9-py3.13" + +ALL_CONFIGS_PATH=$SCRATCH_DIR/configs/ +ROUTING_CONFIGS_PATH=$SCRATCH_DIR/routing_configs/ +ROUTING_DATASETS_PATH=$SCRATCH_DIR/routing_datasets/ +LP_DATASETS_PATH=$SCRATCH_DIR/lp_datasets/ +MIP_DATASETS_PATH=$SCRATCH_DIR/mip_datasets/ + +STATUS_FILE=$OUTPUT_DIR/status.txt +WORKER_RMM_POOL_SIZE=${WORKER_RMM_POOL_SIZE:-24G} + +DATASETS_DIR=$SCRATCH_DIR/datasets + +# Assume CUOPT_SLACK_APP_ID is defined! +CUOPT_SLACK_APP_ID="MY_SLACK_APP_ID" +WEBHOOK_URL=${WEBHOOK_URL:-https://hooks.slack.com/services/${CUOPT_SLACK_APP_ID}} +S3_FILE_PREFIX="MY_S3_FILE_PREFIX" +S3_URL_PREFIX="MY_S3_URL_PREFIX" + +# Most are defined using the bash := or :- syntax, which means they +# will be set only if they were previously unset. The project config +# is loaded first, which gives it the opportunity to override anything +# in this file that uses that syntax. If there are variables in this +# file that should not be overridded by a project, then they will +# simply not use that syntax and override, since these variables are +# read last. 
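+# For example, a project config (or the caller's environment) can do
+#   export WORKER_RMM_POOL_SIZE=48G        # 48G is an illustrative value
+# before this file is sourced, and the definition above,
+#   WORKER_RMM_POOL_SIZE=${WORKER_RMM_POOL_SIZE:-24G}
+# will keep that value, whereas plain assignments such as GPUS_PER_NODE=8
+# always take the value written in this file.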
+RESULTS_ARCHIVE_DIR=$OUTPUT_DIR/results +RESULTS_DIR=$RESULTS_ARCHIVE_DIR/latest +METADATA_FILE=$RESULTS_DIR/metadata.sh +WORKSPACE=$OUTPUT_DIR/workspace +TESTING_DIR=$WORKSPACE/testing +BENCHMARK_DIR=$WORKSPACE/benchmark +SCRIPTS_DIR=$THIS_DIR + +BUILD_LOG_FILE=$RESULTS_DIR/build_log.txt +DATE=${DATE:-$(date --utc "+%Y-%m-%d_%H:%M:%S")_UTC} + +# vars that are not overridden by the project. + +# These must remain relative to $RESULTS_DIR since some scripts assume +# that, and also assume the names "tests" and "benchmarks", and +# therefore cannot be overridden by a project. +TESTING_RESULTS_DIR=${RESULTS_DIR}/tests +BENCHMARK_RESULTS_DIR=${RESULTS_DIR}/benchmarks diff --git a/regression/create-html-reports.sh b/regression/create-html-reports.sh new file mode 100755 index 000000000..5b0883dda --- /dev/null +++ b/regression/create-html-reports.sh @@ -0,0 +1,236 @@ +#!/bin/bash +# shellcheck disable=SC1090 +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +# Must ensure PROJECT_DIR is exported first then load env +export PROJECT_DIR=${PROJECT_DIR:-$(cd "$(dirname ${BASH_SOURCE[0]})" && pwd)} +source ${PROJECT_DIR}/config.sh +source ${PROJECT_DIR}/functions.sh + +# FIXME: this assumes all reports are from running pytests +ALL_REPORTS=$(find ${RESULTS_DIR}/benchmarks/results/ -name "*status.txt") + +# Create the html describing the build and test run +REPORT_METADATA_HTML="" +PROJECT_VERSION="unknown" +PROJECT_BUILD="" +PROJECT_CHANNEL="unknown" +PROJECT_REPO_URL="unknown" +PROJECT_REPO_BRANCH="unknown" +if [ -f $METADATA_FILE ]; then + source $METADATA_FILE +fi +# Assume if PROJECT_BUILD is set then a conda version string should be +# created, else a git version string. +if [[ "$PROJECT_BUILD" != "" ]]; then + REPORT_METADATA_HTML=" + + + +
<tr><td>conda version</td><td>$PROJECT_VERSION</td></tr>
<tr><td>build</td><td>$PROJECT_BUILD</td></tr>
<tr><td>channel</td><td>$PROJECT_CHANNEL</td></tr>
+
" +else + REPORT_METADATA_HTML=" + + + +
<tr><td>commit hash</td><td>$PROJECT_VERSION</td></tr>
<tr><td>repo</td><td>$PROJECT_REPO_URL</td></tr>
<tr><td>branch</td><td>$PROJECT_REPO_BRANCH</td></tr>
+
" +fi + +################################################################################ +# create the html reports for each individual run (each +# pytest-results*.txt file) +if [ "$ALL_REPORTS" != "" ]; then + for report in $ALL_REPORTS; do + # Get the individual report name, and use the .txt file path + # to form the html report being generated (same location as + # the .txt file). This will be an abs path since it is a file + # on disk being written. + report_name=$(basename -s .txt $report) + html_report_abs_path=$(dirname $report)/${report_name}.html + echo " + + + ${report_name} + + +

${report_name}


" > $html_report_abs_path + echo "$REPORT_METADATA_HTML" >> $html_report_abs_path + echo " + + + +" >> $html_report_abs_path + awk '{ if($2 == "FAILED") { + color = "red" + } else { + color = "green" + } + printf "\n", $1, color, $2, $3, $3 + }' $report >> $html_report_abs_path + echo "
test filestatuslogs
%s%s%s
+ + + " >> $html_report_abs_path + done +fi + +################################################################################ +# Create a .html file for each *_log.txt file, which is just the contents +# of the log with a line number and anchor id for each line that can +# be used for sharing links to lines. +ALL_LOGS=$(find -L ${BENCHMARK_RESULTS_DIR} -type f -name "*_log.txt" -print) + +for f in $ALL_LOGS; do + base_no_extension=$(basename ${f: 0:-4}) + html=${f: 0:-4}.html + echo " + + + $base_no_extension + + + +

${base_no_extension}


+" > $html + awk '{ print ""NR":
"$0"

"}' $f >> $html + echo " + +" >> $html +done + +################################################################################ +# create the top-level report +STATUS='FAILED' +STATUS_IMG='https://img.icons8.com/cotton/80/000000/cancel--v1.png' +if [ "$ALL_REPORTS" != "" ]; then + if ! (grep -w FAILED $ALL_REPORTS > /dev/null); then + STATUS='PASSED' + STATUS_IMG='https://img.icons8.com/bubbles/100/000000/approval.png' + fi +fi +BUILD_LOG_HTML="(build log not available or build not run)" +BUILD_STATUS="" +if [ -f $BUILD_LOG_FILE ]; then + if [ -f ${BUILD_LOG_FILE: 0:-4}.html ]; then + BUILD_LOG_HTML="log (plain text)" + else + BUILD_LOG_HTML="log" + fi + tail -3 $BUILD_LOG_FILE | grep -w "done." + if (tail -3 $BUILD_LOG_FILE | grep -qw "done."); then + BUILD_STATUS="PASSED" + else + BUILD_STATUS="FAILED" + fi +fi + +report=${RESULTS_DIR}/report.html +echo " + + + test report + + +" > $report +echo "$REPORT_METADATA_HTML" >> $report +echo "\"${STATUS}\"/ Overall status: $STATUS
" >> $report +echo "Build: ${BUILD_STATUS} ${BUILD_LOG_HTML}
" >> $report +if [ "$ALL_REPORTS" != "" ]; then + echo "
Test Status
" >>$report + echo " + + + + " >> $report + for f in $ALL_REPORTS; do + report_name=$(basename -s .txt $f) + # report_path should be of the form "tests/foo.html" + prefix_to_remove="$RESULTS_DIR/" + report_rel_path=${f/$prefix_to_remove} + report_path=$(dirname $report_rel_path)/${report_name}.html + + if (grep -w FAILED $f > /dev/null); then + status="FAILED" + color="red" + else + status="PASSED" + color="green" + fi + echo "" >> $report + done + echo "
TestStatus
${report_name}${status}
" >> $report +else + echo "Tests were not run." >> $report +fi +prefix_to_remove="$RESULTS_DIR/" +plot_rel_path=${f/$prefix_to_remove} +plot_path=$(dirname $plot_rel_path)/asv/html/index.html +prefix_to_remove="$RESULTS_DIR/benchmarks/results/" +log_rel_path=${f/$prefix_to_remove} +log_path=$(dirname $log_rel_path)/index.html +echo "

\"Plots\"
Plots : Regression test results


" >>$report +echo "

\"Plots\"
Logs and Details : All the data for this run


" >>$report +echo " + +" >> $report + +################################################################################ +# (optional) generate the ASV html +if hasArg --run-asv; then + asv_config_file=$(find ${BENCHMARK_RESULTS_DIR}/results/asv -name "asv.conf.json") + if [ "$asv_config_file" != "" ]; then + asv update --config $asv_config_file + asv publish --config $asv_config_file + fi +fi + +################################################################################ +# Create an index.html for each dir (ALL_DIRS plus ".", but EXCLUDE +# the asv html) This is needed since S3 (and probably others) will not +# show the contents of a hosted directory by default, but will instead +# return the index.html if present. +# The index.html will just contain links to the individual files and +# subdirs present in each dir, just as if browsing in a file explorer. +ALL_DIRS=$(find -L ${RESULTS_DIR} -path ${BENCHMARK_RESULTS_DIR}/results/asv/html -prune -o -type d -printf "%P\n") + +for d in "." $ALL_DIRS; do + index=${RESULTS_DIR}/${d}/index.html + echo " + + + $d + + +

${d}


+" > $index + for f in ${RESULTS_DIR}/$d/*; do + b=$(basename $f) + # Do not include index.html in index.html (it's a link to itself) + if [[ "$b" == "index.html" ]]; then + continue + fi + if [ -d "$f" ]; then + echo "$b
" >> $index + # special case: if the file is a *_log.txt and has a corresponding .html + elif [[ "${f: -8}" == "_log.txt" ]] && [[ -f "${f: 0:-4}.html" ]]; then + markup="${b: 0:-4}.html" + plaintext=$b + echo "$markup (plain text)
" >> $index + elif [[ "${f: -9}" == "_log.html" ]] && [[ -f "${f: 0:-5}.txt" ]]; then + continue + else + echo "$b
" >> $index + fi + done + echo " + +" >> $index +done diff --git a/regression/cronjob.sh b/regression/cronjob.sh new file mode 100755 index 000000000..de05b22bc --- /dev/null +++ b/regression/cronjob.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# shellcheck disable=SC1090 +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Abort script on first error to ensure script-env.sh is sourced. +set -e + +if [[ -v SLURM_NODEID ]]; then + echo "Detected the env var SLURM_NODEID is set. Is this script running on a compute node?" + echo "This script must be run *outside* of a slurm job (this script starts slurm jobs, but is not a job itself)." + exit 1 +fi + +# Must ensure PROJECT_DIR is exported first then load rapids-mg-tools env +export PROJECT_DIR=${PROJECT_DIR:-$(cd "$(dirname ${BASH_SOURCE[0]})" && pwd)} + +source ${PROJECT_DIR}/config.sh +source ${PROJECT_DIR}/functions.sh + +RUN_BENCHMARKS=0 + +if hasArg --benchmark; then + RUN_BENCHMARKS=1 +fi + +if (! hasArg --test) && (! hasArg --benchmark); then + RUN_BENCHMARKS=1 +fi + +################################################################################ + +# Create a results dir unique for this run +setupResultsDir + +# Switch to allowing errors from commands, since test failures will +# result in non-zero return codes and this script should attempt to +# run all tests. +set +e + +################################################################################ +logger "Testing cuOpt in container..." +srun \ + --account $ACCOUNT \ + --partition $PARTITION \ + --job-name=test-container.testing \ + --nodes 1 \ + --gpus-per-node 1 \ + --time=120 \ + --export=ALL \ + --container-mounts=${CUOPT_SCRIPTS_DIR}:${CUOPT_SCRIPTS_DIR},${OUTPUT_DIR}:${OUTPUT_DIR} \ + --container-image=$IMAGE \ + --output=$BUILD_LOG_FILE \ + bash ${PROJECT_DIR}/test-container.sh +TESTING_FAILED=$? +logger "done testing container, return code was $TESTING_FAILED" + + +if [[ $TESTING_FAILED == 0 ]]; then + + ############################################################################ + # Setup and run tests + if [[ $RUN_BENCHMARKS == 1 ]]; then + logger "Running benchmarks..." + logger "GPUs per node : $GPUS_PER_NODE" + # SNMG tests - run in parallel + srun \ + --account $ACCOUNT \ + --partition $PARTITION \ + --job-name=run-nightly-benchmarks \ + --nodes 1 \ + --gpus-per-node $GPUS_PER_NODE \ + --time=4:00:00 \ + --export=ALL \ + --exclusive \ + -K \ + --container-mounts ${ROUTING_CONFIGS_PATH}:${ROUTING_CONFIGS_PATH},${CUOPT_SCRIPTS_DIR}:${CUOPT_SCRIPTS_DIR},${OUTPUT_DIR}:${OUTPUT_DIR} \ + --container-image=$IMAGE \ + --output=${BENCHMARK_RESULTS_DIR}/benchmark_routing_log.txt \ + bash ${CUOPT_SCRIPTS_DIR}/routing_regression_test.sh & + PID_1=$! + logger "Process ID $PID_1 in background" + + srun \ + --account $ACCOUNT \ + --partition $PARTITION \ + --job-name=run-nightly-benchmarks \ + --nodes 1 \ + --gpus-per-node $GPUS_PER_NODE \ + --time=4:00:00 \ + --export=ALL \ + --exclusive \ + -K \ + --container-mounts ${LP_DATASETS_PATH}:${LP_DATASETS_PATH},${CUOPT_SCRIPTS_DIR}:${CUOPT_SCRIPTS_DIR},${OUTPUT_DIR}:${OUTPUT_DIR} \ + --container-image=$IMAGE \ + --output=${BENCHMARK_RESULTS_DIR}/benchmark_lp_log.txt \ + bash ${CUOPT_SCRIPTS_DIR}/lp_regression_test.sh & + PID_2=$! 
+ + srun \ + --account $ACCOUNT \ + --partition $PARTITION \ + --job-name=run-nightly-benchmarks \ + --nodes 1 \ + --gpus-per-node $GPUS_PER_NODE \ + --time=4:00:00 \ + --export=ALL \ + --exclusive \ + -K \ + --container-mounts ${MIP_DATASETS_PATH}:${MIP_DATASETS_PATH},${CUOPT_SCRIPTS_DIR}:${CUOPT_SCRIPTS_DIR},${OUTPUT_DIR}:${OUTPUT_DIR} \ + --container-image=$IMAGE \ + --output=${BENCHMARK_RESULTS_DIR}/benchmark_mip_log.txt \ + bash ${CUOPT_SCRIPTS_DIR}/mip_regression_test.sh & + PID_3=$! + + wait $PID_1 $PID_2 $PID_3 + fi + +else # if [[ $TESTING_FAILED == 0 ]] + logger "Container testing Failed!" +fi + +################################################################################ +# Send report based on contents of $RESULTS_DIR +# These steps do not require a worker node. + +# When running both testing and benchmark and if some benchmarks fail, +# the entire nightly will fail. The benchmark logs reported on Slack +# contains information about the failures. +logger "Generating report" + +if [ -f $METADATA_FILE ]; then + source $METADATA_FILE +fi + +# Copy all config files to one folder +cp $ROUTING_CONFIGS_PATH/*config.json $LP_DATASETS_PATH/*config.json $MIP_DATASETS_PATH/*config.json $ALL_CONFIGS_PATH/ + +RUN_ASV_OPTION="" +if hasArg --skip-asv; then + logger "Skipping running ASV" +else + # Only create/update the asv database if there is both a commit Hash and a branch otherwise + # asv will return an error. If there is $PROJECT_BUILD, that implies there is Neither the + # git commit hash nor the branch which are required to create/update the asv db + if [[ "$PROJECT_BUILD" == "" ]]; then + # Update/create the ASV database + logger "Updating ASV database" + python $PROJECT_DIR/update_asv_database.py --commitHash=$PROJECT_VERSION --repo-url=$PROJECT_REPO_URL --branch=$PROJECT_REPO_BRANCH --commitTime=$PROJECT_REPO_TIME --results-dir=$RESULTS_DIR --machine-name=$MACHINE --gpu-type=$GPU_TYPE --configs=$ALL_CONFIGS_PATH + RUN_ASV_OPTION=--run-asv + logger "Updated ASV database" + else + logger "Detected a conda install, cannot run ASV since a commit hash/time is needed." + fi +fi + +# The cuopt pull has missing .git folder which causes subsequent runs, lets delete and pull it fresh everytime. +rm -rf $RESULTS_DIR/benchmarks/results/asv/cuopt/ +rm -rf $RESULTS_DIR/tests + +${SCRIPTS_DIR}/create-html-reports.sh $RUN_ASV_OPTION + +if hasArg --skip-sending-report; then + logger "Skipping sending report." +else + logger "Uploading to S3, posting to Slack" + ${PROJECT_DIR}/send-slack-report.sh +fi + +logger "cronjob.sh done." diff --git a/regression/functions.sh b/regression/functions.sh new file mode 100644 index 000000000..9d8147e82 --- /dev/null +++ b/regression/functions.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +# This file is source'd from script-env.sh to add functions to the +# calling environment, hence no #!/bin/bash as the first line. This +# also assumes the variables used in this file have been defined +# elsewhere. + +NUMARGS=$# +ARGS=$* +function hasArg { + (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") +} + +function logger { + echo -e ">>>> $*" +} + +# Calling "setTee outfile" will cause all stdout and stderr of the +# current script to be output to "tee", which outputs to stdout and +# "outfile" simultaneously. This is useful by allowing a script to +# "tee" itself at any point without being called with tee. 
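+# Usage sketch (illustrative): a script sourcing this file could run
+#   setTee ${RESULTS_DIR}/my_script_log.txt   # hypothetical log file name
+#   ...commands whose stdout/stderr should be captured...
+#   unsetTee
+# Everything between the two calls is written to both stdout and the file.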
+_origFileDescriptorsSaved=0 +function setTee { + if [[ $_origFileDescriptorsSaved == 0 ]]; then + # Save off the original file descr 1 and 2 as 3 and 4 + exec 3>&1 4>&2 + _origFileDescriptorsSaved=1 + fi + teeFile=$1 + # Create a named pipe. + pipeName=$(mktemp -u) + mkfifo $pipeName + # Close the currnet 1 and 2 and restore to original (3, 4) in the + # event this function is called repeatedly. + exec 1>&- 2>&- + exec 1>&3 2>&4 + # Start a tee process reading from the named pipe. Redirect stdout + # and stderr to the named pipe which goes to the tee process. The + # named pipe "file" can be removed and the tee process stays alive + # until the fd is closed. + tee -a < $pipeName $teeFile & + exec > $pipeName 2>&1 + rm $pipeName +} + +# Call this to stop script output from going to "tee" after a prior +# call to setTee. +function unsetTee { + if [[ $_origFileDescriptorsSaved == 1 ]]; then + # Close the current fd 1 and 2 which should stop the tee + # process, then restore 1 and 2 to original (saved as 3, 4). + exec 1>&- 2>&- + exec 1>&3 2>&4 + fi +} + +# Creates a unique results dir based on date, then links the common +# results dir name to it. +function setupResultsDir { + mkdir -p ${RESULTS_ARCHIVE_DIR}/${DATE} + # Store the target of $RESULTS_DIR before $RESULTS_DIR get linked to + # a different dir + previous_results=$(readlink -f $RESULTS_DIR) + + rm -rf $RESULTS_DIR + ln -s ${RESULTS_ARCHIVE_DIR}/${DATE} $RESULTS_DIR + mkdir -p $TESTING_RESULTS_DIR + mkdir -p $BENCHMARK_RESULTS_DIR/results/ + + old_asv_dir=$previous_results/benchmarks/results/asv + if [ -d $old_asv_dir ]; then + cp -r $old_asv_dir $BENCHMARK_RESULTS_DIR/results + fi +} + + +# echos the name of the directory that $1 is linked to. Useful for +# getting the actual path of the results dir since that is often +# sym-linked to a unique (based on timestamp) results dir name. +function getNonLinkedFileName { + linkname=$1 + targetname=$(readlink -f $linkname) + if [[ "$targetname" != "" ]]; then + echo $targetname + else + echo $linkname + fi +} + +function waitForSlurmJobsToComplete { + ids=$* + jobs=$(python -c "print(\",\".join(\"$ids\".split()))") # make a comma-separated list + jobsInQueue=$(squeue --noheader --jobs=$jobs) + while [[ $jobsInQueue != "" ]]; do + sleep 2 + jobsInQueue=$(squeue --noheader --jobs=$jobs) + done +} + +# Clones repo from URL specified by $1 as name $2 in to directory +# $3. For example: +# "cloneRepo https://github.com/rapidsai/cugraph.git /my/repos cg" +# results in cugraph being cloned to /my/repos/cg. +# NOTE: This removes any existing cloned repos that match the +# destination. +function cloneRepo { + repo_url=$1 + repo_name=$2 + dest_dir=$3 + mkdir -p $dest_dir + pushd $dest_dir > /dev/null || exit + logger "Clone $repo_url in $dest_dir..." + if [ -d $repo_name ]; then + rm -rf $repo_name + if [ -d $repo_name ]; then + echo "ERROR: ${dest_dir}/$repo_name was not completely removed." + error 1 + fi + fi + git clone $repo_url + popd > /dev/null || exit +} + +# Only define this function if it has not already been defined in the +# current environment, which allows the project to override it from +# its functions.sh file that was previously source'd. +if [[ $(type -t activateCondaEnv) == "" ]]; then + function activateCondaEnv { + logger "Activating conda env ${CONDA_ENV}..." 
+ eval "$(conda shell.bash hook)" + conda activate $CONDA_ENV + } +fi diff --git a/regression/get_datasets.py b/regression/get_datasets.py new file mode 100644 index 000000000..bb2a9f23d --- /dev/null +++ b/regression/get_datasets.py @@ -0,0 +1,915 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-License-Identifier: Apache-2.0 + +import os +import sys +import urllib.request +import urllib.parse +import ssl +import subprocess + + +# From: https://plato.asu.edu/bench.html +# Folder containg instances: +# - https://miplib2010.zib.de/miplib2010.php +# - https://www.netlib.org/lp/data/ +# - https://old.sztaki.hu/~meszaros/public_ftp/lptestset/ (and it's subfolders) +# - https://plato.asu.edu/ftp/lptestset/ (and it's subfolders) +# - https://miplib.zib.de/tag_benchmark.html +# - https://miplib.zib.de/tag_collection.html + +LPFeasibleMittelmannSet = [ + "L1_sixm250obs", + "Linf_520c", + "a2864", + "bdry2", + "cont1", + "cont11", + "datt256_lp", + "dlr1", + "ex10", + "fhnw-binschedule1", + "fome13", + "graph40-40", + "irish-electricity", + "neos", + "neos3", + "neos-3025225", + "neos-5052403-cygnet", + "neos-5251015", + "ns1687037", + "ns1688926", + "nug08-3rd", + "pds-100", + "physiciansched3-3", + "qap15", + "rail02", + "rail4284", + "rmine15", + "s82", + "s100", + "s250r10", + "savsched1", + "scpm1", + "shs1023", + "square41", + "stat96v2", + "stormG2_1000", + "stp3d", + "supportcase10", + "tpl-tub-ws1617", + "woodlands09", + "Dual2_5000", + "Primal2_1000", + "thk_48", + "thk_63", + "L1_sixm1000obs", + "L2CTA3D", + "degme", + "dlr2", + "set-cover-model", +] + +MiplibInstances = [ + "30n20b8.mps", + "cryptanalysiskb128n5obj14.mps", + "graph20-20-1rand.mps", + "n2seq36q.mps", + "neos-4338804-snowy.mps", + "neos-957323.mps", + "rail01.mps", + "splice1k1.mps", + "50v-10.mps", + "cryptanalysiskb128n5obj16.mps", + "graphdraw-domain.mps", + "n3div36.mps", + "neos-4387871-tavua.mps", + "neos-960392.mps", + "rail02.mps", + "square41.mps", + "academictimetablesmall.mps", + "csched007.mps", + "h80x6320d.mps", + "n5-3.mps", + "neos-4413714-turia.mps", + "net12.mps", + "rail507.mps", + "square47.mps", + "air05.mps", + "csched008.mps", + "highschool1-aigio.mps", + "neos-1122047.mps", + "neos-4532248-waihi.mps", + "netdiversion.mps", + "ran14x18-disj-8.mps", + "supportcase10.mps", + "app1-1.mps", + "cvs16r128-89.mps", + "hypothyroid-k1.mps", + "neos-1171448.mps", + "neos-4647030-tutaki.mps", + "nexp-150-20-8-5.mps", + "rd-rplusc-21.mps", + "supportcase12.mps", + "app1-2.mps", + "dano3_3.mps", + "ic97_potential.mps", + "neos-1171737.mps", + "neos-4722843-widden.mps", + "ns1116954.mps", + "reblock115.mps", + "supportcase18.mps", + "assign1-5-8.mps", + "dano3_5.mps", + "icir97_tension.mps", + "neos-1354092.mps", + "neos-4738912-atrato.mps", + "ns1208400.mps", + "rmatr100-p10.mps", + "supportcase19.mps", + "atlanta-ip.mps", + "decomp2.mps", + "irish-electricity.mps", + "neos-1445765.mps", + "neos-4763324-toguru.mps", + "ns1644855.mps", + "rmatr200-p5.mps", + "supportcase22.mps", + "b1c1s1.mps", + "drayage-100-23.mps", + "irp.mps", + "neos-1456979.mps", + "neos-4954672-berkel.mps", + "ns1760995.mps", + "rocI-4-11.mps", + "supportcase26.mps", + "bab2.mps", + "drayage-25-23.mps", + "istanbul-no-cutoff.mps", + "neos-1582420.mps", + "neos-5049753-cuanza.mps", + "ns1830653.mps", + "rocII-5-11.mps", + "supportcase33.mps", + "bab6.mps", + "dws008-01.mps", + "k1mushroom.mps", + "neos17.mps", + "neos-5052403-cygnet.mps", + "ns1952667.mps", + 
"rococoB10-011000.mps", + "supportcase40.mps", + "beasleyC3.mps", + "eil33-2.mps", + "lectsched-5-obj.mps", + "neos-2075418-temuka.mps", + "neos-5093327-huahum.mps", + "nu25-pr12.mps", + "rococoC10-001000.mps", + "supportcase42.mps", + "binkar10_1.mps", + "eilA101-2.mps", + "leo1.mps", + "neos-2657525-crna.mps", + "neos-5104907-jarama.mps", + "neos-5104907-jarama.mps", + "nursesched-medium-hint03.mps", + "roi2alpha3n4.mps", + "supportcase6.mps", + "blp-ar98.mps", + "enlight_hard.mps", + "leo2.mps", + "neos-2746589-doon.mps", + "neos-5107597-kakapo.mps", + "nursesched-sprint02.mps", + "roi5alpha10n8.mps", + "supportcase7.mps", + "blp-ic98.mps", + "ex10.mps", + "lotsize.mps", + "neos-2978193-inde.mps", + "neos-5114902-kasavu.mps", + "nw04.mps", + "roll3000.mps", + "swath1.mps", + "bnatt400.mps", + "ex9.mps", + "mad.mps", + "neos-2987310-joes.mps", + "neos-5188808-nattai.mps", + "opm2-z10-s4.mps", + "s100.mps", + "swath3.mps", + "bnatt500.mps", + "exp-1-500-5-5.mps", + "map10.mps", + "neos-3004026-krka.mps", + "neos-5195221-niemur.mps", + "p200x1188c.mps", + "s250r10.mps", + "tbfp-network.mps", + "bppc4-08.mps", + "fast0507.mps", + "map16715-04.mps", + "neos-3024952-loue.mps", + "neos5.mps", + "peg-solitaire-a3.mps", + "satellites2-40.mps", + "thor50dday.mps", + "brazil3.mps", + "fastxgemm-n2r6s0t2.mps", + "markshare2.mps", + "neos-3046615-murg.mps", + "neos-631710.mps", + "pg5_34.mps", + "satellites2-60-fs.mps", + "timtab1.mps", + "buildingenergy.mps", + "fhnw-binpack4-48.mps", + "markshare_4_0.mps", + "neos-3083819-nubu.mps", + "neos-662469.mps", + "pg.mps", + "savsched1.mps", + "tr12-30.mps", + "cbs-cta.mps", + "fhnw-binpack4-4.mps", + "mas74.mps", + "neos-3216931-puriri.mps", + "neos-787933.mps", + "physiciansched3-3.mps", + "sct2.mps", + "traininstance2.mps", + "chromaticindex1024-7.mps", + "fiball.mps", + "mas76.mps", + "neos-3381206-awhea.mps", + "neos-827175.mps", + "physiciansched6-2.mps", + "seymour1.mps", + "traininstance6.mps", + "chromaticindex512-7.mps", + "gen-ip002.mps", + "mc11.mps", + "neos-3402294-bobin.mps", + "neos-848589.mps", + "piperout-08.mps", + "seymour.mps", + "trento1.mps", + "cmflsp50-24-8-8.mps", + "gen-ip054.mps", + "mcsched.mps", + "neos-3402454-bohle.mps", + "neos859080.mps", + "piperout-27.mps", + "sing326.mps", + "triptim1.mps", + "CMS750_4.mps", + "germanrr.mps", + "mik-250-20-75-4.mps", + "neos-3555904-turama.mps", + "neos-860300.mps", + "pk1.mps", + "sing44.mps", + "uccase12.mps", + "co-100.mps", + "gfd-schedulen180f7d50m30k18.mps", + "milo-v12-6-r2-40-1.mps", + "neos-3627168-kasai.mps", + "neos-873061.mps", + "proteindesign121hz512p9.mps", + "snp-02-004-104.mps", + "uccase9.mps", + "cod105.mps", + "glass4.mps", + "momentum1.mps", + "neos-3656078-kumeu.mps", + "neos8.mps", + "proteindesign122trx11p8.mps", + "sorrell3.mps", + "uct-subprob.mps", + "comp07-2idx.mps", + "glass-sc.mps", + "mushroom-best.mps", + "neos-3754480-nidda.mps", + "neos-911970.mps", + "qap10.mps", + "sp150x300d.mps", + "unitcal_7.mps", + "comp21-2idx.mps", + "gmu-35-40.mps", + "mzzv11.mps", + "neos-3988577-wolgan.mps", + "neos-933966.mps", + "radiationm18-12-05.mps", + "sp97ar.mps", + "var-smallemery-m6j6.mps", + "cost266-UUE.mps", + "gmu-35-50.mps", + "mzzv42z.mps", + "neos-4300652-rahue.mps", + "neos-950242.mps", + "radiationm40-10-02.mps", + "sp98ar.mps", + "wachplan.mps", +] + +MittelmannInstances = { + "emps": "https://old.sztaki.hu/~meszaros/public_ftp/lptestset/emps.c", + "problems": { + "irish-electricity": [ + "https://plato.asu.edu/ftp/lptestset/irish-electricity.mps.bz2", + 
"mps", + ], + "physiciansched3-3": [ + "https://plato.asu.edu/ftp/lptestset/physiciansched3-3.mps.bz2", + "mps", + ], + "16_n14": [ + "https://plato.asu.edu/ftp/lptestset/network/16_n14.mps.bz2", + "mps", + ], + "Dual2_5000": [ + "https://plato.asu.edu/ftp/lptestset/Dual2_5000.mps.bz2", + "mps", + ], + "L1_six1000": [ + "https://plato.asu.edu/ftp/lptestset/L1_sixm1000obs.bz2", + "netlib", + ], + "L1_sixm": ["", "mps"], + "L1_sixm1000obs": [ + "https://plato.asu.edu/ftp/lptestset/L1_sixm1000obs.bz2", + "netlib", + ], + "L1_sixm250": ["", "netlib"], + "L1_sixm250obs": [ + "https://plato.asu.edu/ftp/lptestset/L1_sixm250obs.bz2", + "netlib", + ], + "L2CTA3D": [ + "https://plato.asu.edu/ftp/lptestset/L2CTA3D.mps.bz2", + "mps", + ], + "Linf_520c": [ + "https://plato.asu.edu/ftp/lptestset/Linf_520c.bz2", + "netlib", + ], + "Primal2_1000": [ + "https://plato.asu.edu/ftp/lptestset/Primal2_1000.mps.bz2", + "mps", + ], + "a2864": ["https://plato.asu.edu/ftp/lptestset/a2864.mps.bz2", "mps"], + "bdry2": ["https://plato.asu.edu/ftp/lptestset/bdry2.bz2", "netlib"], + "braun": ["", "mps"], + "cont1": [ + "https://plato.asu.edu/ftp/lptestset/misc/cont1.bz2", + "netlib", + ], + "cont11": [ + "https://plato.asu.edu/ftp/lptestset/misc/cont11.bz2", + "netlib", + ], + "datt256": [ + "https://plato.asu.edu/ftp/lptestset/datt256_lp.mps.bz2", + "mps", + ], + "datt256_lp": [ + "https://plato.asu.edu/ftp/lptestset/datt256_lp.mps.bz2", + "mps", + ], + "degme": [ + "https://old.sztaki.hu/~meszaros/public_ftp/lptestset/New/degme.gz", + "netlib", + ], + "dlr1": ["https://plato.asu.edu/ftp/lptestset/dlr1.mps.bz2", "mps"], + "dlr2": ["https://plato.asu.edu/ftp/lptestset/dlr2.mps.bz2", "mps"], + "energy1": ["", "mps"], # Kept secret by Mittlemman + "energy2": ["", "mps"], + "ex10": ["https://plato.asu.edu/ftp/lptestset/ex10.mps.bz2", "mps"], + "fhnw-binschedule1": [ + "https://plato.asu.edu/ftp/lptestset/fhnw-binschedule1.mps.bz2", + "mps", + ], + "fome13": [ + "https://plato.asu.edu/ftp/lptestset/fome/fome13.bz2", + "netlib", + ], + "gamora": ["", "mps"], # Kept secret by Mittlemman + "goto14_256_1": ["", "mps"], + "goto14_256_2": ["", "mps"], + "goto14_256_3": ["", "mps"], + "goto14_256_4": ["", "mps"], + "goto14_256_5": ["", "mps"], + "goto16_64_1": ["", "mps"], + "goto16_64_2": ["", "mps"], + "goto16_64_3": ["", "mps"], + "goto16_64_4": ["", "mps"], + "goto16_64_5": ["", "mps"], + "goto32_512_1": ["", "mps"], + "goto32_512_2": ["", "mps"], + "goto32_512_3": ["", "mps"], + "goto32_512_4": ["", "mps"], + "goto32_512_5": ["", "mps"], + "graph40-40": [ + "https://plato.asu.edu/ftp/lptestset/graph40-40.mps.bz2", + "mps", + ], + "graph40-40_lp": [ + "https://plato.asu.edu/ftp/lptestset/graph40-40.mps.bz2", + "mps", + ], + "groot": ["", "mps"], # Kept secret by Mittlemman + "heimdall": ["", "mps"], # Kept secret by Mittlemman + "hulk": ["", "mps"], # Kept secret by Mittlemman + "i_n13": [ + "https://plato.asu.edu/ftp/lptestset/network/i_n13.mps.bz2", + "mps", + ], + "irish-e": ["", "mps"], + "karted": [ + "https://old.sztaki.hu/~meszaros/public_ftp/lptestset/New/karted.gz", + "netlib", + ], + "lo10": [ + "https://plato.asu.edu/ftp/lptestset/network/lo10.mps.bz2", + "mps", + ], + "loki": ["", "mps"], # Kept secret by Mittlemman + "long15": [ + "https://plato.asu.edu/ftp/lptestset/network/long15.mps.bz2", + "mps", + ], + "nebula": ["", "mps"], # Kept secret by Mittlemman + "neos": [ + "https://plato.asu.edu/ftp/lptestset/misc/neos.bz2", + "netlib", + ], + "neos-3025225": [ + 
"https://plato.asu.edu/ftp/lptestset/neos-3025225.mps.bz2", + "mps", + ], + "neos-3025225_lp": [ + "https://plato.asu.edu/ftp/lptestset/neos-3025225.mps.bz2", + "mps", + ], + "neos-5251015": [ + "https://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", + "mps", + ], + "neos-5251015_lp": [ + "https://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", + "mps", + ], + "neos3": [ + "https://plato.asu.edu/ftp/lptestset/misc/neos3.bz2", + "netlib", + ], + "neos-5052403-cygnet": [ + "https://plato.asu.edu/ftp/lptestset/neos-5052403-cygnet.mps.bz2", + "mps", + ], + "neos5251015_lp": [ + "https://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", + "mps", + ], + "neos5251915": [ + "https://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", + "mps", + ], + "netlarge1": [ + "https://plato.asu.edu/ftp/lptestset/network/netlarge1.mps.bz2", + "mps", + ], + "netlarge2": [ + "https://plato.asu.edu/ftp/lptestset/network/netlarge2.mps.bz2", + "mps", + ], + "netlarge3": [ + "https://plato.asu.edu/ftp/lptestset/network/netlarge3.mps.bz2", + "mps", + ], + "netlarge6": [ + "https://plato.asu.edu/ftp/lptestset/network/netlarge6.mps.bz2", + "mps", + ], + "ns1687037": [ + "https://plato.asu.edu/ftp/lptestset/misc/ns1687037.bz2", + "netlib", + ], + "ns1688926": [ + "https://plato.asu.edu/ftp/lptestset/misc/ns1688926.bz2", + "netlib", + ], + "nug08-3rd": [ + "https://plato.asu.edu/ftp/lptestset/nug/nug08-3rd.bz2", + "netlib", + ], + "pds-100": [ + "https://plato.asu.edu/ftp/lptestset/pds/pds-100.bz2", + "netlib", + ], + "psched3-3": ["", "mps"], + "qap15": ["https://plato.asu.edu/ftp/lptestset/qap15.mps.bz2", "mps"], + "rail02": ["https://miplib2010.zib.de/download/rail02.mps.gz", "mps"], + "rail4284": [ + "https://plato.asu.edu/ftp/lptestset/rail/rail4284.bz2", + "netlib", + ], + "rmine15": [ + "https://plato.asu.edu/ftp/lptestset/rmine15.mps.bz2", + "mps", + ], + "s100": ["https://plato.asu.edu/ftp/lptestset/s100.mps.bz2", "mps"], + "s250r10": [ + "https://plato.asu.edu/ftp/lptestset/s250r10.mps.bz2", + "mps", + ], + "s82": ["https://plato.asu.edu/ftp/lptestset/s82.mps.bz2", "mps"], + "savsched1": [ + "https://plato.asu.edu/ftp/lptestset/savsched1.mps.bz2", + "mps", + ], + "scpm1": ["https://plato.asu.edu/ftp/lptestset/scpm1.mps.bz2", "mps"], + "set-cover-model": [ + "https://plato.asu.edu/ftp/lptestset/set-cover-model.mps.bz2", + "mps", + ], + "shs1023": [ + "https://miplib2010.zib.de/download/shs1023.mps.gz", + "mps", + ], + "square15": [ + "https://plato.asu.edu/ftp/lptestset/network/square15.mps.bz2", + "mps", + ], + "square41": [ + "https://plato.asu.edu/ftp/lptestset/square41.mps.bz2", + "mps", + ], + "stat96v2": [ + "https://old.sztaki.hu/~meszaros/public_ftp/lptestset/misc/stat96v2.gz", + "netlib", + ], + "stormG2_1000": [ + "https://plato.asu.edu/ftp/lptestset/misc/stormG2_1000.bz2", + "netlib", + ], + "storm_1000": ["", "mps"], + "stp3d": [ + "https://miplib.zib.de/WebData/instances/stp3d.mps.gz", + "mps", + ], + "supportcase10": [ + "https://plato.asu.edu/ftp/lptestset/supportcase10.mps.bz2", + "mps", + ], + "support19": [ + "https://plato.asu.edu/ftp/lptestset/supportcase19.mps.bz2", + "mps", + ], + "supportcase19": [ + "https://plato.asu.edu/ftp/lptestset/supportcase19.mps.bz2", + "mps", + ], + "test03": ["", "mps"], # Kept secret by Mittlemman + "test13": ["", "mps"], # Kept secret by Mittlemman + "test23": ["", "mps"], # Kept secret by Mittlemman + "test33": ["", "mps"], # Kept secret by Mittlemman + "test43": ["", "mps"], # Kept secret by Mittlemman + "test53": ["", "mps"], # Kept secret by 
Mittlemman + "test63": ["", "mps"], # Kept secret by Mittlemman + "test83": ["", "mps"], # Kept secret by Mittlemman + "test93": ["", "mps"], # Kept secret by Mittlemman + "mars": ["", "mps"], # Kept secret by Mittlemman + "thk_48": [ + "https://plato.asu.edu/ftp/lptestset/thk_48.mps.bz2", + "mps", + ], + "thk_63": [ + "https://plato.asu.edu/ftp/lptestset/thk_63.mps.bz2", + "mps", + ], + "thor": ["", "mps"], # Kept secret by Mittlemman + "tpl-tub-ws": ["", "mps"], + "tpl-tub-ws1617": [ + "https://plato.asu.edu/ftp/lptestset/tpl-tub-ws1617.mps.bz2", + "mps", + ], + "wide15": [ + "https://plato.asu.edu/ftp/lptestset/network/wide15.mps.bz2", + "mps", + ], + "woodlands09": [ + "https://plato.asu.edu/ftp/lptestset/woodlands09.mps.bz2", + "mps", + ], + }, + "benchmarks": { + "simplex": [ + "L1_sixm", + "L1_sixm250obs", + "Linf_520c", + "a2864", + "bdry2", + "braun", + "cont1", + "cont11", + "datt256", + "dlr1", + "energy1", + "energy2", + "ex10", + "fhnw-binschedule1", + "fome13", + "gamora", + "graph40-40", + "groot", + "heimdall", + "hulk", + "irish-e", + "loki", + "nebula", + "neos", + "neos-3025225_lp", + "neos-5251015_lp", + "neos3", + "neos3025225", + "neos5052403", + "neos5251015_lp", + "ns1687037", + "ns1688926", + "nug08-3rd", + "pds-100", + "psched3-3", + "qap15", + "rail02", + "rail4284", + "rmine15", + "s100", + "s250r10", + "s82", + "savsched1", + "scpm1", + "shs1023", + "square41", + "stat96v2", + "stormG2_1000", + "storm_1000", + "stp3d", + "support10", + "test03", + "test13", + "test23", + "test33", + "test43", + "test53", + "thor", + "tpl-tub-ws", + "tpl-tub-ws16", + "woodlands09", + ], + "barrier": [ + "Dual2_5000", + "L1_six1000", + "L1_sixm1000obs", + "L1_sixm250", + "L1_sixm250obs", + "L2CTA3D", + "Linf_520c", + "Primal2_1000", + "a2864", + "bdry2", + "cont1", + "cont11", + "datt256", + "degme", + "dlr1", + "dlr2", + "ex10", + "fhnw-binschedule1", + "fome13", + "graph40-40", + "irish-e", + "karted", + "neos", + "neos-3025225_lp", + "neos-5251015_lp", + "neos3", + "neos3025225", + "neos5052403", + "neos5251915", + "ns1687037", + "ns1688926", + "nug08-3rd", + "pds-100", + "psched3-3", + "qap15", + "rail02", + "rail4284", + "rmine15", + "s100", + "s250r10", + "s82", + "savsched1", + "scpm1", + "set-cover-model", + "shs1023", + "square41", + "stat96v2", + "stormG2_1000", + "storm_1000", + "stp3d", + "support10", + "support19", + "supportcase19", + "thk_63", + "tpl-tub-ws", + "tpl-tub-ws16", + "woodlands09", + ], + "large": [ + "16_n14", + "goto14_256_1", + "goto14_256_2", + "goto14_256_3", + "goto14_256_4", + "goto14_256_5", + "goto16_64_1", + "goto16_64_2", + "goto16_64_3", + "goto16_64_4", + "goto16_64_5", + "goto32_512_1", + "goto32_512_2", + "goto32_512_3", + "goto32_512_4", + "goto32_512_5", + "i_n13", + "lo10", + "long15", + "netlarge1", + "netlarge2", + "netlarge3", + "netlarge6", + "square15", + "wide15", + ], + # <=100s in bench: https://plato.asu.edu/ftp/lpbar.html + "L0": [ + "ex10", + "datt256", + "graph40-40", + "neos5251915", + "nug08-3rd", + "qap15", + "savsched1", + "scpm1", + "a2864", + "support10", + "rmine15", + "fome13", + "L2CTA3D", + "neos5052403", + "karted", + "stp3d", + "woodlands09", + "rail4284", + "L1_sixm250", + "tpl-tub-ws", + ], + # >100 <1000 + "L1": [ + "s250r10", + "pds-100", + "set-cover-model", + "neos3025225", + "rail02", + "square41", + "degme", + "Linf_520c", + "cont1", + "neos", + "stat96v2", + "support19", + "shs1023", + "storm_1000", + ], + # >1000 + "L2": [ + "thk_63", + "Primal2_1000", + "L1_six1000", + "Dual2_5000", + "s100", + 
"fhnw-binschedule1", + "cont11", + "psched3-3", + ], + # t -> >15000 + "L3": [ + "dlr2", + "bdry2", + "dlr1", + "irish-e", + "ns1687037", + "ns1688926", + "s82", + ], + }, +} + + +def download(url, dst): + if os.path.exists(dst): + return + print(f"Downloading {url} into {dst}...") + # Bypass SSL verification for plato.asu.edu URLs + if "plato.asu.edu" in url: + context = ssl.create_default_context() + context.check_hostname = False + context.verify_mode = ssl.CERT_NONE + response = urllib.request.urlopen(url, context=context) + else: + response = urllib.request.urlopen(url) + data = response.read() + with open(dst, "wb") as fp: + fp.write(data) + + +def extract(file, dir, type): + basefile = os.path.basename(file) + outfile = "" + unzippedfile = "" + if basefile.endswith(".bz2"): + outfile = basefile.replace(".bz2", ".mps") + unzippedfile = basefile.replace(".bz2", "") + subprocess.run(f"cd {dir} && bzip2 -d {basefile}", shell=True) + elif basefile.endswith(".gz"): + outfile = basefile.replace(".gz", ".mps") + unzippedfile = basefile.replace(".gz", "") + subprocess.run( + f"cd {dir} && gunzip -c {basefile} > {unzippedfile}", shell=True + ) + subprocess.run(f"cd {dir} && rm -rf {basefile}", shell=True) + else: + raise Exception(f"Unknown file extension found for extraction {file}") + # download emps and compile + # Disable emps for now + if type == "netlib": + url = MittelmannInstances["emps"] + file = os.path.join(dir, "emps.c") + download(url, file) + subprocess.run( + f"cd {dir} && gcc -Wno-implicit-int emps.c -o emps", shell=True + ) + # determine output file and run emps + subprocess.run( + f"cd {dir} && ./emps {unzippedfile} > {outfile}", shell=True + ) + subprocess.run(f"cd {dir} && rm -rf {unzippedfile}", shell=True) + # cleanup emps and emps.c + subprocess.run(f"rm -rf {dir}/emps*", shell=True) + + +def download_lp_dataset(name, dir): + if name not in MittelmannInstances["problems"]: + raise Exception(f"Unknown dataset {name} passed") + if os.path.exists(dir): + if os.path.exists(os.path.join(dir, f"{name}.mps")): + print( + f"Dir for dataset {name} exists and contains {name}.mps. Skipping..." + ) + return + url, type = MittelmannInstances["problems"][name] + if url == "": + print(f"Dataset {name} doesn't have a URL. Skipping...") + return + file = os.path.join(dir, os.path.basename(url)) + download(url, file) + extract(file, dir, type) + + +def download_mip_dataset(name, dir): + base_url = "https://miplib.zib.de/WebData/instances" + url = f"{base_url}/{name}.gz" + outfile = f"{dir}/{name}.gz" + if os.path.exists(dir): + if os.path.exists(os.path.join(dir, f"{name}")): + print( + f"Dir for dataset {name} exists and contains {name}.mps. Skipping..." 
+            )
+            return
+    download(url, outfile)
+    extract(outfile, dir, "")
+
+
+datasets_path = sys.argv[1]
+dataset_type = sys.argv[2]
+
+if dataset_type == "lp":
+    for name in LPFeasibleMittelmannSet:
+        download_lp_dataset(name, datasets_path)
+elif dataset_type == "mip":
+    for name in MiplibInstances:
+        download_mip_dataset(name, datasets_path)
diff --git a/regression/lp_config.json b/regression/lp_config.json
new file mode 100644
index 000000000..e2f8a9e93
--- /dev/null
+++ b/regression/lp_config.json
@@ -0,0 +1,13 @@
+{
+    "details": "LP test",
+    "metrics": {
+        "primal_objective_value": {
+            "threshold": 1,
+            "unit": "primal_objective_value"
+        },
+        "solver_time": {
+            "threshold": 1,
+            "unit": "seconds"
+        }
+    }
+}
diff --git a/regression/lp_regression_test.sh b/regression/lp_regression_test.sh
new file mode 100644
index 000000000..23b28cd1f
--- /dev/null
+++ b/regression/lp_regression_test.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Abort script on first error
+set -e
+
+# Must ensure PROJECT_DIR is exported first then load rapids-mg-tools env
+export PROJECT_DIR=${PROJECT_DIR:-$(cd "$(dirname ${BASH_SOURCE[0]})" && pwd)}
+source ${PROJECT_DIR}/config.sh
+source ${PROJECT_DIR}/functions.sh
+
+################################################################################
+
+# Extract the build meta-data from either the conda environment or the
+# cuOpt source dir and write out a file which can be read by other
+# scripts. If the cuOpt conda packages are present, those take
+# precedence, otherwise meta-data will be extracted from the sources.
+
+GIT_COMMIT=$( /dev/null); then
+    STATUS='PASSED'
+    STATUS_IMG='https://img.icons8.com/bubbles/100/000000/approval.png'
+  fi
+
+fi
+
+# Generate a one-line summary based on existence of certain reports, etc.
+if [[ "$ALL_REPORTS" == "" ]]; then
+    ONE_LINE_SUMMARY="*Build failed*"
+elif [[ "$STATUS" == "FAILED" ]]; then
+    if (grep -w FAILED $BENCHMARK_REPORT > /dev/null); then
+        ONE_LINE_SUMMARY="*One or more benchmarks failed*"
+    fi
+    if (grep -w FAILED $TEST_REPORT > /dev/null); then
+        ONE_LINE_SUMMARY="*One or more tests failed*"
+    fi
+    if (grep -w FAILED $TEST_REPORT > /dev/null) && (grep -w FAILED $BENCHMARK_REPORT > /dev/null); then
+        ONE_LINE_SUMMARY="*One or more tests and benchmarks failed*"
+    fi
+else
+    ONE_LINE_SUMMARY="Build succeeded, all tests and benchmarks passed"
+fi
+
+RESULTS_DIR_NAME=$(basename "$(getNonLinkedFileName $RESULTS_DIR)")
+
+# Upload everything
+logger "Uploading all files in $RESULTS_DIR ..."
+logger "Uploading all files in $RESULTS_DIR_NAME ..."
+aws s3 cp --follow-symlinks --acl public-read --recursive ${RESULTS_DIR} ${S3_FILE_PREFIX}/${RESULTS_DIR_NAME}
+logger "done uploading all files in $RESULTS_DIR"
+
+# Set vars used in the report
+PROJECT_VERSION_STRING=""
+PROJECT_VERSION=""
+PROJECT_BUILD=""
+PROJECT_CHANNEL=""
+PROJECT_REPO_URL=""
+PROJECT_REPO_BRANCH=""
+if [ -f $METADATA_FILE ]; then
+    source $METADATA_FILE
+fi
+# Assume if PROJECT_BUILD is set then a conda version string should be
+# created, else a git version string.
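+# PROJECT_BUILD and the other PROJECT_* values above are populated by sourcing
+# $METADATA_FILE, which is written by write-meta-data.sh.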
+if [[ "$PROJECT_BUILD" != "" ]]; then
+    PROJECT_VERSION_STRING=" cuOpt ver.: $PROJECT_VERSION
+     build: $PROJECT_BUILD
+     channel: $PROJECT_CHANNEL"
+else
+    PROJECT_VERSION_STRING=" cuOpt ver.: $PROJECT_VERSION
+     repo: $PROJECT_REPO_URL
+     branch: $PROJECT_REPO_BRANCH"
+fi
+
+export STATUS
+export STATUS_IMG
+export PROJECT_VERSION_STRING
+HUMAN_READABLE_DATE="$(date '+`%D`, `%H:%M` (PT)')"
+export HUMAN_READABLE_DATE
+# These files should be created by create-html-reports.sh
+export REPORT_URL="${S3_URL_PREFIX}/${RESULTS_DIR_NAME}/report.html"
+export ASV_URL="${S3_URL_PREFIX}/${RESULTS_DIR_NAME}/benchmarks/asv/html/index.html"
+export LOGS_URL="${S3_URL_PREFIX}/${RESULTS_DIR_NAME}/index.html"
+# export SPREADSHEET_URL=$SPREADSHEET_URL
+export ONE_LINE_SUMMARY
+
+echo
+echo "REPORT_URL: ${REPORT_URL}"
+# echo "SPREADSHEET_URL: ${SPREADSHEET_URL}"
+
+if hasArg --skip-sending-report; then
+    logger "Skipping sending Slack report."
+else
+    echo "$(envsubst < ${PROJECT_DIR}/slack_msg.json)"
+    curl -X POST \
+         -H 'Content-type: application/json' \
+         --data "$(envsubst < ${PROJECT_DIR}/slack_msg.json)" \
+         ${WEBHOOK_URL}
+fi
diff --git a/regression/slack_msg.json b/regression/slack_msg.json
new file mode 100644
index 000000000..a73e659b2
--- /dev/null
+++ b/regression/slack_msg.json
@@ -0,0 +1,68 @@
+{
+    "channel": "cuopt-regression-testing",
+    "username": "cuOpt Messaging",
+    "icon_emoji": ":robot_face:",
+    "blocks": [
+        {
+            "type": "section",
+            "text": {
+                "type": "mrkdwn",
+                "text": "${ONE_LINE_SUMMARY}"
+            }
+        },
+        {
+            "type": "divider"
+        },
+        {
+            "type": "section",
+            "text": {
+                "type": "mrkdwn",
+                "text": "<${REPORT_URL}|*Results Report*>\nBuild status and test results."
+            },
+            "accessory": {
+                "type": "button",
+                "url": "${REPORT_URL}",
+                "text": {
+                    "type": "plain_text",
+                    "emoji": true,
+                    "text": "View"
+                },
+                "value": "click_me_123"
+            }
+        },
+        {
+            "type": "section",
+            "text": {
+                "type": "mrkdwn",
+                "text": "<${ASV_URL}|*ASV Dashboard*>\nBenchmark results."
+            },
+            "accessory": {
+                "type": "button",
+                "url": "${ASV_URL}",
+                "text": {
+                    "type": "plain_text",
+                    "emoji": true,
+                    "text": "View"
+                },
+                "value": "click_me_123"
+            }
+        },
+        {
+            "type": "section",
+            "text": {
+                "type": "mrkdwn",
+                "text": "<${LOGS_URL}|*Logs*>\nAll available logs."
+            },
+            "accessory": {
+                "type": "button",
+                "url": "${LOGS_URL}",
+                "text": {
+                    "type": "plain_text",
+                    "emoji": true,
+                    "text": "View"
+                },
+                "value": "click_me_123"
+            }
+        }
+    ]
+}
diff --git a/regression/test-container.sh b/regression/test-container.sh
new file mode 100644
index 000000000..4aeacb567
--- /dev/null
+++ b/regression/test-container.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Tests the cuopt installation inside the benchmarking container image.
+
+# Abort script on first error
+set -e
+
+# Must ensure PROJECT_DIR is exported first then load rapids-mg-tools env
+export PROJECT_DIR=${PROJECT_DIR:-$(cd "$(dirname ${BASH_SOURCE[0]})" && pwd)}
+source ${PROJECT_DIR}/config.sh
+
+################################################################################
+
+# Test
+logger "Testing container image $IMAGE"
+python -c "import cuopt; print(cuopt)"
+
+trap '${SCRIPTS_DIR}/write-meta-data.sh' EXIT
+
+# Other scripts look for this to be the last line to determine if this
+# script completed successfully. This is only possible because of the
+# "set -e" above.
+echo "done."
+logger "done."
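For reference, the Slack notification sent by the report script above is produced by exporting the report variables and rendering slack_msg.json with envsubst. A minimal local smoke test of that template might look like the following sketch; the exported values, the repo-relative path, and the use of python3 -m json.tool are illustrative and not part of this change:

    export ONE_LINE_SUMMARY="Build succeeded, all tests and benchmarks passed"
    export REPORT_URL="https://example.com/results/report.html"
    export ASV_URL="https://example.com/results/benchmarks/asv/html/index.html"
    export LOGS_URL="https://example.com/results/index.html"
    # Render the template and confirm the result is still valid JSON
    envsubst < regression/slack_msg.json | python3 -m json.tool > /dev/null && echo "slack_msg.json OK"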
diff --git a/regression/update_asv_database.py b/regression/update_asv_database.py new file mode 100644 index 000000000..bef64bd31 --- /dev/null +++ b/regression/update_asv_database.py @@ -0,0 +1,157 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +import platform +import psutil +from asvdb import BenchmarkInfo, BenchmarkResult, ASVDb +import json +import pandas as pd + + +def update_asv_db( + commitHash=None, + commitTime=None, + branch=None, + repo_url=None, + results_dir=None, + machine_name=None, + gpu_type=None, + configs=None, +): + """ + Read the benchmark_result* files in results_dir/benchmarks and + update an existing asv benchmark database or create one if one + does not exist in results_dir/benchmarks/asv. If no + benchmark_result* files are present, return without updating or + creating. + """ + + # commitHash = commitHash + str(int(time.time())) + benchmark_dir_path = Path(results_dir) / "benchmarks" / "results" / "csvs" + asv_dir_path = Path(results_dir) / "benchmarks" / "results" / "asv" + + # List all benchmark_result files + benchmark_result_list = benchmark_dir_path.glob("*.csv") + + bResultList = [] + # Skip these columns from benchmarking + skip_columns = ["date_time", "git_commit"] + + # Create result objects for each benchmark result and store it in a list + for file_name in benchmark_result_list: + # skip if it's regression file + if "regressions.csv" in str(file_name): + continue + with open(file_name, "r") as openfile: + data = pd.read_csv(openfile, index_col=0).iloc[-1] + test_name = str(file_name).split("/")[-1].split(".")[-2] + config_file = None + if test_name.startswith("lp"): + config_file = configs + "/" + "lp_config.json" + elif test_name.startswith("mip"): + config_file = configs + "/" + "mip_config.json" + else: + config_file = configs + "/" + test_name + "_config.json" + metrics = {} + with open(config_file, "r") as fp: + metrics = json.load(fp)["metrics"] + for col_name in data.index: + if col_name not in skip_columns: + bResult = BenchmarkResult( + funcName=test_name + "." + col_name, + result=data[col_name].item(), + unit="percentage" + if "bks" in col_name + else metrics[col_name]["unit"], + ) + bResultList.append(bResult) + + if len(bResultList) == 0: + print( + "Could not find files matching 'csv' in " + f"{benchmark_dir_path}, not creating/updating ASV database " + f"in {asv_dir_path}." + ) + return + + uname = platform.uname() + # Maybe also write those metadata to metadata.sh ? 
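+    # The values collected below populate asvdb's BenchmarkInfo; osType and
+    # pythonVer are deliberately trimmed to short "major.minor"-style strings
+    # so the ASV dashboard labels stay compact.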
+ osType = "%s %s" % (uname.system, uname.release) + # Remove unnecessary osType detail + osType = ".".join(osType.split("-")[0].split(".", 2)[:2]) + pythonVer = platform.python_version() + # Remove unnecessary python version detail + pythonVer = ".".join(pythonVer.split(".", 2)[:2]) + bInfo_dict = { + "machineName": machine_name, + # cudaVer : "10.0", + "osType": osType, + "pythonVer": pythonVer, + "commitHash": commitHash, + "branch": branch, + # commit time needs to be in milliseconds + "commitTime": commitTime * 1000, + "gpuType": gpu_type, + "cpuType": uname.processor, + "arch": uname.machine, + "ram": "%d" % psutil.virtual_memory().total, + } + bInfo = BenchmarkInfo(**bInfo_dict) + + # extract the branch name + branch = bInfo_dict["branch"] + + db = ASVDb(dbDir=str(asv_dir_path), repo=repo_url, branches=[branch]) + + for res in bResultList: + db.addResult(bInfo, res) + + +if __name__ == "__main__": + import argparse + + ap = argparse.ArgumentParser() + ap.add_argument( + "--commitHash", type=str, required=True, help="project version" + ) + ap.add_argument( + "--commitTime", type=str, required=True, help="project version date" + ) + ap.add_argument( + "--repo-url", type=str, required=True, help="project repo url" + ) + ap.add_argument("--branch", type=str, required=True, help="project branch") + ap.add_argument( + "--results-dir", + type=str, + required=True, + help="directory to store the results in json files", + ) + ap.add_argument( + "--machine-name", type=str, required=True, help="Slurm cluster name" + ) + ap.add_argument( + "--gpu-type", + type=str, + required=True, + help="the official product name of the GPU", + ) + ap.add_argument( + "--configs", + type=str, + required=True, + help="the config file for all the tests", + ) + args = ap.parse_args() + + update_asv_db( + commitHash=args.commitHash, + commitTime=int(args.commitTime), + branch=args.branch, + repo_url=args.repo_url, + results_dir=args.results_dir, + machine_name=args.machine_name, + gpu_type=args.gpu_type, + configs=args.configs, + ) diff --git a/regression/write-meta-data.sh b/regression/write-meta-data.sh new file mode 100755 index 000000000..020631469 --- /dev/null +++ b/regression/write-meta-data.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Abort script on first error +set -e + +# Must ensure PROJECT_DIR is exported first then load rapids-mg-tools env +export PROJECT_DIR=${PROJECT_DIR:-$(cd "$(dirname ${BASH_SOURCE[0]})" && pwd)} +source ${PROJECT_DIR}/config.sh +source ${PROJECT_DIR}/functions.sh + +PROJECT_VERSION=$(> $METADATA_FILE +echo "PROJECT_VERSION=\"$PROJECT_VERSION\"" >> $METADATA_FILE +echo "PROJECT_BUILD=\"$PROJECT_BUILD\"" >> $METADATA_FILE +echo "PROJECT_CHANNEL=\"$PROJECT_CHANNEL\"" >> $METADATA_FILE +echo "PROJECT_REPO_URL=\"$PROJECT_REPO_URL\"" >> $METADATA_FILE +echo "PROJECT_REPO_BRANCH=\"$PROJECT_REPO_BRANCH\"" >> $METADATA_FILE +echo "PROJECT_REPO_TIME=\"$PROJECT_REPO_TIME\"" >> $METADATA_FILE
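For reference, a hypothetical invocation of regression/update_asv_database.py that wires together the commit information and the metadata written above; all variable values, the machine name, and the GPU type are illustrative placeholders, not part of this change:

    python regression/update_asv_database.py \
        --commitHash "$GIT_COMMIT" \
        --commitTime "$(git show -s --format=%ct "$GIT_COMMIT")" \
        --repo-url "$PROJECT_REPO_URL" \
        --branch "$PROJECT_REPO_BRANCH" \
        --results-dir "$RESULTS_DIR" \
        --machine-name "benchmark-node-01" \
        --gpu-type "NVIDIA H100" \
        --configs regression

    # --commitTime is expected in seconds (the script converts it to milliseconds
    # for asvdb); --configs points at the directory holding lp_config.json,
    # mip_config.json and the per-test *_config.json files.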