From c3fa9350bf0aa7f1d97bb0363c0d4ab0f589e9dd Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 13 May 2025 11:07:38 +0800 Subject: [PATCH 1/4] multi devices support --- cv/classification/main.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/cv/classification/main.py b/cv/classification/main.py index b780d7e9..91f9de19 100755 --- a/cv/classification/main.py +++ b/cv/classification/main.py @@ -122,10 +122,17 @@ def parse_option(): required=False, help="local rank for DistributedDataParallel", ) + parser.add_argument( + "--device", + type=str, + default="cuda", + help="Specify the device to run the model on. Options: 'cuda', 'cpu', or 'npu'.", + ) args, unparsed = parser.parse_known_args() config = get_config(args) + config["DEVICE"] = args.device.lower() return args, config @@ -141,7 +148,7 @@ def main(config): logger.info(f"Creating model:{config.MODEL.ARCH}") model = build_model(config) - model.cuda() + model.to(config.DEVICE) optimizer = build_optimizer(config, model) model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False, use_bucket=False) @@ -255,8 +262,8 @@ def train_one_epoch( start = time.time() end = time.time() for idx, (samples, targets) in enumerate(data_loader): - samples = samples.cuda() - targets = targets.cuda() + samples = samples.to(config.DEVICE) + targets = targets.to(config.DEVICE).to(flow.int32) if mixup_fn is not None: samples, targets = mixup_fn(samples, targets) @@ -324,8 +331,8 @@ def validate(config, data_loader, model): end = time.time() for idx, (images, target) in enumerate(data_loader): - images = images.cuda() - target = target.cuda() + images = images.to(config.DEVICE) + target = target.to(config.DEVICE).to(flow.int32) # compute output output = model(images) @@ -370,7 +377,7 @@ def throughput(data_loader, model, logger): model.eval() for idx, (images, _) in enumerate(data_loader): - images = images.cuda() + images = images.to(config.DEVICE) batch_size = 
images.shape[0] for i in range(50): model(images) @@ -453,4 +460,7 @@ def throughput(data_loader, model, logger): # print config logger.info(config.dump()) + if config.DEVICE == "npu": + import oneflow_npu + main(config) From 656ca33820c9e8d6114cf9847a3cc6079a58ce84 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 13 May 2025 11:11:16 +0800 Subject: [PATCH 2/4] update README --- cv/classification/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cv/classification/README.md b/cv/classification/README.md index 41020387..bf04f48a 100644 --- a/cv/classification/README.md +++ b/cv/classification/README.md @@ -105,5 +105,13 @@ Bash script `infer.sh` is used to infer the trained model. sh infer.sh ``` +### Multi-Device Support (Experimental) +This branch introduces preliminary support for running on different device types. To train on an NPU device, add the following argument to your train.sh command: + +```bash +--device=npu +``` + +> Note: The label_smoothing feature is currently not supported in this branch. If your configuration file (e.g., configs/default_settings.yaml) includes label_smoothing, please disable it (set it to 0.0) to avoid errors. 
From 206ab33bba30e95db54ff13497e65c72e8cb625d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 13 May 2025 11:37:59 +0800 Subject: [PATCH 3/4] auto detect device --- cv/classification/main.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cv/classification/main.py b/cv/classification/main.py index 91f9de19..6841b782 100755 --- a/cv/classification/main.py +++ b/cv/classification/main.py @@ -7,6 +7,7 @@ import argparse import datetime import numpy as np +import importlib.util import oneflow as flow import oneflow.backends.cudnn as cudnn @@ -39,6 +40,15 @@ def build_model(config): return model +def detect_device(): + if flow.cuda.is_available(): + return "cuda" + elif importlib.util.find_spec("oneflow_npu") is not None: + return "npu" + else: + return "cpu" + + def parse_option(): parser = argparse.ArgumentParser( "Flowvision image classification training and evaluation script", add_help=False @@ -125,7 +135,7 @@ def parse_option(): parser.add_argument( "--device", type=str, - default="cuda", + default=detect_device(), help="Specify the device to run the model on. Options: 'cuda', 'cpu', or 'npu'.", ) From ddf1ff2fd49266b8c5387b6bef68f349dd1a6b7d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 13 May 2025 11:45:16 +0800 Subject: [PATCH 4/4] update readme --- cv/classification/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cv/classification/README.md b/cv/classification/README.md index bf04f48a..97b926d5 100644 --- a/cv/classification/README.md +++ b/cv/classification/README.md @@ -107,7 +107,12 @@ sh infer.sh ### Multi-Device Support (Experimental) -This branch introduces preliminary support for running on different device types. To train on an NPU device, add the following argument to your train.sh command: +This branch introduces preliminary support for running on different device types. 
By default, the training script now automatically selects the best available device in the following order of priority: +1. CUDA (GPU) +2. NPU (if oneflow_npu is installed) +3. CPU (fallback) + +If you want to explicitly run on a specific device (e.g., NPU), you can still override the default by adding the following argument to your train.sh command: ```bash --device=npu