From 01a2f2e0bfbba3da521be19659dfa362797af990 Mon Sep 17 00:00:00 2001 From: Jaap de Ruyter Date: Wed, 18 Feb 2026 16:17:11 +0100 Subject: [PATCH 1/3] update TorchvisionDetectorAdaptor: register as usable model --- .../pose_estimation_pytorch/models/detectors/__init__.py | 3 +++ .../models/detectors/torchvision.py | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dlclive/pose_estimation_pytorch/models/detectors/__init__.py b/dlclive/pose_estimation_pytorch/models/detectors/__init__.py index e9a99a6..c89902a 100644 --- a/dlclive/pose_estimation_pytorch/models/detectors/__init__.py +++ b/dlclive/pose_estimation_pytorch/models/detectors/__init__.py @@ -12,5 +12,8 @@ DETECTORS, BaseDetector, ) +from dlclive.pose_estimation_pytorch.models.detectors.torchvision import ( + TorchvisionDetectorAdaptor, +) from dlclive.pose_estimation_pytorch.models.detectors.fasterRCNN import FasterRCNN from dlclive.pose_estimation_pytorch.models.detectors.ssd import SSDLite diff --git a/dlclive/pose_estimation_pytorch/models/detectors/torchvision.py b/dlclive/pose_estimation_pytorch/models/detectors/torchvision.py index 72dd54b..8541870 100644 --- a/dlclive/pose_estimation_pytorch/models/detectors/torchvision.py +++ b/dlclive/pose_estimation_pytorch/models/detectors/torchvision.py @@ -14,9 +14,10 @@ import torch import torchvision.models.detection as detection -from dlclive.pose_estimation_pytorch.models.detectors.base import BaseDetector +from dlclive.pose_estimation_pytorch.models.detectors.base import DETECTORS, BaseDetector +@DETECTORS.register_module class TorchvisionDetectorAdaptor(BaseDetector): """An adaptor for torchvision detectors @@ -26,8 +27,8 @@ class TorchvisionDetectorAdaptor(BaseDetector): - fasterrcnn_mobilenet_v3_large_fpn - fasterrcnn_resnet50_fpn_v2 - This class should not be used out-of-the-box. Subclasses (such as FasterRCNN or - SSDLite) should be used instead. + This class can be used directly (e.g. with pre-trained COCO weights) or through its + subclasses (FasterRCNN or SSDLite) which adapt the model for DLC's 2-class detection. The torchvision implementation does not allow to get both predictions and losses with a single forward pass. Therefore, during evaluation only bounding box metrics From 333f714fe581c674ad383360fa0e0acc423a5359 Mon Sep 17 00:00:00 2001 From: Jaap de Ruyter Date: Wed, 18 Feb 2026 16:17:53 +0100 Subject: [PATCH 2/3] update runner: consider pretrained detectors (no weights in raw_data) --- dlclive/pose_estimation_pytorch/runner.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/dlclive/pose_estimation_pytorch/runner.py b/dlclive/pose_estimation_pytorch/runner.py index 2c59605..a22506e 100644 --- a/dlclive/pose_estimation_pytorch/runner.py +++ b/dlclive/pose_estimation_pytorch/runner.py @@ -268,10 +268,24 @@ def load_model(self) -> None: self.model = self.model.half() self.detector = None - if self.dynamic is None and raw_data.get("detector") is not None: + detector_cfg = self.cfg.get("detector") + has_detector_weights = raw_data.get("detector") is not None + if detector_cfg is not None: + detector_model_cfg = detector_cfg["model"] + uses_pretrained = ( + detector_model_cfg.get("pretrained", False) + or detector_model_cfg.get("weights") is not None + ) + else: + uses_pretrained = False + + if self.dynamic is None and (has_detector_weights or uses_pretrained): self.detector = models.DETECTORS.build(self.cfg["detector"]["model"]) self.detector.to(self.device) - self.detector.load_state_dict(raw_data["detector"]) + + if has_detector_weights: + self.detector.load_state_dict(raw_data["detector"]) + self.detector.eval() if self.precision == "FP16": self.detector = self.detector.half() @@ -281,7 +295,8 @@ def load_model(self) -> None: self.top_down_config.read_config(self.cfg) detector_transforms = [v2.ToDtype(torch.float32, scale=True)] - if self.cfg["detector"]["data"]["inference"].get("normalize_images", False): + detector_data_cfg = detector_cfg.get("data", {}).get("inference", {}) + if detector_data_cfg.get("normalize_images", False): detector_transforms.append(v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])) self.detector_transform = v2.Compose(detector_transforms) From 3a791aaa5b3df12f2cddd2472df57a054a44be08 Mon Sep 17 00:00:00 2001 From: Jaap de Ruyter Date: Wed, 18 Feb 2026 16:43:19 +0100 Subject: [PATCH 3/3] Add specific export config for torchvision detectors --- dlclive/modelzoo/pytorch_model_zoo_export.py | 4 +++- dlclive/modelzoo/utils.py | 22 ++++++++++++++----- .../models/detectors/torchvision.py | 2 ++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/dlclive/modelzoo/pytorch_model_zoo_export.py b/dlclive/modelzoo/pytorch_model_zoo_export.py index 616857d..b3554c1 100644 --- a/dlclive/modelzoo/pytorch_model_zoo_export.py +++ b/dlclive/modelzoo/pytorch_model_zoo_export.py @@ -32,10 +32,12 @@ def _load_model_weights(model_name: str, super_animal: str = super_animal) -> Or checkpoint: Path = download_super_animal_snapshot(dataset=super_animal, model_name=model_name) return torch.load(checkpoint, map_location="cpu", weights_only=True)["model"] + # Skip downloading the detector weights for humanbody models, as they are not on huggingface + skip_detector_download = (detector_name is None) or (super_animal == "superanimal_humanbody") export_dict = { "config": model_cfg, "pose": _load_model_weights(model_name), - "detector": _load_model_weights(detector_name) if detector_name is not None else None, + "detector": None if skip_detector_download else _load_model_weights(detector_name), } torch.save(export_dict, export_path) diff --git a/dlclive/modelzoo/utils.py b/dlclive/modelzoo/utils.py index 3857d14..3376fe1 100644 --- a/dlclive/modelzoo/utils.py +++ b/dlclive/modelzoo/utils.py @@ -12,6 +12,7 @@ from ruamel.yaml import YAML from dlclive.modelzoo.resolve_config import update_config +from dlclive.pose_estimation_pytorch.models.detectors.torchvision import SUPPORTED_TORCHVISION_DETECTORS _MODELZOO_PATH = Path(__file__).parent @@ -131,12 +132,21 @@ def load_super_animal_config( model_config["method"] = "BU" else: model_config["method"] = "TD" - if super_animal != "superanimal_humanbody": - detector_cfg_path = get_super_animal_model_config_path( - model_name=detector_name - ) - detector_cfg = read_config_as_dict(detector_cfg_path) - model_config["detector"] = detector_cfg + detector_cfg_path = get_super_animal_model_config_path( + model_name=detector_name + ) + detector_cfg = read_config_as_dict(detector_cfg_path) + model_config["detector"] = detector_cfg + if super_animal == "superanimal_humanbody": + # Apply specific updates required to run the torchvision detector with pretrained weights + assert detector_name in SUPPORTED_TORCHVISION_DETECTORS + model_config["detector"]['model']= { + "type": "TorchvisionDetectorAdaptor", + "model": detector_name, + "weights": "COCO_V1", + "num_classes": None, + "box_score_thresh": 0.6, + } return model_config diff --git a/dlclive/pose_estimation_pytorch/models/detectors/torchvision.py b/dlclive/pose_estimation_pytorch/models/detectors/torchvision.py index 8541870..8790a1d 100644 --- a/dlclive/pose_estimation_pytorch/models/detectors/torchvision.py +++ b/dlclive/pose_estimation_pytorch/models/detectors/torchvision.py @@ -16,6 +16,8 @@ from dlclive.pose_estimation_pytorch.models.detectors.base import DETECTORS, BaseDetector +SUPPORTED_TORCHVISION_DETECTORS = ["fasterrcnn_mobilenet_v3_large_fpn"] + @DETECTORS.register_module class TorchvisionDetectorAdaptor(BaseDetector):