From 5e63fa3d6f6c7fd9d029dd21ad16eaa675dbafe6 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 8 Apr 2025 15:09:20 +0200 Subject: [PATCH 1/7] :package: 0.2.0, update API to `ocrd>=3.0` --- ocrd_detectron2/config.py | 4 - ocrd_detectron2/ocrd-tool.json | 10 +- ocrd_detectron2/segment.py | 303 +++++++++++++++------------------ requirements.txt | 2 +- 4 files changed, 143 insertions(+), 176 deletions(-) delete mode 100644 ocrd_detectron2/config.py diff --git a/ocrd_detectron2/config.py b/ocrd_detectron2/config.py deleted file mode 100644 index 01e0b23..0000000 --- a/ocrd_detectron2/config.py +++ /dev/null @@ -1,4 +0,0 @@ -import json -from pkg_resources import resource_string - -OCRD_TOOL = json.loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8')) diff --git a/ocrd_detectron2/ocrd-tool.json b/ocrd_detectron2/ocrd-tool.json index 6eb5db0..a5d6736 100644 --- a/ocrd_detectron2/ocrd-tool.json +++ b/ocrd_detectron2/ocrd-tool.json @@ -1,18 +1,14 @@ { "git_url": "https://github.com/bertsky/ocrd_detectron2", - "version": "0.1.8", + "version": "0.2.0", "tools": { "ocrd-detectron2-segment": { "executable": "ocrd-detectron2-segment", "categories": ["Layout analysis"], "steps": ["layout/segmentation/region"], "description": "Detect regions with Detectron2 models", - "input_file_grp": [ - "OCR-D-IMG" - ], - "output_file_grp": [ - "OCR-D-SEG-REGION" - ], + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, "parameters": { "operation_level": { "type": "string", diff --git a/ocrd_detectron2/segment.py b/ocrd_detectron2/segment.py index cd692bd..68f9853 100644 --- a/ocrd_detectron2/segment.py +++ b/ocrd_detectron2/segment.py @@ -1,6 +1,5 @@ from __future__ import absolute_import -from pkg_resources import resource_filename import sys import os import tempfile @@ -10,6 +9,8 @@ import multiprocessing as mp import multiprocessing.sharedctypes import ctypes +from typing import Optional + import numpy as np from shapely.geometry import Polygon from 
shapely.ops import unary_union @@ -23,19 +24,17 @@ import torch from ocrd_utils import ( + resource_filename, getLogger, - make_file_id, - assert_file_grp_cardinality, pushd_popd, coordinates_of_segment, coordinates_for_segment, crop_image, points_from_polygon, polygon_from_points, - MIMETYPE_PAGE ) from ocrd_models.ocrd_page import ( - to_xml, + OcrdPage, PageType, AdvertRegionType, ChartRegionType, @@ -60,12 +59,8 @@ GraphicsTypeSimpleType, TextTypeSimpleType ) -from ocrd_modelfactory import page_from_file -from ocrd import Processor - -from .config import OCRD_TOOL +from ocrd import Processor, OcrdPageResult, OcrdPageResultImage -TOOL = 'ocrd-detectron2-segment' # when doing Numpy postprocessing, enlarge masks via # outer (convex) instead of inner (concave) hull of # corresponding connected components @@ -83,35 +78,32 @@ FINAL_DILATION = 4 class Detectron2Segment(Processor): + max_workers = 1 # GPU context not sharable across forks - def __init__(self, *args, **kwargs): - kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] - kwargs['version'] = OCRD_TOOL['version'] - super().__init__(*args, **kwargs) - if hasattr(self, 'output_file_grp'): - # processing context - self.setup() + @property + def executable(self): + return 'ocrd-detectron2-segment' def setup(self): #setup_logger(name='fvcore') #mp.set_start_method("spawn", force=True) - LOG = getLogger('processor.Detectron2Segment') # runtime overrides if self.parameter['device'] == 'cpu' or not torch.cuda.is_available(): device = "cpu" else: device = self.parameter['device'] - LOG.info("Using compute device %s", device) + self.logger.info("Using compute device %s", device) model_config = self.resolve_resource(self.parameter['model_config']) - LOG.info("Loading config '%s'", model_config) + self.logger.info("Loading config '%s'", model_config) # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library with tempfile.TemporaryDirectory() as tmpdir: # workaround for
fvcore/detectron2's stupid decision # to resolve the relative path for _BASE_ in the config file # on its dirname instead of the detectron2 distribution's config directory - temp_config = os.path.join(tmpdir, 'configs') - shutil.copytree(resource_filename('detectron2', 'model_zoo/configs'), temp_config) - temp_config = os.path.join(temp_config, os.path.basename(model_config)) + temp_configs = os.path.join(tmpdir, 'configs') + with resource_filename('detectron2', 'model_zoo/configs') as stock_configs: + shutil.copytree(stock_configs, temp_configs) + temp_config = os.path.join(temp_configs, os.path.basename(model_config)) shutil.copyfile(model_config, temp_config) with pushd_popd(tmpdir): # repair broken config files that make deviating assumptions on model_zoo files @@ -145,13 +137,13 @@ def setup(self): "The chosen model's number of classes %d does not match the given list of categories %d " % ( cfg.MODEL.ROI_HEADS.NUM_CLASSES, len(self.parameter['categories'])) # instantiate model - LOG.info("Loading weights '%s'", model_weights) + self.logger.info("Loading weights '%s'", model_weights) self.predictor = DefaultPredictor(cfg) self.categories = self.parameter['categories'] - self.metadata = MetadataCatalog.get('runtime') - self.metadata.thing_classes = self.categories + self.metadatacat = MetadataCatalog.get('runtime') + self.metadatacat.thing_classes = self.categories - def process(self): + def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult: """Use detectron2 to segment each page into regions. Open and deserialize PAGE input files and their respective images, @@ -190,113 +182,93 @@ class id to a new PAGE region type (and subtype). Produce a new output file by serialising the resulting hierarchy. 
""" - LOG = getLogger('processor.Detectron2Segment') - assert_file_grp_cardinality(self.input_file_grp, 1) - assert_file_grp_cardinality(self.output_file_grp, 1) + pcgts = input_pcgts[0] + result = OcrdPageResult(pcgts) level = self.parameter['operation_level'] - # pylint: disable=attribute-defined-outside-init - for n, input_file in enumerate(self.input_files): - file_id = make_file_id(input_file, self.output_file_grp) - page_id = input_file.pageId or input_file.ID - LOG.info("INPUT FILE %i / %s", n, page_id) - pcgts = page_from_file(self.workspace.download_file(input_file)) - pcgts.set_pcGtsId(file_id) - self.add_metadata(pcgts) - - page = pcgts.get_Page() - page_image_raw, page_coords, page_image_info = self.workspace.image_from_page( - page, page_id, feature_filter='binarized') - # for morphological post-processing, we will need the binarized image, too - if self.parameter['postprocessing'] != 'none': - page_image_bin, _, _ = self.workspace.image_from_page( - page, page_id, feature_selector='binarized') - page_image_raw, page_image_bin = _ensure_consistent_crops( - page_image_raw, page_image_bin) - else: - page_image_bin = page_image_raw - # determine current zoom and target zoom - if page_image_info.resolution != 1: - dpi = page_image_info.resolution - if page_image_info.resolutionUnit == 'cm': - dpi = round(dpi * 2.54) - zoom = 300.0 / dpi - else: - dpi = None - zoom = 1.0 - # todo: if zoom is > 4.0, do something along the lines of eynollah's enhance - if zoom < 2.0: - # actual resampling: see below - zoomed = zoom / 2.0 - LOG.info("scaling %dx%d image by %.2f", page_image_raw.width, page_image_raw.height, zoomed) + page = pcgts.get_Page() + page_image_raw, page_coords, page_image_info = self.workspace.image_from_page( + page, page_id, feature_filter='binarized') + # for morphological post-processing, we will need the binarized image, too + if self.parameter['postprocessing'] != 'none': + page_image_bin, _, _ = self.workspace.image_from_page( + page, page_id, 
feature_selector='binarized') + page_image_raw, page_image_bin = _ensure_consistent_crops( + page_image_raw, page_image_bin) + else: + page_image_bin = page_image_raw + # determine current zoom and target zoom + if page_image_info.resolution != 1: + dpi = page_image_info.resolution + if page_image_info.resolutionUnit == 'cm': + dpi = round(dpi * 2.54) + zoom = 300.0 / dpi + else: + dpi = None + zoom = 1.0 + # todo: if zoom is > 4.0, do something along the lines of eynollah's enhance + if zoom < 2.0: + # actual resampling: see below + zoomed = zoom / 2.0 + self.logger.info("scaling %dx%d image by %.2f", page_image_raw.width, page_image_raw.height, zoomed) + else: + zoomed = 1.0 + + for segment in ([page] if level == 'page' else + page.get_AllRegions(depth=1, classes=['Table'])): + # regions = segment.get_AllRegions(depth=1) + # FIXME: as long as we don't have get_AllRegions on region level, + # we have to simulate this via parent_object filtering + def at_segment(region): + return region.parent_object_ is segment + regions = list(filter(at_segment, page.get_AllRegions())) + + if isinstance(segment, PageType): + image_raw = page_image_raw + image_bin = page_image_bin + coords = page_coords else: - zoomed = 1.0 - - for segment in ([page] if level == 'page' else - page.get_AllRegions(depth=1, classes=['Table'])): - # regions = segment.get_AllRegions(depth=1) - # FIXME: as long as we don't have get_AllRegions on region level, - # we have to simulate this via parent_object filtering - def at_segment(region): - return region.parent_object_ is segment - regions = list(filter(at_segment, page.get_AllRegions())) - - if isinstance(segment, PageType): - image_raw = page_image_raw - image_bin = page_image_bin - coords = page_coords + image_raw, coords = self.workspace.image_from_segment( + segment, page_image_raw, page_coords, feature_filter='binarized') + if self.parameter['postprocessing'] != 'none': + image_bin, _ = self.workspace.image_from_segment( + segment, 
page_image_bin, page_coords) + image_raw, image_bin = _ensure_consistent_crops( + image_raw, image_bin) else: - image_raw, coords = self.workspace.image_from_segment( - segment, page_image_raw, page_coords, feature_filter='binarized') - if self.parameter['postprocessing'] != 'none': - image_bin, _ = self.workspace.image_from_segment( - segment, page_image_bin, page_coords) - image_raw, image_bin = _ensure_consistent_crops( - image_raw, image_bin) - else: - image_bin = image_raw - - # ensure RGB (if raw was merely grayscale) - if image_raw.mode == '1': - image_raw = image_raw.convert('L') - image_raw = image_raw.convert(mode='RGB') - image_bin = image_bin.convert(mode='1') - - # reduce resolution to 300 DPI max - if zoomed != 1.0: - image_bin = image_bin.resize( - (int(image_raw.width * zoomed), - int(image_raw.height * zoomed)), - resample=Image.Resampling.BICUBIC) - image_raw = image_raw.resize( - (int(image_raw.width * zoomed), - int(image_raw.height * zoomed)), - resample=Image.Resampling.BICUBIC) - - # convert raw to BGR - array_raw = np.array(image_raw) - array_raw = array_raw[:,:,::-1] - # convert binarized to single-channel negative - array_bin = np.array(image_bin) - array_bin = ~ array_bin - - self._process_segment(segment, regions, coords, array_raw, array_bin, zoomed, - file_id, input_file.pageId) - - file_path = os.path.join(self.output_file_grp, - file_id + '.xml') - out = self.workspace.add_file( - ID=file_id, - file_grp=self.output_file_grp, - pageId=input_file.pageId, - local_filename=file_path, - mimetype=MIMETYPE_PAGE, - content=to_xml(pcgts)) - LOG.info('created file ID: %s, file_grp: %s, path: %s', - file_id, self.output_file_grp, out.local_filename) - - def _process_segment(self, segment, ignore, coords, array_raw, array_bin, zoomed, file_id, page_id): - LOG = getLogger('processor.Detectron2Segment') + image_bin = image_raw + + # ensure RGB (if raw was merely grayscale) + if image_raw.mode == '1': + image_raw = image_raw.convert('L') + 
image_raw = image_raw.convert(mode='RGB') + image_bin = image_bin.convert(mode='1') + + # reduce resolution to 300 DPI max + if zoomed != 1.0: + image_bin = image_bin.resize( + (int(image_raw.width * zoomed), + int(image_raw.height * zoomed)), + resample=Image.Resampling.BICUBIC) + image_raw = image_raw.resize( + (int(image_raw.width * zoomed), + int(image_raw.height * zoomed)), + resample=Image.Resampling.BICUBIC) + + # convert raw to BGR + array_raw = np.array(image_raw) + array_raw = array_raw[:,:,::-1] + # convert binarized to single-channel negative + array_bin = np.array(image_bin) + array_bin = ~ array_bin + + image = self._process_segment(segment, regions, coords, array_raw, array_bin, zoomed, page_id) + if image: + result.images.append(image) + return result + + def _process_segment(self, segment, ignore, coords, array_raw, array_bin, zoomed, page_id) -> Optional[OcrdPageResultImage]: + self.logger = getLogger('processor.Detectron2Segment') cpu = torch.device('cpu') segtype = segment.__class__.__name__[:-4] # remove existing segmentation (have only detected targets survive) @@ -315,12 +287,12 @@ def _process_segment(self, segment, ignore, coords, array_raw, array_bin, zoomed counts = np.sqrt(3 * counts) counts = counts[(5 < counts) & (counts < 100)] scale = int(np.median(counts)) - LOG.debug("estimated scale: %d", scale) + self.logger.debug("estimated scale: %d", scale) # predict output = self.predictor(array_raw) if self.parameter['debug_img'] != 'none': vis = visualizer.Visualizer(array_raw, - metadata=self.metadata, + metadata=self.metadatacat, instance_mode={ 'instance_colors': visualizer.ColorMode.IMAGE, 'instance_colors_only': visualizer.ColorMode.IMAGE_BW, @@ -328,10 +300,10 @@ def _process_segment(self, segment, ignore, coords, array_raw, array_bin, zoomed }[self.parameter['debug_img']]) # decoding, cf. 
https://detectron2.readthedocs.io/en/latest/tutorials/models.html if 'panoptic_seg' in output: - LOG.info("decoding from panoptic segmentation results") + self.logger.info("decoding from panoptic segmentation results") segmap, seginfo = output['panoptic_seg'] if not isinstance(segmap, np.ndarray): - LOG.debug(str(segmap)) + self.logger.debug(str(segmap)) segmap = segmap.to(cpu) segmap = segmap.numpy() if self.parameter['debug_img'] != 'none': @@ -339,8 +311,8 @@ def _process_segment(self, segment, ignore, coords, array_raw, array_bin, zoomed seglabels = np.unique(segmap) nseg = len(seglabels) if not nseg: - LOG.warning("Detected no regions on %s '%s'", segtype, segment.id) - return + self.logger.warning("Detected no regions on %s '%s'", segtype, segment.id) + return None masks = [] classes = [] scores = [] @@ -360,10 +332,10 @@ def _process_segment(self, segment, ignore, coords, array_raw, array_bin, zoomed scores.append(1.0) #scores[i] classes.append(class_id) if not len(masks): - LOG.warning("Detected no regions for selected categories on %s '%s'", segtype, segment.id) - return + self.logger.warning("Detected no regions for selected categories on %s '%s'", segtype, segment.id) + return None elif 'instances' in output: - LOG.info("decoding from instance segmentation results") + self.logger.info("decoding from instance segmentation results") instances = output['instances'] if not isinstance(instances, dict): assert instances.image_size == (height, width) @@ -383,8 +355,8 @@ def _process_segment(self, segment, ignore, coords, array_raw, array_bin, zoomed if not isinstance(scores, np.ndarray): scores = scores.to(cpu).numpy() if not scores.shape[0]: - LOG.warning("Detected no regions on %s '%s'", segtype, segment.id) - return + self.logger.warning("Detected no regions on %s '%s'", segtype, segment.id) + return None if 'pred_masks' in instances: # or pred_masks_rle ? 
masks = np.asarray(instances['pred_masks']) def get_mask(x): @@ -394,7 +366,7 @@ def get_mask(x): return x.mask > 0 masks = np.stack([get_mask(x) for x in masks]) elif 'pred_boxes' in instances: - LOG.warning("model has no mask output, only bbox") + self.logger.warning("model has no mask output, only bbox") boxes = instances['pred_boxes'] if not isinstance(boxes, np.ndarray): boxes = boxes.to(cpu).tensor.numpy() @@ -406,8 +378,8 @@ def get_mask(x): math.floor(y1):math.ceil(y2), math.floor(x1):math.ceil(x2)] = True else: - LOG.error("Found no suitable output format to decode from") - return + self.logger.error("Found no suitable output format to decode from") + return None assert len(scores) == len(classes) == len(masks) # apply non-maximum suppression between overlapping instances # (not strictly necessary in case of panoptic segmentation, @@ -427,10 +399,10 @@ def get_mask(x): if postprocessing in ['full', 'only-nms']: scores, classes, masks = postprocess_nms( scores, classes, masks, array_bin, self.categories, - min_confidence=self.parameter['min_confidence'], nproc=8) + min_confidence=self.parameter['min_confidence'], nproc=8, logger=self.logger) if postprocessing in ['full', 'only-morph']: scores, classes, masks = postprocess_morph( - scores, classes, masks, components, nproc=8) + scores, classes, masks, components, nproc=8, logger=self.logger) if len(ignore): scores = scores[1:] classes = classes[1:] @@ -451,7 +423,7 @@ def get_mask(x): mask = cv2.dilate(mask.astype(np.uint8), np.ones((scale,scale), np.uint8)) > 0 if invalid: - LOG.warning("Ignoring non-contiguous (%d) region for %s", len(contours), category) + self.logger.warning("Ignoring non-contiguous (%d) region for %s", len(contours), category) continue region_polygon = contours[0][:,0,:] # already in x,y order if zoomed != 1.0: @@ -460,7 +432,7 @@ def get_mask(x): region_polygon = coordinates_for_segment(region_polygon, _, coords) region_polygon = polygon_for_parent(region_polygon, segment) if 
region_polygon is None: - LOG.warning("Ignoring extant region for %s", category) + self.logger.warning("Ignoring extant region for %s", category) continue # annotate new region/line region_coords = CoordsType(points_from_polygon(region_polygon), conf=score) @@ -485,8 +457,7 @@ def get_mask(x): try: regiontype = cat2class[cat[0]] except KeyError: - LOG.critical("Invalid region type %s (see https://github.com/PRImA-Research-Lab/PAGE-XML)", cat[0]) - sys.exit(1) + raise ValueError("Invalid region type %s (see https://github.com/PRImA-Research-Lab/PAGE-XML)", cat[0]) region_no += 1 region_id = 'region%04d_%s' % (region_no, cat[0]) region = regiontype(id=region_id, Coords=region_coords) @@ -499,22 +470,25 @@ def get_mask(x): except (KeyError, ValueError): region.set_custom(cat[1]) getattr(segment, 'add_' + cat[0])(region) - LOG.info("Detected %s region%04d (p=%.2f) on %s '%s'", + self.logger.info("Detected %s region%04d (p=%.2f) on %s '%s'", category, region_no, score, segtype, segment.id) if self.parameter['debug_img'] != 'none': - path = self.workspace.save_image_file( + altimg = AlternativeImageType(comments='debug') + segment.add_AlternativeImage(altimg) + return OcrdPageResultImage( Image.fromarray(visimg.get_image()), - (file_id if isinstance(segment, PageType) else file_id + '_' + segment.id) + '.IMG-DEBUG', - self.output_file_grp, page_id=page_id) - segment.add_AlternativeImage(AlternativeImageType(filename=path, comments='debug')) + ('' if isinstance(segment, PageType) else '_' + segment.id) + '.IMG-DEBUG', + altimg) + return None -def postprocess_nms(scores, classes, masks, page_array_bin, categories, min_confidence=0.5, nproc=8): +def postprocess_nms(scores, classes, masks, page_array_bin, categories, min_confidence=0.5, nproc=8, logger=None): """Apply geometrical post-processing to raw detections: remove overlapping candidates via non-maximum suppression across classes. Implement via Numpy routines. 
""" - LOG = getLogger('processor.Detectron2Segment') + if logger is None: + logger = getLogger('ocrd.processor.Detectron2Segment') # apply IoU-based NMS across classes assert masks.dtype == bool instances = np.arange(len(masks)) @@ -543,25 +517,25 @@ def postprocess_nms(scores, classes, masks, page_array_bin, categories, min_conf bbox = [xs.min(), ys.min(), xs.max(), ys.max()] class_id = classes[i] if class_id < 0: - LOG.debug("ignoring existing region at %s", str(bbox)) + logger.debug("ignoring existing region at %s", str(bbox)) continue category = categories[class_id] if scores[i] < min_confidence: - LOG.debug("Ignoring instance for %s with too low score %.2f", category, score) + logger.debug("Ignoring instance for %s with too low score %.2f", category, score) bad[i] = True continue count = np.count_nonzero(mask) if count < 10: - LOG.warning("Ignoring too small (%dpx) region for %s", count, category) + logger.warning("Ignoring too small (%dpx) region for %s", count, category) bad[i] = True continue worse = score < scores if np.any(worse & overlaps[i]): - LOG.debug("Ignoring instance for %s with %.2f overlapping better neighbour", + logger.debug("Ignoring instance for %s with %.2f overlapping better neighbour", category, score) bad[i] = True else: - LOG.debug("post-processing prediction for %s at %s area %d score %f", + logger.debug("post-processing prediction for %s at %s area %d score %f", category, str(bbox), count, score) # post-process detections morphologically and decode to region polygons # does not compile (no OpenCV support): @@ -574,12 +548,13 @@ def postprocess_nms(scores, classes, masks, page_array_bin, categories, min_conf masks = masks[keep] return scores, classes, masks -def postprocess_morph(scores, classes, masks, components, nproc=8): +def postprocess_morph(scores, classes, masks, components, nproc=8, logger=None): """Apply morphological post-processing to raw detections: extend masks to avoid chopping off fg connected components. 
Implement via Numpy routines. """ - LOG = getLogger('processor.Detectron2Segment') + if logger is None: + logger = getLogger('ocrd.processor.Detectron2Segment') shared_masks = mp.sharedctypes.RawArray(ctypes.c_bool, masks.size) shared_components = mp.sharedctypes.RawArray(ctypes.c_int32, components.size) shared_masks_np = tonumpyarray_with_shape(shared_masks, masks.shape) diff --git a/requirements.txt b/requirements.txt index a71c973..bf81af1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -ocrd>=2.40 +ocrd>=3.3.0 click>=7.0 scipy numpy>=1.17.0 From 5312e36b7d76d1be6f4494ae25c1d0f9da503bdb Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 8 Apr 2025 17:12:29 +0200 Subject: [PATCH 2/7] =?UTF-8?q?setup.py=20=E2=86=92=20pyproject.toml?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 19 ++++++++++++------- pyproject.toml | 46 ++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 45 --------------------------------------------- 3 files changed, 58 insertions(+), 52 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.py diff --git a/Makefile b/Makefile index c917e65..dddba48 100644 --- a/Makefile +++ b/Makefile @@ -11,13 +11,14 @@ help: @echo @echo " Targets" @echo - @echo " deps Install only Python dependencies via pip" - @echo " install Install full Python package via pip" - @echo " deps-test Install Python dependencies for tests via pip and models via resmgr" - @echo " test Run regression tests" - @echo " build Build Python package as source and wheel distribution" - @echo " clean Remove symlinks in test/assets" - @echo " docker Build Docker image" + @echo " deps Install only Python dependencies via pip" + @echo " install Install full Python package via pip" + @echo " install-dev Install full Python package via pip in editable mode" + @echo " deps-test Install Python dependencies for tests via pip and models via resmgr" + @echo " test Run regression tests" + @echo " build
Build Python package as source and wheel distribution" + @echo " clean Remove symlinks in test/assets" + @echo " docker Build Docker image" @echo @echo " Variables" @echo " PYTHON" @@ -70,6 +71,10 @@ deps: install: deps $(PIP) install . +# Install Python package via pip in editable mode +install-dev: deps + $(PIP) install -e . + # Install testing python deps via pip deps-test: models-test $(PIP) install -r requirements-test.txt diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4fbd2a6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,46 @@ +[build-system] +requires = ["setuptools>=61.0.0", "wheel", "setuptools-ocrd"] + +[project] +name = "ocrd_detectron2" +authors = [ + {name = "Robert Sachunsky", email = "sachunsky@informatik.uni-leipzig.de"}, + {name = "Julian Balling", email = "balling@infai.org"}, +] +description = "OCR-D wrapper for detectron2 based segmentation models" +readme = "README.md" +license.text = "MIT" +requires-python = ">=3.8" + +dynamic = ["version", "dependencies"] + +# https://pypi.org/classifiers/ +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Science/Research", + "Intended Audience :: Other Audience", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Text Processing", +] + +[project.scripts] +ocrd-detectron2-segment = "ocrd_detectron2.cli:ocrd_detectron2_segment" + +[project.urls] +Homepage = "https://github.com/bertsky/ocrd_detectron2" +Repository = "https://github.com/bertsky/ocrd_detectron2.git" + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} +optional-dependencies.test = {file = ["requirements-test.txt"]} + +[tool.setuptools] +packages = ["ocrd_detectron2"] +package-data = {"*" = ["*.json"]} + +[tool.coverage.run] +branch = true +source = ["ocrd_detectron2"] diff --git a/setup.py b/setup.py deleted file mode 100644 index aca17d6..0000000
--- a/setup.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -Installs: - - ocrd-detectron2-segment -""" - -import codecs -import json -from setuptools import setup -from setuptools import find_packages - -with codecs.open('README.md', encoding='utf-8') as f: - README = f.read() - -with open('./ocrd-tool.json', 'r') as f: - version = json.load(f)['version'] - -setup( - name='ocrd_detectron2', - version=version, - description='OCR-D wrapper for detectron2 based segmentation models', - long_description=README, - long_description_content_type='text/markdown', - author='Robert Sachunsky, Julian Balling', - author_email='sachunsky@informatik.uni-leipzig.de, balling@infai.org', - url='https://github.com/bertsky/ocrd_detectron2', - license='MIT', - packages=find_packages(), - include_package_data=True, - install_requires=open('requirements.txt').read().split('\n'), - # dependency links not supported anymore (must use pip install -f ... now) - dependency_links=[ - 'https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.10/index.html', - 'https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.10/index.html', - 'https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html', - 'https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html', - ], - package_data={ - '': ['*.json', '*.yml', '*.yaml', '*.csv.gz', '*.jar', '*.zip'], - }, - entry_points={ - 'console_scripts': [ - 'ocrd-detectron2-segment=ocrd_detectron2.cli:ocrd_detectron2_segment', - ] - }, -) From 4a128075bb9c33d91ecf4345fbe8db6dc5295c16 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 8 Apr 2025 17:13:03 +0200 Subject: [PATCH 3/7] =?UTF-8?q?Dockerfile:=20update=20to=20conform=20to=20?= =?UTF-8?q?spec=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - labels - environment variables - preinstalled ocrd-all-tool.json - 3.x base stage --- .dockerignore | 6 ++++++ Dockerfile | 41 +++++++++++++++++++++++++++++------------ 
Makefile | 2 +- 3 files changed, 36 insertions(+), 13 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..208e33d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +test +repo +dist +build +*.egg-info +*.whl diff --git a/Dockerfile b/Dockerfile index 8968ea7..f462efd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,22 +3,39 @@ FROM $DOCKER_BASE_IMAGE ARG VCS_REF ARG BUILD_DATE LABEL \ - maintainer="https://ocr-d.de/kontakt" \ + maintainer="https://ocr-d.de/en/contact" \ org.label-schema.vcs-ref=$VCS_REF \ - org.label-schema.vcs-url="https://github.com/bertsky/detectron2" \ - org.label-schema.build-date=$BUILD_DATE + org.label-schema.vcs-url="https://github.com/bertsky/ocrd_detectron2" \ + org.label-schema.build-date=$BUILD_DATE \ + org.opencontainers.image.vendor="DFG-Funded Initiative for Optical Character Recognition Development" \ + org.opencontainers.image.title="ocrd_detectron2" \ + org.opencontainers.image.description="OCR-D wrapper for detectron2 based segmentation models" \ + org.opencontainers.image.source="https://github.com/bertsky/ocrd_detectron2" \ + org.opencontainers.image.documentation="https://github.com/bertsky/ocrd_detectron2/blob/${VCS_REF}/README.md" \ + org.opencontainers.image.revision=$VCS_REF \ + org.opencontainers.image.created=$BUILD_DATE \ + org.opencontainers.image.base.name=ocrd/core-cuda-torch -ENV DEBIAN_FRONTEND noninteractive -ENV PYTHONIOENCODING utf8 +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONIOENCODING=utf8 +ENV LANG=C.UTF-8 +ENV LC_ALL=C.UTF-8 + +# avoid HOME/.local/share (hard to predict USER here) +# so let XDG_DATA_HOME coincide with fixed system location +# (can still be overridden by derived stages) +ENV XDG_DATA_HOME /usr/local/share +# avoid the need for an extra volume for persistent resource user db +# (i.e. XDG_CONFIG_HOME/ocrd/resources.yml) +ENV XDG_CONFIG_HOME /usr/local/share/ocrd-resources WORKDIR /build/ocrd_detectron2 -COPY setup.py . 
-COPY ocrd_detectron2/ocrd-tool.json . -COPY README.md . -COPY requirements.txt . -COPY requirements-test.txt . -COPY ocrd_detectron2 ./ocrd_detectron2 -COPY Makefile . + +COPY . . +COPY ocrd-tool.json . +# prepackage ocrd-tool.json as ocrd-all-tool.json +RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename))/ocrd-all-tool.json +# install everything and reduce image size RUN apt-get install -y --no-install-recommends g++ && \ make deps && \ make install && \ diff --git a/Makefile b/Makefile index dddba48..13b5c4a 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ PYTHONIOENCODING=utf8 SHELL = /bin/bash # Docker container tag -DOCKER_BASE_IMAGE = docker.io/ocrd/core-cuda-torch:v2.69.0 +DOCKER_BASE_IMAGE = docker.io/ocrd/core-cuda-torch:v3.3.0 DOCKER_TAG = 'ocrd/detectron2' help: From 45fa3dd5b237c5ab0119fb4aef8bf10639e8848c Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 8 Apr 2025 19:27:50 +0200 Subject: [PATCH 4/7] CI: update Github Actions versions --- .github/workflows/python-app.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index ce6f43c..e19b558 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -20,12 +20,12 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Lint with flake8 @@ -44,7 +44,7 @@ jobs: - name: Install package run: make install - name: Cache models - uses: actions/cache@v3 + uses: actions/cache@v4 with: key: detectron-models path: /home/runner/.local/share/ocrd-resources/ocrd-detectron2-segment/* @@ -54,7 +54,7 @@ jobs: - name: Run tests run: make test - name: Upload test 
results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: test-results path: | From fbb56918f41412f998b40330bc451c407a7e2855 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 8 Apr 2025 19:46:50 +0200 Subject: [PATCH 5/7] CI: only upload test results for one Python version --- .github/workflows/python-app.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index e19b558..3641124 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -54,6 +54,7 @@ jobs: - name: Run tests run: make test - name: Upload test results + if: matrix.python-version == '3.8' uses: actions/upload-artifact@v4 with: name: test-results From 84552352e8dee98880d37c1fb9a0978ae48a720d Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 8 Apr 2025 20:02:49 +0200 Subject: [PATCH 6/7] CI: update Github Actions version for downloader, too --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 3641124..353cc60 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -76,7 +76,7 @@ jobs: with: ref: gh-pages - name: Download Artifact - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a # v3.0.2 + uses: actions/download-artifact@4 with: name: test-results path: test-results From 42796cc798c57f2b743801518e946efef5e3d568 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 8 Apr 2025 20:34:11 +0200 Subject: [PATCH 7/7] CI: fix publish part even more --- .github/workflows/python-app.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 353cc60..818b2f2 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -72,11 +72,11 @@ jobs: continue-on-error: true steps: 
- name: Checkout GH Pages - uses: actions/checkout@24cb9080177205b6e8c946b17badbe402adc938f # v3.4.0 + uses: actions/checkout@v4 with: ref: gh-pages - name: Download Artifact - uses: actions/download-artifact@4 + uses: actions/download-artifact@v4 with: name: test-results path: test-results