diff --git a/setup.py b/setup.py index 24aee6c..ceb7013 100644 --- a/setup.py +++ b/setup.py @@ -73,5 +73,6 @@ target = plone [console_scripts] update_locale = redturtle.rsync.locales.update:update_locale + redturtle_rsync = redturtle.rsync.scripts.rsync:main """, ) diff --git a/src/redturtle/rsync/__init__.py b/src/redturtle/rsync/__init__.py index b077e2b..6c8bab9 100644 --- a/src/redturtle/rsync/__init__.py +++ b/src/redturtle/rsync/__init__.py @@ -3,4 +3,4 @@ from zope.i18nmessageid import MessageFactory -_ = MessageFactory('redturtle.rsync') +_ = MessageFactory("redturtle.rsync") diff --git a/src/redturtle/rsync/adapters/__init__.py b/src/redturtle/rsync/adapters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/redturtle/rsync/adapters/adapter.py b/src/redturtle/rsync/adapters/adapter.py new file mode 100644 index 0000000..6f26c19 --- /dev/null +++ b/src/redturtle/rsync/adapters/adapter.py @@ -0,0 +1,160 @@ +from pathlib import Path +from redturtle.rsync.interfaces import IRedturtleRsyncAdapter +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry +from zope.component import adapter +from zope.interface import implementer +from zope.interface import Interface + +import json +import requests + + +class TimeoutHTTPAdapter(HTTPAdapter): + def __init__(self, *args, **kwargs): + if "timeout" in kwargs: + self.timeout = kwargs["timeout"] + del kwargs["timeout"] + super(TimeoutHTTPAdapter, self).__init__(*args, **kwargs) + + def send(self, request, **kwargs): + timeout = kwargs.get("timeout") + if timeout is None: + kwargs["timeout"] = self.timeout + return super(TimeoutHTTPAdapter, self).send(request, **kwargs) + + +@implementer(IRedturtleRsyncAdapter) +@adapter(Interface, Interface) +class RsyncAdapterBase: + """ + This is the base class for all rsync adapters. + It provides a common interface for all adapters and some default + implementations of the methods. 
+ Default methods work with some data in restapi-like format. + """ + + def __init__(self, context, request): + self.context = context + self.request = request + + def requests_retry_session( + self, + retries=3, + backoff_factor=0.3, + status_forcelist=(500, 501, 502, 503, 504), + timeout=5.0, + session=None, + ): + """ + https://dev.to/ssbozy/python-requests-with-retries-4p03 + """ + session = session or requests.Session() + retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + ) + # adapter = HTTPAdapter(max_retries=retry) + http_adapter = TimeoutHTTPAdapter(max_retries=retry, timeout=timeout) + session.mount("http://", http_adapter) + session.mount("https://", http_adapter) + return session + + def log_item_title(self, start, options): + """ + Return the title of the log item for the rsync command. + """ + return f"Report sync {start.strftime('%d-%m-%Y %H:%M:%S')}" + + def set_args(self, parser): + """ + Set some additional arguments for the rsync command. + + For example: + parser.add_argument( + "--import-type", + choices=["xxx", "yyy", "zzz"], + help="Import type", + ) + """ + return + + def get_data(self, options): + """ + Convert the data to be used for the rsync command. 
+ Return: + - data: the data to be used for the rsync command + - error: an error message if there was an error, None otherwise + """ + error = None + data = None + # first, read source data + if getattr(options, "source_path", None): + file_path = Path(options.source_path) + if file_path.exists() and file_path.is_file(): + with open(file_path, "r") as f: + try: + data = json.load(f) + except json.JSONDecodeError: + data = f.read() + else: + error = f"Source file not found in: {file_path}" + return data, error + elif getattr(options, "source_url", None): + http = self.requests_retry_session(retries=7, timeout=30.0) + response = http.get(options.source_url) + if response.status_code != 200: + error = f"Error getting data from {options.source_url}: {response.status_code}" + return data, error + if "application/json" in response.headers.get("Content-Type", ""): + try: + data = response.json() + except ValueError: + data = response.content + else: + data = response.content + + if data: + data, error = self.convert_source_data(data) + return data, error + + def convert_source_data(self, data): + """ + If needed, convert the source data to a format that can be used by the rsync command. + """ + return data, None + + def find_item_from_row(self, row): + """ + Find the item in the context from the given row of data. + This method should be implemented by subclasses to find the specific type of content item. + """ + raise NotImplementedError() + + def create_item(self, row, options): + """ + Create a new content item from the given row of data. + This method should be implemented by subclasses to create the specific type of content item. + """ + raise NotImplementedError() + + def update_item(self, item, row): + """ + Update an existing content item from the given row of data. + This method should be implemented by subclasses to update the specific type of content item. 
+ """ + raise NotImplementedError() + + def delete_items(self, data, sync_uids): + """ + params: + - data: the data to be used for the rsync command + - sync_uids: the uids of the items thata has been updated + + Delete items if needed. + This method should be implemented by subclasses to delete the specific type of content item. + """ + return diff --git a/src/redturtle/rsync/adapters/configure.zcml b/src/redturtle/rsync/adapters/configure.zcml new file mode 100644 index 0000000..5c0a861 --- /dev/null +++ b/src/redturtle/rsync/adapters/configure.zcml @@ -0,0 +1,8 @@ + + + + diff --git a/src/redturtle/rsync/browser/configure.zcml b/src/redturtle/rsync/browser/configure.zcml index 5327771..3125fde 100644 --- a/src/redturtle/rsync/browser/configure.zcml +++ b/src/redturtle/rsync/browser/configure.zcml @@ -2,10 +2,14 @@ xmlns="http://namespaces.zope.org/zope" xmlns:browser="http://namespaces.zope.org/browser" xmlns:plone="http://namespaces.plone.org/plone" - i18n_domain="redturtle.rsync"> + i18n_domain="redturtle.rsync" + > - + diff --git a/src/redturtle/rsync/configure.zcml b/src/redturtle/rsync/configure.zcml index 5577d89..a0bf241 100644 --- a/src/redturtle/rsync/configure.zcml +++ b/src/redturtle/rsync/configure.zcml @@ -3,7 +3,8 @@ xmlns:genericsetup="http://namespaces.zope.org/genericsetup" xmlns:i18n="http://namespaces.zope.org/i18n" xmlns:plone="http://namespaces.plone.org/plone" - i18n_domain="redturtle.rsync"> + i18n_domain="redturtle.rsync" + > @@ -16,23 +17,24 @@ + diff --git a/src/redturtle/rsync/interfaces.py b/src/redturtle/rsync/interfaces.py index 8e150d7..55cfa16 100644 --- a/src/redturtle/rsync/interfaces.py +++ b/src/redturtle/rsync/interfaces.py @@ -1,8 +1,71 @@ # -*- coding: utf-8 -*- """Module where all interfaces, events and exceptions live.""" - +from zope.interface import Interface from zope.publisher.interfaces.browser import IDefaultBrowserLayer class IRedturtleRsyncLayer(IDefaultBrowserLayer): """Marker interface that defines a browser 
layer.""" + + +class IRedturtleRsyncAdapter(Interface): + """Marker interface for the redturtle rsync adapter.""" + + def __init__(context, request): + """Initialize the adapter with the given context and request.""" + + def log_item_title(start, options): + """ + Return the title of the log item for the rsync command. + """ + + def set_args(parser): + """ + Set some additional arguments for the rsync command. + """ + + def get_data(options): + """ + Get the data to be used for the rsync command. + """ + + def handle_row(row): + """ + Method to handle a row of data. + For example it could do the following steps: + - check if there is already a content item with the same id + - if not, create a new content item + - if yes, update the existing content item + + It should return the content item created or updated and the status of the operation. + The status could be one of the following: + - "created": a new content item was created + - "updated": an existing content item was updated + - "skipped": the content item was skipped because it already exists and is up to date + - "error": an error occurred while processing the content item + + for example: + return {'item': content_item, 'status': status} + """ + + def create_item(row): + """ + Create a new content item from the given row of data. + This method should be implemented by subclasses to create the specific type of content item. + """ + + def update_item(item, row): + """ + Update an existing content item from the given row of data. + This method should be implemented by subclasses to update the specific type of content item. + """ + + def delete_items(data, sync_uids): + """ + params: + - data: the data to be used for the rsync command + - sync_uids: the uids of the items that have been updated + + Delete items if needed. + This method should be implemented by subclasses to delete the specific type of content item. 
+ """ diff --git a/src/redturtle/rsync/locales/update.py b/src/redturtle/rsync/locales/update.py index ca753e5..70a8da5 100644 --- a/src/redturtle/rsync/locales/update.py +++ b/src/redturtle/rsync/locales/update.py @@ -5,12 +5,12 @@ import subprocess -domain = 'redturtle.rsync' -os.chdir(pkg_resources.resource_filename(domain, '')) -os.chdir('../../../') -target_path = 'src/redturtle/rsync/' -locale_path = target_path + 'locales/' -i18ndude = './bin/i18ndude' +domain = "redturtle.rsync" +os.chdir(pkg_resources.resource_filename(domain, "")) +os.chdir("../../../") +target_path = "src/redturtle/rsync/" +locale_path = target_path + "locales/" +i18ndude = "./bin/i18ndude" # ignore node_modules files resulting in errors excludes = '"*.html *json-schema*.xml"' @@ -18,15 +18,15 @@ def locale_folder_setup(): os.chdir(locale_path) - languages = [d for d in os.listdir('.') if os.path.isdir(d)] + languages = [d for d in os.listdir(".") if os.path.isdir(d)] for lang in languages: folder = os.listdir(lang) - if 'LC_MESSAGES' in folder: + if "LC_MESSAGES" in folder: continue else: - lc_messages_path = lang + '/LC_MESSAGES/' + lc_messages_path = lang + "/LC_MESSAGES/" os.mkdir(lc_messages_path) - cmd = 'msginit --locale={0} --input={1}.pot --output={2}/LC_MESSAGES/{3}.po'.format( # NOQA: E501 + cmd = "msginit --locale={0} --input={1}.pot --output={2}/LC_MESSAGES/{3}.po".format( # NOQA: E501 lang, domain, lang, @@ -37,11 +37,11 @@ def locale_folder_setup(): shell=True, ) - os.chdir('../../../../') + os.chdir("../../../../") def _rebuild(): - cmd = '{i18ndude} rebuild-pot --pot {locale_path}/{domain}.pot --exclude {excludes} --create {domain} {target_path}'.format( # NOQA: E501 + cmd = "{i18ndude} rebuild-pot --pot {locale_path}/{domain}.pot --exclude {excludes} --create {domain} {target_path}".format( # NOQA: E501 i18ndude=i18ndude, locale_path=locale_path, domain=domain, @@ -55,7 +55,7 @@ def _rebuild(): def _sync(): - cmd = '{0} sync --pot {1}/{2}.pot 
{3}*/LC_MESSAGES/{4}.po'.format( + cmd = "{0} sync --pot {1}/{2}.pot {3}*/LC_MESSAGES/{4}.po".format( i18ndude, locale_path, domain, diff --git a/src/redturtle/rsync/permissions.zcml b/src/redturtle/rsync/permissions.zcml index 1f79c8a..74de0f4 100644 --- a/src/redturtle/rsync/permissions.zcml +++ b/src/redturtle/rsync/permissions.zcml @@ -1,10 +1,11 @@ + xmlns="http://namespaces.zope.org/zope" + xmlns:zcml="http://namespaces.zope.org/zcml" + i18n_domain="plone" + > - + diff --git a/src/redturtle/rsync/scripts/rsync.py b/src/redturtle/rsync/scripts/rsync.py index 8e2b99b..74abc95 100644 --- a/src/redturtle/rsync/scripts/rsync.py +++ b/src/redturtle/rsync/scripts/rsync.py @@ -1,212 +1,302 @@ # -*- coding: utf-8 -*- -# documentazione: .... -from zope.interface import Interface -from Acquisition import aq_base -import logging +from datetime import datetime from plone import api -import requests -from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry -from plone.namedfile.file import NamedBlobImage -from z3c.relationfield.relation import RelationValue -from zope.lifecycleevent import ObjectModifiedEvent -from zope.event import notify -from Products.Five.utilities.marker import mark +from redturtle.rsync.interfaces import IRedturtleRsyncAdapter +from zope.component import getMultiAdapter + +import argparse +import logging +import re +import sys +import transaction +import uuid + logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) -class TimeoutHTTPAdapter(HTTPAdapter): - def __init__(self, *args, **kwargs): - if "timeout" in kwargs: - self.timeout = kwargs["timeout"] - del kwargs["timeout"] - super(TimeoutHTTPAdapter, self).__init__(*args, **kwargs) - - def send(self, request, **kwargs): - timeout = kwargs.get("timeout") - if timeout is None: - kwargs["timeout"] = self.timeout - return super(TimeoutHTTPAdapter, self).send(request, **kwargs) - - -# https://dev.to/ssbozy/python-requests-with-retries-4p03 -def 
requests_retry_session( - retries=3, - backoff_factor=0.3, - status_forcelist=(500, 501, 502, 503, 504), - timeout=5.0, - session=None, -): - session = session or requests.Session() - retry = Retry( - total=retries, - read=retries, - connect=retries, - backoff_factor=backoff_factor, - status_forcelist=status_forcelist, - ) - # adapter = HTTPAdapter(max_retries=retry) - adapter = TimeoutHTTPAdapter(max_retries=retry, timeout=timeout) - session.mount('http://', adapter) - session.mount('https://', adapter) - return session - -http = requests_retry_session(retries=7, timeout=30.0) - - -class ISynced(Interface): - """marker for synced content""" - - -def json_extractor(container, response, **kwargs): - return response.json() - - -def image_extractor(container, response, **kwargs): - if not response.headers.get('content-type', '').startswith('image/'): - logger.error('invalid for image_extractor %s (%r)', response.url, response.headers.get('content-type', '')) - return None - return { - 'title': kwargs.get('name', response.url.split('/')[-1]), - 'image': NamedBlobImage(data=response.content, filename=response.url.split('/')[-1]) - } - - -def page_creator(container, data, id=None, portal_type='Document', **kwargs): - obj = api.content.create(container, type=portal_type, id=id, **data) - logger.warning('created %s', obj.absolute_url()) - if 'review_state' in kwargs: - if api.content.get_state(obj) != kwargs['review_state']: - api.content.transition(obj, to_state=kwargs['review_state']) - # try: - # api.content.transition(obj, to_state=kwargs['review_state']) - # except api.exc.InvalidParameterError: - # logger.error('unable to set transition state for %s to %s', obj.absolute_url(), kwargs['review_state']) - mark(obj, ISynced) - obj.reindexObject(idxs=['object_provides']) - return obj - - -def page_delete(obj): - logger.warning('delete %s', obj.absolute_url()) - api.content.delete(obj) - return None - - -def page_update(obj, data, **kwargs): - changed_fields = [] - for 
fieldname, new_value in data.items(): - # TODO: verificare che il fieldname sia nello schema dell'obj ? - # TODO: qual'è il modo corretto/generale di fare setter di un field ? - # TODO: vedere z3c.form e come fa lui a vedere se le modifiche sono effettive - # o se non è stato modificato nulla ? - old_value = getattr(aq_base(obj), fieldname, None) - if isinstance(new_value, RelationValue) and isinstance(old_value, RelationValue): - changed = (new_value.to_id != old_value.to_id) - else: - changed = (new_value != old_value) - if changed: - setattr(obj, fieldname, new_value) - changed_fields.append(fieldname) - if changed_fields: - notify(ObjectModifiedEvent(obj)) - # BBB: la reindexObject modifica la modification_date, la azzeriamo - # di nuovo col valore originale se esiste - if data.get('modification_date'): - setattr(obj, 'modification_date', data['modification_date']) - obj.reindexObject(idxs=['modified']) - logger.warning('update %s fields:%r', obj.absolute_url(), changed_fields) - return obj - - -def obj_getter(container, remoteid): - return container.get(remoteid) - - -# BBB: usare parametri o adapter ? -def rsync(container, - remoteid, - remoteurl=None, - data=None, - force_update=False, - extractor=json_extractor, - getter=obj_getter, - creator=page_creator, - updater=page_update, - deleter=page_delete, - verbose=False, - **kwargs): +class ScriptRunner: """ - * container: destination plone container - * remoteid: pageid (destinaton pageid, i.e. remote uuid) - * remoteurl: - - # TODO: usare if-modified-since dove possibile - # TODO: valutare eventualmente una funzione per definire l'id del contenuto locale + Run the script. """ - if not remoteurl and not data: - raise Exception('remoteurl or data required') - obj = getter(container, remoteid) - if remoteurl: - response = http.get(remoteurl) - else: - response = data - if obj: - # update or delete - if not response: - # delete (se da 5XX non si cancella...) 
- if response.status_code in ['401', '403', '404']: - return deleter(obj) - else: - # TODO: sollevare un'eccezione quando c'è un errore in modo - # che l'update venga fatto al sync sucessivo? - logger.error('unable to fetch %s (%s)', remoteurl, response.status_code) - return None + + def __init__(self, args): + portal = api.portal.get() + self.adapter = getMultiAdapter((portal, portal.REQUEST), IRedturtleRsyncAdapter) + self.options = self.get_args(args=args) + self.logdata = [] + self.n_items = 0 + self.n_created = 0 + self.n_updated = 0 + self.n_todelete = 0 + self.sync_uids = set() + self.start = None + self.end = None + + def get_args(self, args): + """ + Get the parameters from the command line arguments. + """ + # first, set the default values + parser = argparse.ArgumentParser() + + # dry-run mode + parser.add_argument( + "--dry-run", action="store_true", default=False, help="Dry-run mode" + ) + + # verbose mode + parser.add_argument("--verbose", default=False, help="Verbose mode") + + # logpath to write the log on Plone content + parser.add_argument( + "--logpath", + default=None, + help="Log destination path (relative to Plone site)", + ) + + # set data source + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("--source-path", help="Local source path") + group.add_argument("--source-url", help="Remote source URL") + + # then get from the adapter + self.adapter.set_args(parser) + + # Parsing degli argomenti + options = parser.parse_args(args) + return options + + def autolink(self, text): + """ + Fix links in the text. 
+ """ + return re.sub( + r"(https?://\S+|/\S+)", + r'\1', + text, + re.MULTILINE | re.DOTALL, + ) + + def get_frontend_url(self, item): + frontend_domain = api.portal.get_registry_record( + name="volto.frontend_domain", default="" + ) + if not frontend_domain or frontend_domain == "https://": + frontend_domain = "http://localhost:3000" + if frontend_domain.endswith("/"): + frontend_domain = frontend_domain[:-1] + portal_url = api.portal.get().portal_url() + + return item.absolute_url().replace(portal_url, frontend_domain) + + def log_info(self, msg, type="info"): + """ + append a message to the logdata list and print it. + """ + style = "" + if type == "error": + style = "padding:5px;background-color:red;color:#fff" + msg = f"[{datetime.now()}] {msg}" + self.logdata.append(f'

{self.autolink(msg)}

') + + # print the message + if type == "error": + logger.error(msg) + elif type == "warning": + logger.warning(msg) else: - # TODO: verifica sulle date di aggiornamento della pagina remota vs. locale - data = extractor(container, response, **kwargs) - if verbose: - # TODO - logger.warning('DEBUG: %s', data) - if data: - # default: se non ci sono dati di ultima modifica non si fanno - # modifiche - update = False - if 'modification_date' in data: - # BBB: le due date devono esistere ed essere entrambe DateTime - update = (data['modification_date'] > obj.modification_date) - if update or force_update: - return updater(obj, data, **kwargs) - else: - # se la pagina remota non ha i metadati e come se fosse stata cancellate - # quindi va cancllata anche quella locale - return deleter(obj) - return obj - else: - # create - if not response: - logger.error('unable to fetch %s (%s)', remoteurl, response.status_code) + if self.options.verbose: + logger.info(msg) + + def write_log(self): + """ + Write the log into the database. + """ + logpath = getattr(self.options, "logpath", None) + if not logpath: + logger.warning("No logpath specified, skipping log write into database.") + return + logcontainer = api.content.get(logpath) + if not logcontainer: + logger.warning( + f'Log container not found with path "{logpath}", skipping log write into database.' + ) + return + description = f"{self.n_items} elementi trovati, {self.n_created} creati, {self.n_updated} aggiornati, {self.n_todelete} da eliminare" + blockid = str(uuid.uuid4()) + api.content.create( + logcontainer, + "Document", + title=self.adapter.log_item_title(start=self.start, options=self.options), + description=description, + blocks={ + blockid: { + "@type": "html", + "html": "\n".join(self.logdata), + } + }, + blocks_layout={ + "items": [blockid], + }, + ) + + def get_data(self): + """ + get the data from the adapter. 
+ + The adapter should return: + - data: the data to be used for the rsync command + - error: an error message if there was an error in the data generation + """ + try: + data, error = self.adapter.get_data(options=self.options) + except Exception as e: + msg = f"Error in data generation: {e}" + self.log_info(msg=msg, type="error") + return None + if error: + msg = f"Error in data generation: {error}" + self.log_info(msg=msg, type="error") + return None + if not data: + msg = "No data to sync." + self.log_info(msg=msg, type="error") return None + return data + + def create_item(self, row, options): + """ + Create the item. + """ + try: + res = self.adapter.create_item(row=row, options=self.options) + except Exception as e: + msg = f"[Error] Unable to create item {row}: {e}" + self.log_info(msg=msg, type="error") + return + if not res: + msg = f"[Error] item {row} not created." + self.log_info(msg=msg, type="error") + return + + # adapter could create a list of items (maybe also children or related items) + if isinstance(res, list): + self.n_created += len(res) + for item in res: + msg = f"[CREATED] {'/'.join(item.getPhysicalPath())}" + self.log_info(msg=msg) + else: + self.n_created += 1 + msg = f"[CREATED] {'/'.join(res.getPhysicalPath())}" + self.log_info(msg=msg) + return res + + def update_item(self, item, row, options): + """ + Update the item. 
+ """ + try: + res = self.adapter.update_item(item=item, row=row, options=options) + except Exception as e: + msg = f"[Error] Unable to update item {self.get_frontend_url(item)}: {e}" + self.log_info(msg=msg, type="error") + return + + if not res: + msg = f"[SKIPPED] {self.get_frontend_url(item)}" + self.log_info(msg=msg) + return + + # adapter could create a list of items (maybe also children or related items) + if isinstance(res, list): + self.n_updated += len(res) + for updated in res: + msg = f"[UPDATED] {updated.absolute_url()}" + self.log_info(msg=msg) + self.sync_uids.add(updated.UID()) + updated.reindexObject() else: - data = extractor(container, response, **kwargs) - if data: - obj = creator(container, data, id=remoteid, **kwargs) - return obj - - -""" -# ESEMPIO: ALMA2021 vs. Magazine -from unibo.api.rsync import rsync -remoteurl = 'http://magazine.dev.dsaw.unibo.it/archivio/2018/mio-articolo-con-il-nuovo-font' -remoteid = '6fc2a87d4aa64cc7ad6b5bd0838a4c0c' # AKA http://magazine.dev.dsaw.unibo.it/archivio/2018/mio-articolo-con-il-nuovo-font/uuid - -def magazine_extractor(response, lang): - data = extruct.extract(response.text) - return data - -container = api.content.get('/alma2021/it/notizie') -obj_it = rsync(container, remoteid, remoteurl, extractor=magazine_extractor, lang='it') -container = api.content.get('/alma2021/en/news') -obj_en = rsync(container, remoteid, remoteurl, extractor=magazine_extractor, lang='en') -""" + self.n_updated += 1 + msg = f"[UPDATED] {self.get_frontend_url(item)}" + self.log_info(msg=msg) + self.sync_uids.add(item.UID()) + item.reindexObject() + + def delete_items(self, data): + """ + See if there are items to delete. 
+ """ + res = self.adapter.delete_items(data=data, sync_uids=self.sync_uids) + if not res: + return + if isinstance(res, list): + self.n_todelete += len(res) + for item in res: + msg = f"[DELETED] {item}" + self.log_info(msg=msg) + else: + self.n_todelete += 1 + msg = f"[DELETED] {res}" + self.log_info(msg=msg) + + def rsync(self): + """ + Do the rsync. + """ + self.start = datetime.now() + logger.info(f"[{self.start}] - START RSYNC") + data = self.get_data() + if not data: + # we already logged the error + logger.info(f"[{datetime.now()}] - END RSYNC") + return + + self.n_items = len(data) + self.log_info(msg=f"START - ITERATE DATA ({self.n_items} items)") + + # last_commit = 0 + i = 0 + for row in data[:200]: + i += 1 + if i % 100 == 0: + logger.info(f"Progress: {i}/{self.n_items}") + try: + item = self.adapter.find_item_from_row(row=row, options=self.options) + except Exception as e: + msg = f"[Error] Unable to find item from row {row}: {e}" + self.log_info(msg=msg, type="error") + continue + if not item: + self.create_item(row=row, options=self.options) + else: + self.update_item(item=item, row=row, options=self.options) + + # if self.n_updated + self.n_created - last_commit > 5: + # last_commit = self.n_updated + self.n_created + # if not getattr(self.options, "dry_run", False): + # logger.info( + # f"[{datetime.now()}] COMMIT ({i}/{self.n_items} items processed)" + # ) + # transaction.commit() + + self.delete_items(data) + + +def _main(args): + with api.env.adopt_user(username="admin"): + runner = ScriptRunner(args=args) + runner.rsync() + runner.write_log() + if not getattr(runner.options, "dry_run", False): + print(f"[{datetime.now()}] COMMIT") + transaction.commit() + + +def main(): + _main(sys.argv[3:]) + + +if __name__ == "__main__": + main() diff --git a/src/redturtle/rsync/setuphandlers.py b/src/redturtle/rsync/setuphandlers.py index 2e244de..0543099 100644 --- a/src/redturtle/rsync/setuphandlers.py +++ b/src/redturtle/rsync/setuphandlers.py @@ -5,7 
+5,6 @@ @implementer(INonInstallable) class HiddenProfiles(object): - def getNonInstallableProfiles(self): """Hide uninstall profile from site-creation and quickinstaller.""" return [ diff --git a/src/redturtle/rsync/testing.py b/src/redturtle/rsync/testing.py index a525f00..7871cba 100644 --- a/src/redturtle/rsync/testing.py +++ b/src/redturtle/rsync/testing.py @@ -11,7 +11,6 @@ class RedturtleRsyncLayer(PloneSandboxLayer): - defaultBases = (PLONE_FIXTURE,) def setUpZope(self, app, configurationContext): @@ -19,13 +18,15 @@ def setUpZope(self, app, configurationContext): # The z3c.autoinclude feature is disabled in the Plone fixture base # layer. import plone.app.dexterity + self.loadZCML(package=plone.app.dexterity) import plone.restapi + self.loadZCML(package=plone.restapi) self.loadZCML(package=redturtle.rsync) def setUpPloneSite(self, portal): - applyProfile(portal, 'redturtle.rsync:default') + applyProfile(portal, "redturtle.rsync:default") REDTURTLE_RSYNC_FIXTURE = RedturtleRsyncLayer() @@ -33,13 +34,13 @@ def setUpPloneSite(self, portal): REDTURTLE_RSYNC_INTEGRATION_TESTING = IntegrationTesting( bases=(REDTURTLE_RSYNC_FIXTURE,), - name='RedturtleRsyncLayer:IntegrationTesting', + name="RedturtleRsyncLayer:IntegrationTesting", ) REDTURTLE_RSYNC_FUNCTIONAL_TESTING = FunctionalTesting( bases=(REDTURTLE_RSYNC_FIXTURE,), - name='RedturtleRsyncLayer:FunctionalTesting', + name="RedturtleRsyncLayer:FunctionalTesting", ) @@ -49,5 +50,5 @@ def setUpPloneSite(self, portal): REMOTE_LIBRARY_BUNDLE_FIXTURE, z2.ZSERVER_FIXTURE, ), - name='RedturtleRsyncLayer:AcceptanceTesting', + name="RedturtleRsyncLayer:AcceptanceTesting", ) diff --git a/src/redturtle/rsync/tests/test_robot.py b/src/redturtle/rsync/tests/test_robot.py index e748035..59cac43 100644 --- a/src/redturtle/rsync/tests/test_robot.py +++ b/src/redturtle/rsync/tests/test_robot.py @@ -11,18 +11,21 @@ def test_suite(): suite = unittest.TestSuite() current_dir = os.path.abspath(os.path.dirname(__file__)) - robot_dir 
= os.path.join(current_dir, 'robot') + robot_dir = os.path.join(current_dir, "robot") robot_tests = [ - os.path.join('robot', doc) for doc in os.listdir(robot_dir) - if doc.endswith('.robot') and doc.startswith('test_') + os.path.join("robot", doc) + for doc in os.listdir(robot_dir) + if doc.endswith(".robot") and doc.startswith("test_") ] for robot_test in robot_tests: robottestsuite = robotsuite.RobotTestSuite(robot_test) robottestsuite.level = ROBOT_TEST_LEVEL - suite.addTests([ - layered( - robottestsuite, - layer=REDTURTLE_RSYNC_ACCEPTANCE_TESTING, - ), - ]) + suite.addTests( + [ + layered( + robottestsuite, + layer=REDTURTLE_RSYNC_ACCEPTANCE_TESTING, + ), + ] + ) return suite diff --git a/src/redturtle/rsync/tests/test_setup.py b/src/redturtle/rsync/tests/test_setup.py index 3257e20..9c2c37e 100644 --- a/src/redturtle/rsync/tests/test_setup.py +++ b/src/redturtle/rsync/tests/test_setup.py @@ -21,48 +21,45 @@ class TestSetup(unittest.TestCase): def setUp(self): """Custom shared utility setup for tests.""" - self.portal = self.layer['portal'] + self.portal = self.layer["portal"] if get_installer: - self.installer = get_installer(self.portal, self.layer['request']) + self.installer = get_installer(self.portal, self.layer["request"]) else: - self.installer = api.portal.get_tool('portal_quickinstaller') + self.installer = api.portal.get_tool("portal_quickinstaller") def test_product_installed(self): """Test if redturtle.rsync is installed.""" - self.assertTrue(self.installer.is_product_installed( - 'redturtle.rsync')) + self.assertTrue(self.installer.is_product_installed("redturtle.rsync")) def test_browserlayer(self): """Test that IRedturtleRsyncLayer is registered.""" from plone.browserlayer import utils from redturtle.rsync.interfaces import IRedturtleRsyncLayer - self.assertIn( - IRedturtleRsyncLayer, - utils.registered_layers()) + self.assertIn(IRedturtleRsyncLayer, utils.registered_layers()) -class TestUninstall(unittest.TestCase): +class 
TestUninstall(unittest.TestCase): layer = REDTURTLE_RSYNC_INTEGRATION_TESTING def setUp(self): - self.portal = self.layer['portal'] + self.portal = self.layer["portal"] if get_installer: - self.installer = get_installer(self.portal, self.layer['request']) + self.installer = get_installer(self.portal, self.layer["request"]) else: - self.installer = api.portal.get_tool('portal_quickinstaller') + self.installer = api.portal.get_tool("portal_quickinstaller") roles_before = api.user.get_roles(TEST_USER_ID) - setRoles(self.portal, TEST_USER_ID, ['Manager']) - self.installer.uninstall_product('redturtle.rsync') + setRoles(self.portal, TEST_USER_ID, ["Manager"]) + self.installer.uninstall_product("redturtle.rsync") setRoles(self.portal, TEST_USER_ID, roles_before) def test_product_uninstalled(self): """Test if redturtle.rsync is cleanly uninstalled.""" - self.assertFalse(self.installer.is_product_installed( - 'redturtle.rsync')) + self.assertFalse(self.installer.is_product_installed("redturtle.rsync")) def test_browserlayer_removed(self): """Test that IRedturtleRsyncLayer is removed.""" from plone.browserlayer import utils from redturtle.rsync.interfaces import IRedturtleRsyncLayer + self.assertNotIn(IRedturtleRsyncLayer, utils.registered_layers())