From 0c1f2994480b33068e7822712f5f577dcd87eb0d Mon Sep 17 00:00:00 2001 From: Kiran Dawadi Date: Wed, 19 Mar 2025 19:05:55 -0500 Subject: [PATCH 1/7] Fix DocumentType filter in Delta URLs page --- sde_indexing_helper/static/js/delta_url_list.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sde_indexing_helper/static/js/delta_url_list.js b/sde_indexing_helper/static/js/delta_url_list.js index 85a92093..c961a981 100644 --- a/sde_indexing_helper/static/js/delta_url_list.js +++ b/sde_indexing_helper/static/js/delta_url_list.js @@ -881,6 +881,11 @@ function initializeDataTable() { $("#deltaDocTypeMatchPatternFilter").on("beforeinput", function (val) { document_type_patterns_table.columns(0).search(this.value).draw(); }); + + $("#document-type-patterns-dropdown-2").on("change", function () { + document_type_patterns_table.columns(2).search(this.value).draw(); + }); + } var division_patterns_table = $("#division_patterns_table").DataTable({ From fce770a9bca82911c84178ebb4e8bf548ebaaa86 Mon Sep 17 00:00:00 2001 From: Kiran Dawadi Date: Wed, 19 Mar 2025 19:14:16 -0500 Subject: [PATCH 2/7] Add Changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fdc59e8..2a3380d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -137,3 +137,8 @@ For each PR made, an entry should be added to this changelog. It should contain - Description: The feedback form API was throwing CORS errors and to rectify that, we need to add the apt https link for sde-lrm. - Changes: - Added `https://sde-lrm.nasa-impact.net` to `CORS_ALLOWED_ORIGINS` in the base settings. + +- 1252-document-type-filter-not-working-in-delta-urls-page + - Description: Fixed document type filtering functionality in the "Document Type Patterns" tab in Delta URLs page. + - Changes: + - Added a new event listener to the Document Type Patterns dropdown to trigger the filtering of the table results based on the selected value. 
From d3e75d9aec7931b8c5132c96e478293a4d5988e5 Mon Sep 17 00:00:00 2001 From: Kiran Dawadi Date: Wed, 19 Mar 2025 20:02:55 -0500 Subject: [PATCH 3/7] Fix sorting issue --- .../templates/sde_collections/collection_list.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sde_indexing_helper/templates/sde_collections/collection_list.html b/sde_indexing_helper/templates/sde_collections/collection_list.html index 738ece8f..1930022e 100644 --- a/sde_indexing_helper/templates/sde_collections/collection_list.html +++ b/sde_indexing_helper/templates/sde_collections/collection_list.html @@ -151,14 +151,14 @@

Welcome back!

{{ collection.get_division_display }} - + {{ collection.num_delta_urls|intcomma }} - + {{ collection.num_curated_urls|intcomma }} From 0e4d44211e428edcb99a0f72c22e9b1c7c67d8cd Mon Sep 17 00:00:00 2001 From: Kiran Dawadi Date: Wed, 19 Mar 2025 20:14:13 -0500 Subject: [PATCH 4/7] Add Changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fdc59e8..9dab3e7d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -137,3 +137,9 @@ For each PR made, an entry should be added to this changelog. It should contain - Description: The feedback form API was throwing CORS errors and to rectify that, we need to add the apt https link for sde-lrm. - Changes: - Added `https://sde-lrm.nasa-impact.net` to `CORS_ALLOWED_ORIGINS` in the base settings. + +- 1251-column-sorting-issue-curated-urls-count-sorts-by-delta-urls-count + - Description: Fixed incorrect sorting behavior in Collections table where sorting by Curated URLs column was not working as expected. + - Changes: + - Added `data-order` attribute to Delta URLs and Curated URLs table cells to enable proper numeric sorting + - Ensured raw numeric values are used for sorting while maintaining formatted display with anchor tags From b082f3f41b93a62136226450794012e825a50c0d Mon Sep 17 00:00:00 2001 From: Kiran Dawadi Date: Thu, 20 Mar 2025 13:37:48 -0500 Subject: [PATCH 5/7] Updated searchpane comparisons --- CHANGELOG.md | 4 +-- .../static/js/collection_list.js | 36 +++++++++++-------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9dab3e7d..4cdb7dbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -141,5 +141,5 @@ For each PR made, an entry should be added to this changelog. It should contain - 1251-column-sorting-issue-curated-urls-count-sorts-by-delta-urls-count - Description: Fixed incorrect sorting behavior in Collections table where sorting by Curated URLs column was not working as expected. 
- Changes: - - Added `data-order` attribute to Delta URLs and Curated URLs table cells to enable proper numeric sorting - - Ensured raw numeric values are used for sorting while maintaining formatted display with anchor tags + - Added `data-order` attribute to URL count columns for proper numeric sorting + - Updated SearchPane comparisons to use `@data-order` values instead of string-based loose equality checks to ensure correct numeric filtering diff --git a/sde_indexing_helper/static/js/collection_list.js b/sde_indexing_helper/static/js/collection_list.js index 78fd4894..7eb5c7bf 100644 --- a/sde_indexing_helper/static/js/collection_list.js +++ b/sde_indexing_helper/static/js/collection_list.js @@ -138,43 +138,47 @@ let table = $("#collection_table").DataTable({ { label: "0 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.DELTA_URLS]).text() == 0; + return parseInt(rowData[COLUMNS.DELTA_URLS]['@data-order']) === 0; }, }, { label: "1 solo URL", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.DELTA_URLS]).text() == 1; + return parseInt(rowData[COLUMNS.DELTA_URLS]['@data-order']) === 1; }, }, { label: "1 to 100 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.DELTA_URLS]).text() <= 100 && $(rowData[COLUMNS.DELTA_URLS]).text() > 1; + const value = parseInt(rowData[COLUMNS.DELTA_URLS]['@data-order']); + return value > 1 && value <= 100; }, }, { label: "100 to 1,000 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.DELTA_URLS]).text() <= 1000 && $(rowData[COLUMNS.DELTA_URLS]).text() > 100; + const value = parseInt(rowData[COLUMNS.DELTA_URLS]['@data-order']); + return value > 100 && value <= 1000; }, }, { label: "1,000 to 10,000 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.DELTA_URLS]).text() <= 10000 && $(rowData[COLUMNS.DELTA_URLS]).text() > 1000; + const value = parseInt(rowData[COLUMNS.DELTA_URLS]['@data-order']); + return value > 1000 && value <= 10000; }, }, { 
label: "10,000 to 100,000 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.DELTA_URLS]).text() <= 100000 && $(rowData[COLUMNS.DELTA_URLS]).text() > 10000; + const value = parseInt(rowData[COLUMNS.DELTA_URLS]['@data-order']); + return value > 10000 && value <= 100000; }, }, { label: "Over 100,000 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.DELTA_URLS]).text() > 100000; + return parseInt(rowData[COLUMNS.DELTA_URLS]['@data-order']) > 100000; }, }, ], @@ -189,43 +193,47 @@ let table = $("#collection_table").DataTable({ { label: "0 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.CURATED_URLS]).text() == 0; + return parseInt(rowData[COLUMNS.CURATED_URLS]['@data-order']) === 0; }, }, { label: "1 solo URL", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.CURATED_URLS]).text() == 1; + return parseInt(rowData[COLUMNS.CURATED_URLS]['@data-order']) === 1; }, }, { label: "1 to 100 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.CURATED_URLS]).text() <= 100 && $(rowData[COLUMNS.CURATED_URLS]).text() > 1; + const value = parseInt(rowData[COLUMNS.CURATED_URLS]['@data-order']); + return value > 1 && value <= 100; }, }, { label: "100 to 1,000 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.CURATED_URLS]).text() <= 1000 && $(rowData[COLUMNS.CURATED_URLS]).text() > 100; + const value = parseInt(rowData[COLUMNS.CURATED_URLS]['@data-order']); + return value > 100 && value <= 1000; }, }, { label: "1,000 to 10,000 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.CURATED_URLS]).text() <= 10000 && $(rowData[COLUMNS.CURATED_URLS]).text() > 1000; + const value = parseInt(rowData[COLUMNS.CURATED_URLS]['@data-order']); + return value > 1000 && value <= 10000; }, }, { label: "10,000 to 100,000 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.CURATED_URLS]).text() <= 100000 && $(rowData[COLUMNS.CURATED_URLS]).text() > 10000; + 
const value = parseInt(rowData[COLUMNS.CURATED_URLS]['@data-order']); + return value > 10000 && value <= 100000; }, }, { label: "Over 100,000 URLs", value: function (rowData, rowIdx) { - return $(rowData[COLUMNS.CURATED_URLS]).text() > 100000; + return parseInt(rowData[COLUMNS.CURATED_URLS]['@data-order']) > 100000; }, }, ], From 4a15389158a218e5216aa750c7987bf19c53af4e Mon Sep 17 00:00:00 2001 From: Dhanur Sharma Date: Thu, 20 Mar 2025 17:24:06 -0500 Subject: [PATCH 6/7] Updated process_inference_queue to run on the schedule --- config/celery.py | 9 --------- inference/__init__.py | 1 + inference/apps.py | 3 +++ inference/signals.py | 24 ++++++++++++++++++++++++ 4 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 inference/signals.py diff --git a/config/celery.py b/config/celery.py index 1ab83cb9..465e6fb7 100644 --- a/config/celery.py +++ b/config/celery.py @@ -2,7 +2,6 @@ import os from celery import Celery -from celery.schedules import crontab # Set the default Django settings module os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local") @@ -14,11 +13,3 @@ # Load task modules from all registered Django app configs app.autodiscover_tasks() - -app.conf.beat_schedule = { - "process-inference-queue": { - "task": "inference.tasks.process_inference_job_queue", - # Only run between 6pm and 7am - "schedule": crontab(minute="*/5", hour="18-23,0-6"), - }, -} diff --git a/inference/__init__.py b/inference/__init__.py index e69de29b..d1fe6f78 100644 --- a/inference/__init__.py +++ b/inference/__init__.py @@ -0,0 +1 @@ +default_app_config = "inference.apps.InferenceConfig" diff --git a/inference/apps.py b/inference/apps.py index 99523aba..657933cb 100644 --- a/inference/apps.py +++ b/inference/apps.py @@ -5,3 +5,6 @@ class InferenceConfig(AppConfig): default_auto_field = "django.db.models.BigAutoField" name = "inference" verbose_name = "Inference" + + def ready(self): + import inference.signals # noqa F401 diff --git a/inference/signals.py 
b/inference/signals.py new file mode 100644 index 00000000..70d0dd39 --- /dev/null +++ b/inference/signals.py @@ -0,0 +1,24 @@ +from django.db.models.signals import post_migrate +from django.dispatch import receiver + + +@receiver(post_migrate) +def create_periodic_tasks(sender, **kwargs): + if sender.name == "inference": + from django_celery_beat.models import CrontabSchedule, PeriodicTask + + # Create schedule for every 5 minutes between 6pm-7am + crontab, _ = CrontabSchedule.objects.get_or_create( + minute="*/5", + hour="18-23,0-6", + day_of_week="*", + day_of_month="*", + month_of_year="*", + ) + + # Create the periodic task if it doesn't exist + PeriodicTask.objects.get_or_create( + crontab=crontab, + name="Process inference queue (6pm-7am)", + task="inference.tasks.process_inference_job_queue", + ) From 0921d79d0086d8ae72d5bc5466831adb91c16aef Mon Sep 17 00:00:00 2001 From: Dhanur Sharma Date: Thu, 20 Mar 2025 17:35:51 -0500 Subject: [PATCH 7/7] Updated test batch values --- inference/tests/test_batch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inference/tests/test_batch.py b/inference/tests/test_batch.py index cce016e7..b6d10963 100644 --- a/inference/tests/test_batch.py +++ b/inference/tests/test_batch.py @@ -36,7 +36,7 @@ def mock_url_large(self): """Returns a mock URL object with large text content""" url = Mock() url.id = 2 - url.scraped_text = "X" * 10010 # Exceeds default max size + url.scraped_text = "X" * 12000 # Exceeds default max size url.scraped_title = "Large Content Page" url.url = "https://example.com/large-page" return url @@ -191,7 +191,7 @@ def test_iter_url_batches_mix_normal_and_oversized(self, processor): # Normal URL url1 = Mock(id=1, scraped_text="X" * 2000, scraped_title="Title 1", url="https://example.com/1") # Oversized URL - url2 = Mock(id=2, scraped_text="X" * 11000, scraped_title="Title 2", url="https://example.com/2") + url2 = Mock(id=2, scraped_text="X" * 15000, scraped_title="Title 2", 
url="https://example.com/2") # Another normal URL url3 = Mock(id=3, scraped_text="X" * 3000, scraped_title="Title 3", url="https://example.com/3")