diff --git a/.agents/skills/add-django-config-env-var/SKILL.md b/.agents/skills/add-django-config-env-var/SKILL.md new file mode 100644 index 0000000000..e3277edd08 --- /dev/null +++ b/.agents/skills/add-django-config-env-var/SKILL.md @@ -0,0 +1,63 @@ +--- +name: add-django-config-env-var +description: Add a new environment variable for a Django setting in Baserow and propagate it to the few repo files that usually need it. Use this when a request says a config env var must be added in several places or references `INTEGRATION_LOCAL_BASEROW_PAGE_SIZE_LIMIT` as the pattern to follow. +--- + +# Add Django Config Env Var + +Use `INTEGRATION_LOCAL_BASEROW_PAGE_SIZE_LIMIT` as the template. The env var name should be prefixed with `BASEROW_`, but the Django setting name is not (for example, env var `BASEROW_MY_SETTING` maps to setting `MY_SETTING`). + +Keep the change simple and explicit. Do not add abstractions for this. + +## Files To Check + +When adding a new setting, usually check these files: + +- `backend/src/baserow/config/settings/base.py` +- `docker-compose.yml` +- `docker-compose.no-caddy.yml` +- `web-frontend/env-remap.mjs` +- Backend or frontend code that uses the setting +- A focused test if behavior changes + +## Workflow + +1. Add the Django setting in `backend/src/baserow/config/settings/base.py` near the closest related setting. + +Example: + +```python +MY_SETTING = int(os.getenv("BASEROW_MY_SETTING", 123)) +``` + +2. If the variable should be configurable in Docker, add it everywhere the template variable appears in: + +- `docker-compose.yml` +- `docker-compose.no-caddy.yml` + +3. If the frontend needs it at runtime, add it to `web-frontend/env-remap.mjs`. + +4. Update consumers to use the setting: + +- Backend: `settings.MY_SETTING` +- Tests: `override_settings(MY_SETTING=...)` + +5. Add or update a targeted test if the setting changes behavior. + +6. Add the related documentation. + +## Quick Checklist + +1. Add it in `base.py` +2. Mirror the matching Docker entries +3. Add the Nuxt remap if frontend code needs it +4. 
Use `settings.MY_SETTING` in code +5. Add a focused test if needed +6. Add the documentation + +## Guardrails + +- Do not add a raw `os.getenv(...)` in application code when the value belongs in Django settings. +- Do not update only one Docker location if the example appears in several places. +- Do not expose a backend-only setting to Nuxt unless the frontend actually needs it. +- Prefer copying the closest existing setting instead of inventing a new pattern. diff --git a/.agents/skills/add-django-config-env-var/agents/openai.yaml b/.agents/skills/add-django-config-env-var/agents/openai.yaml new file mode 100644 index 0000000000..5f44641896 --- /dev/null +++ b/.agents/skills/add-django-config-env-var/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Add Django Config Env Var" + short_description: "Add and propagate a Django env variable" + default_prompt: "Use $add-django-config-env-var to add a new environment variable that changes Django configuration across Baserow." diff --git a/.agents/skills/create-update-service/SKILL.md b/.agents/skills/create-update-service/SKILL.md new file mode 100644 index 0000000000..18e8de5e42 --- /dev/null +++ b/.agents/skills/create-update-service/SKILL.md @@ -0,0 +1,145 @@ +--- +name: create-update-service +description: Create or update Baserow Integrations and Services +--- + +# Create Or Update Baserow Services And Integrations + +Use this skill when a task involves creating or updating a Baserow integration type or service type in the `contrib/integrations` stack. + +This repo already has the core patterns. Prefer copying an existing implementation close to the target behavior instead of inventing a new structure. + +Integrations and services are shared by the Application builder and the Automation tool. Each of them should be compatible with both tools. + +## First Step + +Before editing, identify which of these applies: + +1. New integration type only +2. New service type attached to an existing integration +3. 
Update to an existing integration or service +4. Full feature spanning backend, frontend, translations, and tests + +Then inspect the closest existing example with `rg` before changing files. + +Useful starting points: + +- Backend registrations: `backend/src/baserow/contrib/integrations/apps.py` +- Frontend registrations: `web-frontend/modules/integrations/plugin.js` +- Core backend service examples: `backend/src/baserow/contrib/integrations/core/service_types.py` +- Core frontend service examples: `web-frontend/modules/integrations/core/serviceTypes.js` +- Backend integration example: `backend/src/baserow/contrib/integrations/core/integration_types.py` +- Frontend integration example: `web-frontend/modules/integrations/core/integrationTypes.js` + +## Backend Checklist + +For a new or updated service type, check these areas: + +1. Model fields exist and support the intended configuration. +2. The `ServiceType` subclass exposes the right `type`, `model_class`, `dispatch_types`, `allowed_fields`, and serializer configuration. +3. Related nested objects are handled in `after_create`, update helpers, or custom methods when needed. +4. Context/schema methods are implemented if the service emits data for downstream nodes. +5. The service is registered in `backend/src/baserow/contrib/integrations/apps.py`. +6. A migration is added if models changed. + +For a new or updated integration type, check these areas: + +1. The `IntegrationType` subclass defines `type`, `model_class`, serializer field names, allowed fields, and sensitive fields when relevant. +2. Any integration-specific context data or permissions behavior is preserved. +3. The integration is registered in `backend/src/baserow/contrib/integrations/apps.py`. +4. A migration is added if models changed. 
+ +Common backend files to inspect: + +- `backend/src/baserow/contrib/integrations/*/models.py` +- `backend/src/baserow/contrib/integrations/*/service_types.py` +- `backend/src/baserow/contrib/integrations/*/integration_types.py` +- `backend/src/baserow/contrib/integrations/api/**` +- `backend/src/baserow/contrib/integrations/migrations/**` + +## Frontend Checklist + +If the feature is user-configurable, update the frontend in parallel with the backend: + +1. Add or update the service or integration type class. +2. Register it in `web-frontend/modules/integrations/plugin.js`. +3. Add or update the form component used to configure it. +4. Add translations in `web-frontend/modules/integrations/locales/en.json`. +5. Add any supporting mixins, helpers, or assets only if the existing pattern requires them. + +Common frontend files to inspect: + +- `web-frontend/modules/integrations/*/serviceTypes.js` +- `web-frontend/modules/integrations/*/integrationTypes.js` +- `web-frontend/modules/integrations/*/components/services/**` +- `web-frontend/modules/integrations/*/components/integrations/**` +- `web-frontend/modules/integrations/locales/en.json` + +## How To Implement + +### Creating a new service type + +1. Start from the closest existing service type with similar dispatch behavior: + `ACTION`, `DATA`, or trigger behavior. +2. Add or update the backend model if the service needs persisted fields. +3. Implement or extend the backend `ServiceType` subclass. +4. Register the service in `backend/src/baserow/contrib/integrations/apps.py`. +5. Implement the frontend service type class and form component. +6. Register the service in `web-frontend/modules/integrations/plugin.js`. +7. Add translations and tests. + +### Creating a new integration type + +1. Start from the closest existing integration type with similar auth or configuration needs. +2. Add or update the backend model if required. +3. Implement or extend the backend `IntegrationType` subclass. +4. 
Register the integration in `backend/src/baserow/contrib/integrations/apps.py`. +5. Implement the frontend integration type class and form component. +6. Register the integration in `web-frontend/modules/integrations/plugin.js`. +7. Add translations and tests. + +### Updating an existing type + +1. Find all backend and frontend registrations for the type string. +2. Check whether API serializers, nested relations, or schema generation need updates. +3. Keep existing `type` identifiers stable unless the user explicitly wants a breaking change. +4. Check whether old records need a migration or a data backfill. +5. Update tests for both create and update flows when behavior changes. + +## Testing Expectations + +Run the narrowest relevant tests first or create one if none exists. + +Backend examples: + +- Integration and Service tests in `backend/tests/baserow/api/integrations/**` + +Frontend examples: + +- Unit tests near `web-frontend/test/unit/integrations/**` + +Minimum validation before finishing: + +1. The type is registered on both backend and frontend when applicable. +2. The create and update flows serialize the intended fields. +3. Required translations exist. +4. Migrations are present for model changes. +5. The most relevant targeted tests pass, or the failure is reported explicitly. + +## Search Patterns + +Use these searches to move quickly: + +- `rg -n "class .*ServiceType" backend/src/baserow/contrib/integrations` +- `rg -n "class .*IntegrationType" backend/src/baserow/contrib/integrations` +- `rg -n "register\\(" backend/src/baserow/contrib/integrations/apps.py web-frontend/modules/integrations/plugin.js` +- `rg -n "getType\\(\\)" web-frontend/modules/integrations` +- `rg -n "\"serviceType\\.|integrationType\\.\"" web-frontend/modules/integrations/locales/en.json` + +## Guardrails + +- Do not add a backend type without checking the matching frontend registration path. +- Do not rename a persisted `type` string casually. 
+- Do not forget migrations when model fields change. +- Do not add broad abstractions unless at least two existing implementations already need them. +- Prefer matching the nearest existing module layout over introducing a new folder structure. diff --git a/.agents/skills/create-update-service/agents/openai.yaml b/.agents/skills/create-update-service/agents/openai.yaml new file mode 100644 index 0000000000..d6471be610 --- /dev/null +++ b/.agents/skills/create-update-service/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: 'Integrations and Services' + short_description: 'Create or update Baserow Integrations and Services' + default_prompt: 'Use $create-update-service to create or update Baserow Integrations and Services.' diff --git a/.agents/skills/write-frontend-unit-test/SKILL.md b/.agents/skills/write-frontend-unit-test/SKILL.md new file mode 100644 index 0000000000..85981e2f77 --- /dev/null +++ b/.agents/skills/write-frontend-unit-test/SKILL.md @@ -0,0 +1,175 @@ +--- +name: write-frontend-unit-test +description: Write or update Baserow frontend unit tests for core, premium, or enterprise code using the repo's existing Vitest, Nuxt, Vue Test Utils, TestApp, and snapshot patterns. +--- + +# Write Baserow Frontend Unit Tests + +Use this skill when a task is to add, fix, or extend a frontend unit test in `web-frontend`, `premium/web-frontend`, or `enterprise/web-frontend`. + +Do not invent a generic Vue testing style. This repo already has established patterns. Start by finding the closest existing spec and copy its setup shape. + +## First Step + +Before editing, identify the test target: + +1. Pure utility or parser function +2. Vuex store logic +3. Vue component mounted with the shared app context +4. Nuxt/Vue 3 component mounted directly with `mountSuspended` +5. Premium or enterprise variant of one of the above + +Then inspect the nearest existing spec in the same module area. 
+ +Useful searches: + +- `rg --files web-frontend/test premium/web-frontend/test enterprise/web-frontend/test | rg '\.spec\.'` +- `rg -n "new TestApp\\(|new PremiumTestApp\\(|mountSuspended\\(" web-frontend/test premium/web-frontend/test enterprise/web-frontend/test` +- `rg -n "toMatchSnapshot\\(|vi\\.fn\\(|vi\\.spyOn\\(" web-frontend/test premium/web-frontend/test enterprise/web-frontend/test` + +## Tooling Used In This Repo + +Current frontend unit tests use: + +- `vitest` for `describe`, `test`, `expect`, `vi` +- `@vue/test-utils` +- `@nuxt/test-utils/runtime` with `mountSuspended` +- Repo helpers such as `TestApp`, `PremiumTestApp`, `MockServer`, and fixtures under `web-frontend/test` +- Snapshot assertions for rendered HTML when the component output matters + +Important local files: + +- `web-frontend/vitest.setup.ts` +- `web-frontend/test/helpers/testApp.js` +- `premium/web-frontend/test/helpers/premiumTestApp.js` + +`vitest.setup.ts` already mocks i18n, UUID generation, and `WebSocket`. Reuse that environment instead of re-mocking those globally in each spec. + +## Choose The Right Pattern + +### Pure utility tests + +For functions in `modules/*/utils/**`, keep the test simple: + +1. Import the function directly. +2. Use plain inputs and deterministic assertions. +3. Prefer `toStrictEqual`, `toBe`, or explicit formatted objects over snapshots. + +Good examples: + +- `web-frontend/test/unit/core/utils/date.spec.js` +- `web-frontend/test/unit/core/utils/string.spec.js` + +### Store tests + +For Vuex store behavior, prefer `TestApp` unless the existing spec clearly uses a temporary local store: + +1. Create `testApp = new TestApp()` in `beforeEach`. +2. Read `store = testApp.store`. +3. Seed state through store actions or the mock server. +4. Always `await testApp.afterEach()` in `afterEach`. 
+ +Good examples: + +- `web-frontend/test/unit/core/store/auth.spec.js` +- `web-frontend/test/unit/builder/store/dataSource.spec.js` + +If the code lives in premium and needs premium-only auth/license behavior, use `PremiumTestApp`. + +### Shared app component tests + +For many components, especially older patterns or components coupled to store, router, registry, or client behavior: + +1. Create `testApp = new TestApp()` or `new PremiumTestApp()`. +2. Mount with `testApp.mount(Component, { props, propsData, slots, listeners, global })`. +3. Prefer the existing helper in the file, for example `mountComponent(...)`. +4. Clean up with `await testApp.afterEach()`. + +`TestApp.mount` supports both `props` and legacy `propsData`, and converts `listeners` into Vue 3 event props. Match the nearby spec instead of rewriting all setup. + +Good examples: + +- `web-frontend/test/unit/core/components/dropdown.spec.js` +- `premium/web-frontend/test/unit/premium/view/calendar/calendarView.spec.js` + +### Direct `mountSuspended` component tests + +For newer Nuxt/Vue 3 component tests that do not need the full helper wrapper: + +1. Use `const testApp = useNuxtApp()` in `beforeEach` if the component expects injected app/store context. +2. Mount with `mountSuspended(Component, { props, slots, global: { provide, stubs, mocks } })`. +3. Provide injected dependencies explicitly. + +Good examples: + +- `web-frontend/test/unit/builder/components/elements/components/HeadingElement.spec.js` + +## Assertions + +Prefer the narrowest assertion that proves behavior: + +- Use `toStrictEqual` or `toEqual` for transformed data and store state. +- Use `toBe` for scalar values. +- Use `vi.fn()` and `vi.spyOn()` for event handlers and method calls. +- Use snapshots for rendered markup where the repo already uses them. + +Do not default to snapshots for pure logic. 
+ +When asserting reactive store objects, this repo sometimes normalizes them with: + +```js +JSON.parse(JSON.stringify(value)) +``` + +Use that only when the nearby test does it for Vue reactivity serialization issues. + +Don't assert internals; always assert the visible result in the DOM. For instance, don't +use + +```js +expect(wrapper.vm.values.use_instance_smtp_settings).toBe(false) // BAD +``` + +Don't directly use `vm` properties. + +## Mocking And Fixtures + +Prefer repo helpers over bespoke mocks: + +1. Use `testApp.mockServer` when the behavior depends on store-backed API calls. +2. Use fixtures under `web-frontend/test/fixtures` and premium or enterprise fixture folders when suitable. +3. Use `testApp.dontFailOnErrorResponses()` only when the test intentionally exercises failing responses. + +Do not build a large custom mock environment if `TestApp` already provides the needed app, client, registry, router, and store wiring. + +## File Placement + +Follow the existing test tree: + +- Core: `web-frontend/test/unit/...` +- Premium: `premium/web-frontend/test/unit/...` +- Enterprise: `enterprise/web-frontend/test/unit/...` + +Keep the spec near the feature area rather than creating a new generic test folder. + +## Validation + +Run the narrowest relevant test command first. + +Examples: + +- `just f yarn test:core --run test/unit/core/components/dropdown.spec.js` +- `just f yarn test:core --run test/unit/core/store/auth.spec.js` +- `just f yarn test:premium --run ../premium/web-frontend/test/unit/premium/view/calendar/calendarView.spec.js` +- `just f yarn test:enterprise --run ../enterprise/web-frontend/test/unit/enterprise/plugins.spec.js` + +If a snapshot changes intentionally, review the diff instead of blindly accepting it. + +## Guardrails + +- Do not introduce Jest APIs. Use Vitest APIs already present in the repo. +- Do not add a standalone mount helper when `TestApp` or `PremiumTestApp` already fits. 
+- Do not over-mock store, router, or client dependencies if the real test helpers can provide them. +- Do not mix unrelated styles in one file. Match the nearest local spec. +- Do not leave out `afterEach` cleanup when using `TestApp` or `PremiumTestApp`. +- Do not create broad integration-style tests when a focused unit test is enough. diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..0dbeaf9a2c --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,36 @@ +# Repository Guidelines + +## Project Structure & Module Organization + +Baserow is a monorepo. Core Django code lives in `backend/src`, shared backend tests in `backend/tests`, and the main Nuxt app in `web-frontend/` (`modules/`, `server/`, `test/`, `stories/`). Paid extensions mirror that layout in `premium/backend`, `premium/web-frontend`, `enterprise/backend`, and `enterprise/web-frontend`. End-to-end coverage lives in `e2e-tests/`. Product and contributor docs are in `docs/`, while deployment recipes are under `deploy/`. + +## Build, Test, and Development Commands + +Use `just` from the repo root; it wraps the backend and frontend workflows consistently for local and Docker setups. + +- `just init` installs dependencies and creates `.env.local`. +- `just dev up` starts the local stack; `just dc-dev up -d` runs the Docker dev environment. +- `just b test -n=auto` runs backend pytest suites in parallel. +- `just f test` runs frontend Vitest suites. +- `just lint` runs both backend and frontend linters; `just fix` applies auto-fixes. +- `just b migrate` runs Django migrations. + +For direct package-manager use, backend commands run through `uv` and frontend commands through `yarn`. + +## Coding Style & Naming Conventions + +Python targets Python 3.14, uses 4-space indentation, and is formatted and linted with Ruff (`ruff check`, `ruff format`) with an 88-character line length. Follow existing Django app/module naming and keep new tests in `test_*.py` or `*_test.py` files. 
Frontend code uses ESLint, Stylelint, and Prettier; SCSS should follow BEM-style naming already used in `web-frontend/modules`. + +## Testing Guidelines + +Backend tests use `pytest` with `pytest-django`; frontend tests use `vitest`; browser flows live in `e2e-tests/`. Add unit tests for backend changes and targeted frontend tests for component or store behavior. + +Examples: `just b test backend/tests/path/`, `just b test-coverage`, `just f test -- --coverage`, `just f yarn test:core path/to/test`. + +## Commit & Pull Request Guidelines + +Recent history favors short, imperative subjects, often with Conventional Commit prefixes such as `fix:`, `feat:`, and `chore(deps):`. Branch from `develop`, keep PRs focused, and link the related issue or discussion. Include a clear summary, note schema or env changes, attach screenshots for UI work, add a changelog entry when required, and make sure the relevant lint and test commands pass before opening the PR. + +## Security & Configuration Tips + +Do not commit secrets or local overrides. Use `.env.local` for development, keep production settings in the documented deploy configs, and report vulnerabilities privately via the contact path in `CONTRIBUTING.md` rather than opening a public issue. 
diff --git a/backend/src/baserow/api/admin/views.py b/backend/src/baserow/api/admin/views.py index 4ee8097977..002dc9a9b8 100755 --- a/backend/src/baserow/api/admin/views.py +++ b/backend/src/baserow/api/admin/views.py @@ -13,6 +13,7 @@ from baserow.api.exceptions import ( InvalidSortAttributeException, InvalidSortDirectionException, + QueryParameterValidationException, ) from baserow.api.mixins import ( FilterableViewMixin, @@ -22,6 +23,7 @@ from baserow.api.pagination import PageNumberPagination from baserow.api.schemas import get_error_schema from baserow.api.serializers import get_example_pagination_serializer_class +from baserow.core.utils import split_comma_separated_string class APIListingView( @@ -46,11 +48,13 @@ def get(self, request): search = request.GET.get("search") sorts = request.GET.get("sorts") + ids_param = request.GET.get("ids") queryset = self.get_queryset(request) queryset = self.apply_filters(request.GET, queryset) queryset = self.apply_search(search, queryset) queryset = self.apply_sorts_or_default_sort(sorts, queryset) + queryset = self.apply_ids_filter(ids_param, queryset) paginator = PageNumberPagination(limit_page_size=100) page = paginator.paginate_queryset(queryset, request, self) @@ -61,6 +65,30 @@ def get(self, request): def get_queryset(self, request): raise NotImplementedError("The get_queryset method must be set.") + def apply_ids_filter(self, ids_param, queryset): + if not ids_param: + return queryset + + record_ids = split_comma_separated_string(ids_param) + + invalid_id = next( + (record for record in record_ids if not record.isdigit()), None + ) + if invalid_id is not None: + raise QueryParameterValidationException( + { + "ids": [ + { + "code": "invalid", + "error": f"'{invalid_id}' is not a valid ID. 
Only positive " + f"integers are accepted.", + } + ] + } + ) + + return queryset.filter(id__in=[int(record_id) for record_id in record_ids]) + def get_serializer(self, request, *args, **kwargs): if not self.serializer_class: raise NotImplementedError( @@ -134,6 +162,13 @@ def get_extend_schema_parameters( type=OpenApiTypes.INT, description=f"Defines how many {name} should be returned per page.", ), + OpenApiParameter( + name="ids", + location=OpenApiParameter.QUERY, + type=OpenApiTypes.STR, + description=f"A comma-separated list of {name} IDs to filter by. " + f"When provided, only {name} with those IDs are returned.", + ), *(extra_parameters or []), ], "responses": { diff --git a/backend/src/baserow/api/admin/workspaces/serializers.py b/backend/src/baserow/api/admin/workspaces/serializers.py index 78bbe45816..fdbc2c64d6 100755 --- a/backend/src/baserow/api/admin/workspaces/serializers.py +++ b/backend/src/baserow/api/admin/workspaces/serializers.py @@ -53,3 +53,11 @@ class Meta: "free_users", "created_on", ) + + +class AdminWorkspaceOptionsSerializer(serializers.ModelSerializer): + value = serializers.CharField(source="name") + + class Meta: + model = Workspace + fields = ("id", "value") diff --git a/backend/src/baserow/api/admin/workspaces/urls.py b/backend/src/baserow/api/admin/workspaces/urls.py index 26247063b5..7802cce4fb 100755 --- a/backend/src/baserow/api/admin/workspaces/urls.py +++ b/backend/src/baserow/api/admin/workspaces/urls.py @@ -1,10 +1,15 @@ from django.urls import re_path -from baserow.api.admin.workspaces.views import WorkspaceAdminView, WorkspacesAdminView +from baserow.api.admin.workspaces.views import ( + WorkspaceAdminView, + WorkspaceOptionsAdminView, + WorkspacesAdminView, +) app_name = "baserow.api.admin.workspaces" urlpatterns = [ re_path(r"^$", WorkspacesAdminView.as_view(), name="list"), + re_path(r"^options/$", WorkspaceOptionsAdminView.as_view(), name="options"), re_path(r"^(?P[0-9]+)/$", WorkspaceAdminView.as_view(), name="edit"), 
] diff --git a/backend/src/baserow/api/admin/workspaces/views.py b/backend/src/baserow/api/admin/workspaces/views.py index cad07b3de4..118bdc1728 100644 --- a/backend/src/baserow/api/admin/workspaces/views.py +++ b/backend/src/baserow/api/admin/workspaces/views.py @@ -7,7 +7,7 @@ from rest_framework.response import Response from rest_framework.views import APIView -from baserow.api.admin.views import AdminListingView +from baserow.api.admin.views import AdminListingView, APIListingView from baserow.api.decorators import map_exceptions from baserow.api.errors import ERROR_GROUP_DOES_NOT_EXIST from baserow.api.schemas import get_error_schema @@ -19,7 +19,10 @@ from baserow.core.usage.handler import UsageHandler from .errors import ERROR_CANNOT_DELETE_A_TEMPLATE_GROUP -from .serializers import WorkspacesAdminResponseSerializer +from .serializers import ( + AdminWorkspaceOptionsSerializer, + WorkspacesAdminResponseSerializer, +) class WorkspacesAdminView(AdminListingView): @@ -62,6 +65,30 @@ def get(self, request): return super().get(request) +class WorkspaceOptionsAdminView(APIListingView): + permission_classes = (IsAdminUser,) + serializer_class = AdminWorkspaceOptionsSerializer + search_fields = ["name"] + default_order_by = "name" + + def get_queryset(self, request): + return Workspace.objects.filter(template__isnull=True) + + @extend_schema( + tags=["Admin"], + operation_id="admin_list_workspaces_as_options", + description=( + "Lists all workspaces. This endpoint is intended for admin-level " + "features that need a workspace dropdown." 
+ ), + **APIListingView.get_extend_schema_parameters( + "workspaces", serializer_class, search_fields, {} + ), + ) + def get(self, request): + return super().get(request) + + class WorkspaceAdminView(APIView): permission_classes = (IsAdminUser,) diff --git a/backend/src/baserow/config/settings/base.py b/backend/src/baserow/config/settings/base.py index 6796069e99..cf55b38195 100644 --- a/backend/src/baserow/config/settings/base.py +++ b/backend/src/baserow/config/settings/base.py @@ -816,6 +816,9 @@ def __setitem__(self, key, value): INTEGRATION_LOCAL_BASEROW_PAGE_SIZE_LIMIT = int( os.getenv("BASEROW_INTEGRATION_LOCAL_BASEROW_PAGE_SIZE_LIMIT", 200) ) +INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS = str_to_bool( + os.getenv("BASEROW_INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS", "true") +) AUTOMATION_HISTORY_PAGE_SIZE_LIMIT = int( os.getenv("BASEROW_AUTOMATION_HISTORY_PAGE_SIZE_LIMIT", 100) diff --git a/backend/src/baserow/contrib/automation/nodes/registries.py b/backend/src/baserow/contrib/automation/nodes/registries.py index 0800daa126..75152efe6b 100644 --- a/backend/src/baserow/contrib/automation/nodes/registries.py +++ b/backend/src/baserow/contrib/automation/nodes/registries.py @@ -264,7 +264,7 @@ def prepare_values( # If we received any service values, prepare them. service_values = values.pop("service", None) or {} prepared_service_values = service_type.prepare_values( - service_values, user, service + service_values, user, service if instance else None ) # Update the service instance with any prepared service values. 
diff --git a/backend/src/baserow/contrib/builder/workflow_actions/workflow_action_types.py b/backend/src/baserow/contrib/builder/workflow_actions/workflow_action_types.py index 6320f77dd5..ba0770e8b4 100644 --- a/backend/src/baserow/contrib/builder/workflow_actions/workflow_action_types.py +++ b/backend/src/baserow/contrib/builder/workflow_actions/workflow_action_types.py @@ -381,7 +381,7 @@ def prepare_values( # If we received any service values, prepare them. service_values = values.pop("service", None) or {} prepared_service_values = service_type.prepare_values( - service_values, user, service + service_values, user, service if instance else None ) # Update the service instance with any prepared service values. diff --git a/backend/src/baserow/contrib/database/locale/en/LC_MESSAGES/django.po b/backend/src/baserow/contrib/database/locale/en/LC_MESSAGES/django.po index 6ee1964a6f..418277a125 100644 --- a/backend/src/baserow/contrib/database/locale/en/LC_MESSAGES/django.po +++ b/backend/src/baserow/contrib/database/locale/en/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-03-16 14:50+0000\n" +"POT-Creation-Date: 2026-03-16 21:52+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" diff --git a/backend/src/baserow/contrib/integrations/core/models.py b/backend/src/baserow/contrib/integrations/core/models.py index ff5568fab5..1f8f0bb204 100644 --- a/backend/src/baserow/contrib/integrations/core/models.py +++ b/backend/src/baserow/contrib/integrations/core/models.py @@ -132,6 +132,11 @@ class CoreSMTPEmailService(Service): A service for sending emails via SMTP. 
""" + use_instance_smtp_settings = models.BooleanField( + default=False, + db_default=False, + help_text="Whether to use the instance-level Django SMTP configuration.", + ) from_email = FormulaField( help_text="The sender's email address.", ) @@ -166,6 +171,10 @@ class CoreSMTPEmailService(Service): help_text="The email body content.", ) + @property + def instance_smtp_settings_enabled(self) -> bool: + return self.get_type()._instance_smtp_is_available() + class CoreRouterService(Service): default_edge_label = models.CharField( diff --git a/backend/src/baserow/contrib/integrations/core/service_types.py b/backend/src/baserow/contrib/integrations/core/service_types.py index 1bc3ed452e..566f9251fd 100644 --- a/backend/src/baserow/contrib/integrations/core/service_types.py +++ b/backend/src/baserow/contrib/integrations/core/service_types.py @@ -621,6 +621,7 @@ class CoreSMTPEmailServiceType(CoreServiceType): allowed_fields = [ "integration_id", + "use_instance_smtp_settings", "from_email", "from_name", "to_emails", @@ -633,6 +634,8 @@ class CoreSMTPEmailServiceType(CoreServiceType): serializer_field_names = [ "integration_id", + "use_instance_smtp_settings", + "instance_smtp_settings_enabled", "from_email", "from_name", "to_emails", @@ -644,6 +647,7 @@ class CoreSMTPEmailServiceType(CoreServiceType): ] class SerializedDict(ServiceDict): + use_instance_smtp_settings: bool from_email: str from_name: str to_emails: str @@ -652,7 +656,6 @@ class SerializedDict(ServiceDict): subject: str body_type: str body: str - body: str simple_formula_fields = [ "from_email", @@ -669,11 +672,21 @@ def serializer_field_overrides(self): from baserow.core.formula.serializers import FormulaSerializerField return { + "use_instance_smtp_settings": serializers.BooleanField( + required=False, + default=self._instance_smtp_is_available(), + help_text=CoreSMTPEmailService._meta.get_field( + "use_instance_smtp_settings" + ).help_text, + ), "integration_id": serializers.IntegerField( 
required=False, allow_null=True, help_text="The id of the SMTP integration.", ), + "instance_smtp_settings_enabled": serializers.ReadOnlyField( + help_text="Whether the instance SMTP configuration can be used and should be the default option in the UI.", + ), "from_email": FormulaSerializerField( help_text=CoreSMTPEmailService._meta.get_field("from_email").help_text, ), @@ -706,6 +719,41 @@ def serializer_field_overrides(self): ), } + def _instance_smtp_is_available(self) -> bool: + return bool( + settings.INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS + and getattr(settings, "CELERY_EMAIL_BACKEND", None) + == "django.core.mail.backends.smtp.EmailBackend" + and getattr(settings, "EMAIL_HOST", "") + ) + + def _should_use_instance_smtp(self, service: CoreSMTPEmailService) -> bool: + return bool( + service.use_instance_smtp_settings and self._instance_smtp_is_available() + ) + + def requires_integration(self, service: CoreSMTPEmailService) -> bool: + return not self._should_use_instance_smtp(service) + + def prepare_values(self, values, user: AbstractUser, instance=None): + values = super().prepare_values(values, user, instance) + + use_instance_smtp_settings = ( + values.get( + "use_instance_smtp_settings", + instance.use_instance_smtp_settings if instance else True, + ) + if self._instance_smtp_is_available() + else False + ) + + if use_instance_smtp_settings: + values["integration"] = None + + values["use_instance_smtp_settings"] = use_instance_smtp_settings + + return values + def get_schema_name(self, service: CoreSMTPEmailService) -> str: return f"SMTPEmail{service.id}Schema" @@ -734,15 +782,13 @@ def generate_schema( } def formulas_to_resolve( - self, service: CoreHTTPRequestService + self, service: CoreSMTPEmailService ) -> list[FormulaToResolve]: """ Returns the formula to resolve for this service. 
""" ensurers = { - "from_email": ensure_email, - "from_name": ensure_string, "to_emails": lambda v: [ensure_email(e) for e in ensure_array(v)], "cc_emails": lambda v: [ensure_email(e) for e in ensure_array(v)], "bcc_emails": lambda v: [ensure_email(e) for e in ensure_array(v)], @@ -750,6 +796,13 @@ def formulas_to_resolve( "body": ensure_string, } + if not self._should_use_instance_smtp(service): + ensurers = { + "from_email": ensure_email, + "from_name": ensure_string, + **ensurers, + } + formulas = [] for key, ensurer in ensurers.items(): @@ -772,31 +825,48 @@ def dispatch_data( "At least one recipient email is required" ) - smtp_integration = service.integration.specific - to_emails = resolved_values["to_emails"] cc_emails = resolved_values["cc_emails"] bcc_emails = resolved_values["bcc_emails"] - from_email = ( - f"{resolved_values['from_name']} <{resolved_values['from_email']}>" - if resolved_values["from_name"] - else resolved_values["from_email"] - ) + using_instance_smtp = self._should_use_instance_smtp(service) + + if using_instance_smtp: + from_email = settings.DEFAULT_FROM_EMAIL + connection = get_connection( + backend=settings.CELERY_EMAIL_BACKEND, + ) + smtp_host = settings.EMAIL_HOST + smtp_port = settings.EMAIL_PORT + else: + if not service.integration_id: + # This situation can happen if we have changed the + # configuration variable in the meantime. 
+ raise ServiceImproperlyConfiguredDispatchException( + "Integration for this service is missing" + ) + + smtp_integration = service.integration.specific + from_email = ( + f"{resolved_values['from_name']} <{resolved_values['from_email']}>" + if resolved_values["from_name"] + else resolved_values["from_email"] + ) + connection = get_connection( + backend=settings.CELERY_EMAIL_BACKEND, + host=smtp_integration.host, + port=smtp_integration.port, + username=smtp_integration.username, + password=smtp_integration.password, + use_tls=smtp_integration.use_tls, + ) + smtp_host = smtp_integration.host + smtp_port = smtp_integration.port subject = resolved_values["subject"] body_content = resolved_values["body"] - connection = get_connection( - backend="django.core.mail.backends.smtp.EmailBackend", - host=smtp_integration.host, - port=smtp_integration.port, - username=smtp_integration.username, - password=smtp_integration.password, - use_tls=smtp_integration.use_tls, - ) - email = EmailMultiAlternatives( subject, body_content, @@ -822,12 +892,11 @@ def dispatch_data( ) from e except socket.gaierror as e: raise ServiceImproperlyConfiguredDispatchException( - f"The host {smtp_integration.host}:{smtp_integration.port} could not " - "be reached" + f"The host {smtp_host}:{smtp_port} could not be reached" ) from e except ConnectionRefusedError as e: raise ServiceImproperlyConfiguredDispatchException( - f"Connection refused by {smtp_integration.host}:{smtp_integration.port}" + f"Connection refused by {smtp_host}:{smtp_port}" ) from e except SMTPAuthenticationError as e: raise ServiceImproperlyConfiguredDispatchException( @@ -848,9 +917,13 @@ def dispatch_transform( def export_prepared_values(self, instance: Service) -> dict[str, Any]: values = super().export_prepared_values(instance) + + values["integration_id"] = None + if values.get("integration"): del values["integration"] values["integration_id"] = instance.integration_id + return values diff --git 
a/backend/src/baserow/contrib/integrations/migrations/0027_coresmtpemailservice_use_instance_smtp_settings.py b/backend/src/baserow/contrib/integrations/migrations/0027_coresmtpemailservice_use_instance_smtp_settings.py new file mode 100644 index 0000000000..8311e13df6 --- /dev/null +++ b/backend/src/baserow/contrib/integrations/migrations/0027_coresmtpemailservice_use_instance_smtp_settings.py @@ -0,0 +1,19 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("integrations", "0026_backfill_coreperiodicservice_next_run_at"), + ] + + operations = [ + migrations.AddField( + model_name="coresmtpemailservice", + name="use_instance_smtp_settings", + field=models.BooleanField( + default=False, + db_default=False, + help_text="Whether to use the instance-level Django SMTP configuration.", + ), + ), + ] diff --git a/backend/src/baserow/core/locale/en/LC_MESSAGES/django.po b/backend/src/baserow/core/locale/en/LC_MESSAGES/django.po index 1e92428abd..7f4df3cba3 100644 --- a/backend/src/baserow/core/locale/en/LC_MESSAGES/django.po +++ b/backend/src/baserow/core/locale/en/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-03-16 14:50+0000\n" +"POT-Creation-Date: 2026-03-16 21:52+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" diff --git a/backend/src/baserow/core/services/handler.py b/backend/src/baserow/core/services/handler.py index 0abad082ac..47fff8a85b 100644 --- a/backend/src/baserow/core/services/handler.py +++ b/backend/src/baserow/core/services/handler.py @@ -220,9 +220,8 @@ def dispatch_service( :return: The result of dispatching the service. 
""" - if ( - service.integration_id is None - and service.get_type().integration_type is not None + if service.integration_id is None and service.get_type().requires_integration( + service ): raise ServiceImproperlyConfiguredDispatchException( "No integration selected" diff --git a/backend/src/baserow/core/services/registries.py b/backend/src/baserow/core/services/registries.py index ca689f9eae..419e8e4c61 100644 --- a/backend/src/baserow/core/services/registries.py +++ b/backend/src/baserow/core/services/registries.py @@ -248,6 +248,9 @@ def get_context_data_schema(self, service: ServiceSubClass): return None + def requires_integration(self, service: ServiceSubClass) -> bool: + return self.integration_type is not None + def formulas_to_resolve(self, service: ServiceSubClass) -> list[FormulaToResolve]: return [] diff --git a/backend/src/baserow/locale/en/LC_MESSAGES/django.po b/backend/src/baserow/locale/en/LC_MESSAGES/django.po index f185ecf311..bf0c3be665 100755 --- a/backend/src/baserow/locale/en/LC_MESSAGES/django.po +++ b/backend/src/baserow/locale/en/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-03-16 14:50+0000\n" +"POT-Creation-Date: 2026-03-16 21:52+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -205,20 +205,24 @@ msgstr "" msgid "Widget \"%(widget_title)s\" (%(widget_id)s) deleted" msgstr "" -#: src/baserow/contrib/integrations/core/service_types.py:1067 +#: src/baserow/contrib/integrations/core/service_types.py:1062 msgid "Branch taken" msgstr "" -#: src/baserow/contrib/integrations/core/service_types.py:1072 +#: src/baserow/contrib/integrations/core/service_types.py:1067 msgid "Label" msgstr "" -#: src/baserow/contrib/integrations/core/service_types.py:1074 +#: src/baserow/contrib/integrations/core/service_types.py:1069 msgid "The label of the branch that matched the condition." 
msgstr "" -#: src/baserow/contrib/integrations/core/service_types.py:1418 -msgid "Triggered at" +#: src/baserow/contrib/integrations/core/service_types.py:1438 +msgid "Previous scheduled run" +msgstr "" + +#: src/baserow/contrib/integrations/core/service_types.py:1442 +msgid "Next scheduled run" msgstr "" #: src/baserow/contrib/integrations/local_baserow/service_types.py:1688 diff --git a/backend/tests/baserow/api/admin/groups/test_workspaces_admin_views.py b/backend/tests/baserow/api/admin/groups/test_workspaces_admin_views.py index f077b400a0..5fd5646a58 100644 --- a/backend/tests/baserow/api/admin/groups/test_workspaces_admin_views.py +++ b/backend/tests/baserow/api/admin/groups/test_workspaces_admin_views.py @@ -219,3 +219,159 @@ def test_cant_delete_template_workspace(api_client, data_fixture): assert response.status_code == HTTP_400_BAD_REQUEST assert response.json()["error"] == "ERROR_CANNOT_DELETE_A_TEMPLATE_GROUP" assert Workspace.objects.all().count() == 1 + + +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_non_admin_list_workspaces_as_options(api_client, data_fixture): + ( + admin_user, + admin_token, + ) = data_fixture.create_user_and_token() + + # no search query should return all workspaces + response = api_client.get( + reverse("api:admin:workspaces:options"), + format="json", + HTTP_AUTHORIZATION=f"JWT {admin_token}", + ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_admin_list_workspaces_as_options(api_client, data_fixture): + ( + admin_user, + admin_token, + ) = data_fixture.create_user_and_token(is_staff=True) + workspace_1 = data_fixture.create_workspace(name="workspace 1", user=admin_user) + workspace_2 = data_fixture.create_workspace(name="workspace 2", user=admin_user) + + # no search query should return all workspaces + response = api_client.get( + reverse("api:admin:workspaces:options"), + format="json", + HTTP_AUTHORIZATION=f"JWT {admin_token}", + 
) + assert response.status_code == HTTP_200_OK + assert response.json() == { + "count": 2, + "next": None, + "previous": None, + "results": [ + {"id": workspace_1.id, "value": workspace_1.name}, + {"id": workspace_2.id, "value": workspace_2.name}, + ], + } + + # searching by name should return only the correct workspace + response = api_client.get( + reverse("api:admin:workspaces:options") + "?search=1", + format="json", + HTTP_AUTHORIZATION=f"JWT {admin_token}", + ) + assert response.status_code == HTTP_200_OK + assert response.json() == { + "count": 1, + "next": None, + "previous": None, + "results": [{"id": workspace_1.id, "value": workspace_1.name}], + } + + +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_admin_list_workspaces_as_options_filter_by_ids(api_client, data_fixture): + ( + admin_user, + admin_token, + ) = data_fixture.create_user_and_token(is_staff=True) + workspace_1 = data_fixture.create_workspace(name="workspace 1", user=admin_user) + workspace_2 = data_fixture.create_workspace(name="workspace 2", user=admin_user) + data_fixture.create_workspace(name="workspace 3", user=admin_user) + + # filtering by a single id should return only that workspace + response = api_client.get( + reverse("api:admin:workspaces:options") + f"?ids={workspace_1.id}", + format="json", + HTTP_AUTHORIZATION=f"JWT {admin_token}", + ) + assert response.status_code == HTTP_200_OK + assert response.json() == { + "count": 1, + "next": None, + "previous": None, + "results": [{"id": workspace_1.id, "value": workspace_1.name}], + } + + # filtering by multiple ids should return all matching workspaces + response = api_client.get( + reverse("api:admin:workspaces:options") + + f"?ids={workspace_1.id},{workspace_2.id}", + format="json", + HTTP_AUTHORIZATION=f"JWT {admin_token}", + ) + assert response.status_code == HTTP_200_OK + assert response.json() == { + "count": 2, + "next": None, + "previous": None, + "results": [ + {"id": workspace_1.id, "value": 
workspace_1.name}, + {"id": workspace_2.id, "value": workspace_2.name}, + ], + } + + +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_admin_list_workspaces_as_options_filter_by_invalid_ids( + api_client, data_fixture +): + _, admin_token = data_fixture.create_user_and_token(is_staff=True) + + # Negative IDs should be rejected. + response = api_client.get( + reverse("api:admin:workspaces:options") + "?ids=-1", + format="json", + HTTP_AUTHORIZATION=f"JWT {admin_token}", + ) + assert response.status_code == HTTP_400_BAD_REQUEST + assert response.json()["error"] == "ERROR_QUERY_PARAMETER_VALIDATION" + assert response.json()["detail"]["ids"] == [ + { + "code": "invalid", + "error": "'-1' is not a valid ID. Only positive integers are accepted.", + } + ] + + # Non-numeric values should be rejected. + response = api_client.get( + reverse("api:admin:workspaces:options") + "?ids=abc", + format="json", + HTTP_AUTHORIZATION=f"JWT {admin_token}", + ) + assert response.status_code == HTTP_400_BAD_REQUEST + assert response.json()["error"] == "ERROR_QUERY_PARAMETER_VALIDATION" + assert response.json()["detail"]["ids"] == [ + { + "code": "invalid", + "error": "'abc' is not a valid ID. Only positive integers are accepted.", + } + ] + + # A mix of valid and invalid values should still be rejected. + response = api_client.get( + reverse("api:admin:workspaces:options") + "?ids=1,-2,3", + format="json", + HTTP_AUTHORIZATION=f"JWT {admin_token}", + ) + assert response.status_code == HTTP_400_BAD_REQUEST + assert response.json()["error"] == "ERROR_QUERY_PARAMETER_VALIDATION" + assert response.json()["detail"]["ids"] == [ + { + "code": "invalid", + "error": "'-2' is not a valid ID. 
Only positive integers are accepted.", + } + ] diff --git a/backend/tests/baserow/contrib/integrations/core/test_smtp_email_service_type.py b/backend/tests/baserow/contrib/integrations/core/test_smtp_email_service_type.py index ceb7658372..9d521ae144 100644 --- a/backend/tests/baserow/contrib/integrations/core/test_smtp_email_service_type.py +++ b/backend/tests/baserow/contrib/integrations/core/test_smtp_email_service_type.py @@ -4,6 +4,8 @@ from contextlib import contextmanager from unittest.mock import MagicMock, patch +from django.test import override_settings + import pytest from baserow.contrib.integrations.core.service_types import CoreSMTPEmailServiceType @@ -55,6 +57,9 @@ def mock_django_email( @pytest.mark.django_db +@override_settings( + CELERY_EMAIL_BACKEND="django.core.mail.backends.smtp.EmailBackend", +) def test_send_smtp_email_basic(data_fixture): smtp_integration = data_fixture.create_smtp_integration( host="smtp.example.com", @@ -100,6 +105,46 @@ def test_send_smtp_email_basic(data_fixture): assert result.data == {"success": True} +@pytest.mark.django_db +@override_settings( + INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS=True, + CELERY_EMAIL_BACKEND="django.core.mail.backends.smtp.EmailBackend", + EMAIL_HOST="instance.smtp.example.com", + EMAIL_PORT=2525, + DEFAULT_FROM_EMAIL="instance@example.com", +) +def test_send_smtp_email_uses_instance_smtp_settings(data_fixture): + service = data_fixture.create_core_smtp_email_service( + integration=None, + use_instance_smtp_settings=True, + from_email="''", + from_name="''", + to_emails="'recipient@example.com'", + subject="'Test Subject'", + body="'Hello, this is a test email!'", + body_type="plain", + ) + + service_type = service.get_type() + dispatch_context = FakeDispatchContext() + + with mock_django_email() as (mock_email, mock_connection): + result = service_type.dispatch(service, dispatch_context) + mock_connection.assert_called_once_with( + 
backend="django.core.mail.backends.smtp.EmailBackend", + ) + mock_email.assert_called_once_with( + "Test Subject", + "Hello, this is a test email!", + "instance@example.com", + ["recipient@example.com"], + bcc=[], + cc=[], + connection=mock_connection.return_value, + ) + assert result.data == {"success": True} + + @pytest.mark.django_db def test_send_smtp_email_multiple_to_cc_and_bcc(data_fixture): smtp_integration = data_fixture.create_smtp_integration( @@ -338,6 +383,26 @@ def test_send_smtp_email_no_recipients_error(data_fixture): assert str(exc_info.value) == "At least one recipient email is required" +@pytest.mark.django_db +def test_send_smtp_email_missing_integration_error(data_fixture): + service = data_fixture.create_core_smtp_email_service( + integration=None, + use_instance_smtp_settings=False, + from_email="'sender@example.com'", + to_emails="'recipient@example.com'", + subject="'Test Subject'", + body="'Test body'", + ) + + service_type = service.get_type() + dispatch_context = FakeDispatchContext() + + with pytest.raises(ServiceImproperlyConfiguredDispatchException) as exc_info: + service_type.dispatch(service, dispatch_context) + + assert str(exc_info.value) == "Integration for this service is missing" + + @pytest.mark.django_db def test_smtp_email_service_generate_schema(data_fixture): smtp_integration = data_fixture.create_smtp_integration() @@ -366,6 +431,18 @@ def test_smtp_email_service_generate_schema(data_fixture): } +@pytest.mark.django_db +@override_settings( + INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS=True, + CELERY_EMAIL_BACKEND="django.core.mail.backends.smtp.EmailBackend", + EMAIL_HOST="instance.smtp.example.com", +) +def test_smtp_email_service_exposes_instance_smtp_enabled_flag(data_fixture): + service = data_fixture.create_core_smtp_email_service() + + assert service.instance_smtp_settings_enabled is True + + @pytest.mark.django_db def test_serialized_export_import(data_fixture): smtp_integration = 
data_fixture.create_smtp_integration( @@ -378,6 +455,7 @@ def test_serialized_export_import(data_fixture): service = data_fixture.create_core_smtp_email_service( integration=smtp_integration, + use_instance_smtp_settings=False, from_email="'sender@example.com'", from_name="'Test Sender'", to_emails="'recipient@example.com'", @@ -395,6 +473,7 @@ def test_serialized_export_import(data_fixture): expected_serialized = { "id": AnyInt(), "integration_id": smtp_integration.id, + "use_instance_smtp_settings": False, "sample_data": None, "type": "smtp_email", "from_email": { @@ -437,6 +516,37 @@ def test_serialized_export_import(data_fixture): assert new_service.subject["formula"] == "'Test Subject'" assert new_service.body_type == "html" assert new_service.body["formula"] == "'Test body'" + assert new_service.use_instance_smtp_settings is False + + +@pytest.mark.django_db +@override_settings( + INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS=True, + CELERY_EMAIL_BACKEND="django.core.mail.backends.smtp.EmailBackend", + EMAIL_HOST="instance.smtp.example.com", +) +def test_serialized_export_import_with_instance_smtp(data_fixture): + service = data_fixture.create_core_smtp_email_service( + integration=None, + use_instance_smtp_settings=True, + from_email="''", + from_name="''", + to_emails="'recipient@example.com'", + subject="'Test Subject'", + body="'Test body'", + body_type="html", + ) + + service_type = service.get_type() + serialized = json.loads(json.dumps(service_type.export_serialized(service))) + + assert serialized["integration_id"] is None + assert serialized["use_instance_smtp_settings"] is True + + new_service = service_type.import_serialized(None, serialized, {}, lambda x, d: x) + + assert new_service.integration_id is None + assert new_service.use_instance_smtp_settings is True @pytest.mark.django_db @@ -446,6 +556,7 @@ def test_smtp_email_service_create_update(data_fixture): service = ServiceHandler().create_service( CoreSMTPEmailServiceType(), 
integration_id=smtp_integration.id, + use_instance_smtp_settings=False, from_email="'sender@example.com'", from_name="'Test Sender'", to_emails="'recipient@example.com'", @@ -481,6 +592,7 @@ def test_smtp_email_service_create_update(data_fixture): } assert service.body_type == "plain" assert service.integration_id == smtp_integration.id + assert service.use_instance_smtp_settings is False service_type = service.get_type() ServiceHandler().update_service( @@ -497,3 +609,73 @@ def test_smtp_email_service_create_update(data_fixture): assert service.subject["formula"] == "'Updated Subject'" assert service.body["formula"] == "'Test body'" assert service.body_type == "html" + + +@pytest.mark.django_db +@override_settings( + INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS=True, + CELERY_EMAIL_BACKEND="django.core.mail.backends.smtp.EmailBackend", + EMAIL_HOST="instance.smtp.example.com", +) +def test_smtp_email_service_create_update_with_instance_smtp(data_fixture): + service_type = CoreSMTPEmailServiceType() + + prepared_values = service_type.prepare_values( + { + "use_instance_smtp_settings": True, + "integration_id": None, + "to_emails": "'recipient@example.com'", + "subject": "'Test Subject'", + "body": "'Test body'", + "from_email": "''", + "from_name": "''", + }, + data_fixture.create_user(), + ) + + service = ServiceHandler().create_service(service_type, **prepared_values) + + assert service.integration_id is None + assert service.use_instance_smtp_settings is True + + smtp_integration = data_fixture.create_smtp_integration() + prepared_updates = service_type.prepare_values( + { + "use_instance_smtp_settings": False, + "integration_id": smtp_integration.id, + "from_email": "'sender@example.com'", + }, + data_fixture.create_user(), + service, + ) + + ServiceHandler().update_service(service_type, service, **prepared_updates) + service.refresh_from_db() + + assert service.integration_id == smtp_integration.id + assert service.use_instance_smtp_settings is 
False + assert service.from_email["formula"] == "'sender@example.com'" + + +@pytest.mark.django_db +@override_settings( + INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS=False, + CELERY_EMAIL_BACKEND="django.core.mail.backends.smtp.EmailBackend", + EMAIL_HOST="instance.smtp.example.com", +) +def test_smtp_email_service_prepare_values_disables_instance_smtp_when_unavailable( + data_fixture, +): + service_type = CoreSMTPEmailServiceType() + service = data_fixture.create_core_smtp_email_service( + integration=None, + use_instance_smtp_settings=True, + ) + + prepared_values = service_type.prepare_values( + {}, + data_fixture.create_user(), + service, + ) + + assert prepared_values["use_instance_smtp_settings"] is False diff --git a/backend/uv.lock b/backend/uv.lock index 6715e97cc4..ee367b6e29 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -534,16 +534,16 @@ wheels = [ [[package]] name = "cbor2" -version = "5.8.0" +version = "5.9.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d9/8e/8b4fdde28e42ffcd741a37f4ffa9fb59cd4fe01625b544dfcfd9ccb54f01/cbor2-5.8.0.tar.gz", hash = "sha256:b19c35fcae9688ac01ef75bad5db27300c2537eb4ee00ed07e05d8456a0d4931", size = 107825, upload-time = "2025-12-30T18:44:22.455Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/cb/09939728be094d155b5d4ac262e39877875f5f7e36eea66beb359f647bd0/cbor2-5.9.0.tar.gz", hash = "sha256:85c7a46279ac8f226e1059275221e6b3d0e370d2bb6bd0500f9780781615bcea", size = 111231, upload-time = "2026-03-22T15:56:50.638Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/0c/0654233d7543ac8a50f4785f172430ddc97538ba418eb305d6e529d1a120/cbor2-5.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ad72381477133046ce217617d839ea4e9454f8b77d9a6351b229e214102daeb7", size = 70710, upload-time = "2025-12-30T18:44:03.209Z" }, - { url = 
"https://files.pythonhosted.org/packages/84/62/4671d24e557d7f5a74a01b422c538925140c0495e57decde7e566f91d029/cbor2-5.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6da25190fad3434ce99876b11d4ca6b8828df6ca232cf7344cd14ae1166fb718", size = 285005, upload-time = "2025-12-30T18:44:05.109Z" }, - { url = "https://files.pythonhosted.org/packages/87/85/0c67d763a08e848c9a80d7e4723ba497cce676f41bc7ca1828ae90a0a872/cbor2-5.8.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c13919e3a24c5a6d286551fa288848a4cedc3e507c58a722ccd134e461217d99", size = 282435, upload-time = "2025-12-30T18:44:06.465Z" }, - { url = "https://files.pythonhosted.org/packages/b2/01/0650972b4dbfbebcfbe37cbba7fc3cd9019a8da6397ab3446e07175e342b/cbor2-5.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f8c40d32e5972047a777f9bf730870828f3cf1c43b3eb96fd0429c57a1d3b9e6", size = 277493, upload-time = "2025-12-30T18:44:07.609Z" }, - { url = "https://files.pythonhosted.org/packages/b3/6c/7704a4f32adc7f10f3b41ec067f500a4458f7606397af5e4cf2d368fd288/cbor2-5.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7627894bc0b3d5d0807f31e3107e11b996205470c4429dc2bb4ef8bfe7f64e1e", size = 276085, upload-time = "2025-12-30T18:44:09.021Z" }, - { url = "https://files.pythonhosted.org/packages/d6/4f/101071f880b4da05771128c0b89f41e334cff044dee05fb013c8f4be661c/cbor2-5.8.0-py3-none-any.whl", hash = "sha256:3727d80f539567b03a7aa11890e57798c67092c38df9e6c23abb059e0f65069c", size = 24374, upload-time = "2025-12-30T18:44:21.476Z" }, + { url = "https://files.pythonhosted.org/packages/08/7d/9ccc36d10ef96e6038e48046ebe1ce35a1e7814da0e1e204d09e6ef09b8d/cbor2-5.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23606d31ba1368bd1b6602e3020ee88fe9523ca80e8630faf6b2fc904fd84560", size = 71500, upload-time = "2026-03-22T15:56:31.876Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/e1/a6cca2cc72e13f00030c6a649f57ae703eb2c620806ab70c40db8eab33fa/cbor2-5.9.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0322296b9d52f55880e300ba8ba09ecf644303b99b51138bbb1c0fb644fa7c3e", size = 286953, upload-time = "2026-03-22T15:56:33.292Z" }, + { url = "https://files.pythonhosted.org/packages/08/3c/24cd5ef488a957d90e016f200a3aad820e4c2f85edd61c9fe4523007a1ee/cbor2-5.9.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:422817286c1d0ce947fb2f7eca9212b39bddd7231e8b452e2d2cc52f15332dba", size = 285454, upload-time = "2026-03-22T15:56:34.703Z" }, + { url = "https://files.pythonhosted.org/packages/a4/35/dca96818494c0ba47cdd73e8d809b27fa91f8fa0ce32a068a09237687454/cbor2-5.9.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9a4907e0c3035bb8836116854ed8e56d8aef23909d601fa59706320897ec2551", size = 279441, upload-time = "2026-03-22T15:56:35.888Z" }, + { url = "https://files.pythonhosted.org/packages/a4/44/d3362378b16e53cf7e535a3f5aed8476e2109068154e24e31981ef5bde9e/cbor2-5.9.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fb7afe77f8d269e42d7c4b515c6fd14f1ccc0625379fb6829b269f493d16eddd", size = 279673, upload-time = "2026-03-22T15:56:37.08Z" }, + { url = "https://files.pythonhosted.org/packages/42/ff/b83492b096fbef26e9cb62c1a4bf2d3cef579ea7b33138c6c37c4ae66f67/cbor2-5.9.0-py3-none-any.whl", hash = "sha256:27695cbd70c90b8de5c4a284642c2836449b14e2c2e07e3ffe0744cb7669a01b", size = 24627, upload-time = "2026-03-22T15:56:48.847Z" }, ] [[package]] diff --git a/changelog/entries/unreleased/breaking_change/4999_instance_smtp_configuration_can_be_used_to_send_emails_with_.json b/changelog/entries/unreleased/breaking_change/4999_instance_smtp_configuration_can_be_used_to_send_emails_with_.json new file mode 100644 index 0000000000..0ead27adac --- /dev/null +++ 
b/changelog/entries/unreleased/breaking_change/4999_instance_smtp_configuration_can_be_used_to_send_emails_with_.json @@ -0,0 +1,9 @@ +{ + "type": "breaking_change", + "message": "Instance SMTP configuration is used by default to send e-mails with the `Send email` action. Set `BASEROW_INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS=false` env var to disable this behaviour.", + "issue_origin": "github", + "issue_number": 4999, + "domain": "integration", + "bullet_points": [], + "created_at": "2026-03-23" +} diff --git a/changelog/entries/unreleased/feature/data_scanner.json b/changelog/entries/unreleased/feature/data_scanner.json new file mode 100644 index 0000000000..a2c9806adb --- /dev/null +++ b/changelog/entries/unreleased/feature/data_scanner.json @@ -0,0 +1,9 @@ +{ + "type": "feature", + "message": "Add instance-wide data scanner.", + "issue_origin": "github", + "issue_number": null, + "domain": "database", + "bullet_points": [], + "created_at": "2026-03-16" +} diff --git a/deploy/cloudron/CloudronManifest.json b/deploy/cloudron/CloudronManifest.json index f382d9181e..9ede2593d2 100644 --- a/deploy/cloudron/CloudronManifest.json +++ b/deploy/cloudron/CloudronManifest.json @@ -1,7 +1,7 @@ { "id": "io.baserow.cloudronapp", "title": "Baserow", - "author": "Bram Wiepjes", + "author": "Baserow", "description": "file://DESCRIPTION.md", "tagline": "Collaborate on any form of data", "website": "https://baserow.io", diff --git a/deploy/cloudron/embeddings/CloudronManifest.json b/deploy/cloudron/embeddings/CloudronManifest.json new file mode 100644 index 0000000000..79eb49ee78 --- /dev/null +++ b/deploy/cloudron/embeddings/CloudronManifest.json @@ -0,0 +1,15 @@ +{ + "id": "io.baserow.embeddings", + "title": "Baserow Embeddings", + "author": "Baserow", + "description": "file://DESCRIPTION.md", + "tagline": "Embeddings server for the AI assistant docs lookup.", + "website": "https://baserow.io", + "contactEmail": "bram@baserow.io", + "tags": ["no-code", "nocode", 
"database", "data", "collaborate", "airtable"], + "version": "2.1.4", + "healthCheckPath": "/api/_health/", + "httpPort": 80, + "memoryLimit": 1000000, + "manifestVersion": 2 +} diff --git a/deploy/cloudron/embeddings/DESCRIPTION.md b/deploy/cloudron/embeddings/DESCRIPTION.md new file mode 100644 index 0000000000..f2dcb75439 --- /dev/null +++ b/deploy/cloudron/embeddings/DESCRIPTION.md @@ -0,0 +1,9 @@ +``` +cloudron install -l embeddings.{YOUR_DOMAIN} --image baserow/embeddings:1.0.0 +``` + +In the Baserow app in Cloudron do: + +``` +cloudron env set BASEROW_EMBEDDINGS_API_URL=https://embeddings.{YOUR_DOMAIN} +``` diff --git a/docker-compose.no-caddy.yml b/docker-compose.no-caddy.yml index d1d7958d0d..354f369775 100644 --- a/docker-compose.no-caddy.yml +++ b/docker-compose.no-caddy.yml @@ -240,6 +240,7 @@ services: BASEROW_UNIQUE_ROW_VALUES_SIZE_LIMIT: BASEROW_ROW_PAGE_SIZE_LIMIT: BASEROW_INTEGRATION_LOCAL_BASEROW_PAGE_SIZE_LIMIT: + BASEROW_INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS: BASEROW_INTEGRATIONS_PERIODIC_MINUTE_MIN: BASEROW_BUILDER_DOMAINS: BASEROW_FRONTEND_SAME_SITE_COOKIE: diff --git a/docker-compose.yml b/docker-compose.yml index 12daf3ef17..a08d0a1b0d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -91,6 +91,7 @@ x-backend-variables: BASEROW_AMOUNT_OF_WORKERS: BASEROW_ROW_PAGE_SIZE_LIMIT: BASEROW_INTEGRATION_LOCAL_BASEROW_PAGE_SIZE_LIMIT: + BASEROW_INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS: BATCH_ROWS_SIZE_LIMIT: INITIAL_TABLE_DATA_LIMIT: BASEROW_FILE_UPLOAD_SIZE_LIMIT_MB: @@ -316,6 +317,7 @@ services: BASEROW_UNIQUE_ROW_VALUES_SIZE_LIMIT: BASEROW_ROW_PAGE_SIZE_LIMIT: BASEROW_INTEGRATION_LOCAL_BASEROW_PAGE_SIZE_LIMIT: + BASEROW_INTEGRATION_ALLOW_SMTP_SERVICE_TO_USE_INSTANCE_SETTINGS: BASEROW_BUILDER_DOMAINS: BASEROW_FRONTEND_SAME_SITE_COOKIE: SENTRY_DSN: diff --git a/enterprise/backend/pytest.ini b/enterprise/backend/pytest.ini index 28c968f590..37fe40b6ac 100644 --- a/enterprise/backend/pytest.ini +++ 
b/enterprise/backend/pytest.ini @@ -3,5 +3,6 @@ DJANGO_SETTINGS_MODULE = baserow.config.settings.test python_files = test_*.py markers = eval: mark test as an eval test (requires LLM API key) + data_scanner: mark test as a data scanner test env = DJANGO_SETTINGS_MODULE = baserow.config.settings.test diff --git a/enterprise/backend/src/baserow_enterprise/api/admin/audit_log/urls.py b/enterprise/backend/src/baserow_enterprise/api/admin/audit_log/urls.py index 259b5894ba..3144a96724 100755 --- a/enterprise/backend/src/baserow_enterprise/api/admin/audit_log/urls.py +++ b/enterprise/backend/src/baserow_enterprise/api/admin/audit_log/urls.py @@ -5,7 +5,6 @@ AuditLogActionTypeFilterView, AuditLogUserFilterView, AuditLogView, - AuditLogWorkspaceFilterView, ) app_name = "baserow_enterprise.api.audit_log" @@ -13,7 +12,6 @@ urlpatterns = [ re_path(r"^$", AuditLogView.as_view(), name="list"), re_path(r"users/$", AuditLogUserFilterView.as_view(), name="users"), - re_path(r"workspaces/$", AuditLogWorkspaceFilterView.as_view(), name="workspaces"), re_path( r"action-types/$", AuditLogActionTypeFilterView.as_view(), name="action_types" ), diff --git a/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/__init__.py b/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/errors.py b/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/errors.py new file mode 100644 index 0000000000..8f15f1e398 --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/errors.py @@ -0,0 +1,19 @@ +from rest_framework.status import HTTP_404_NOT_FOUND, HTTP_409_CONFLICT + +ERROR_DATA_SCAN_DOES_NOT_EXIST = ( + "ERROR_DATA_SCAN_DOES_NOT_EXIST", + HTTP_404_NOT_FOUND, + "The requested data scan does not exist.", +) + +ERROR_DATA_SCAN_ALREADY_RUNNING = ( + "ERROR_DATA_SCAN_ALREADY_RUNNING", + HTTP_409_CONFLICT, 
+ "The data scan is already running.", +) + +ERROR_DATA_SCAN_RESULT_DOES_NOT_EXIST = ( + "ERROR_DATA_SCAN_RESULT_DOES_NOT_EXIST", + HTTP_404_NOT_FOUND, + "The requested data scan result does not exist.", +) diff --git a/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/serializers.py b/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/serializers.py new file mode 100644 index 0000000000..f6ef8ce103 --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/serializers.py @@ -0,0 +1,268 @@ +from rest_framework import serializers + +from baserow.contrib.database.fields.models import Field +from baserow.core.jobs.registries import job_type_registry +from baserow_enterprise.data_scanner.constants import ( + SCAN_TYPE_LIST_OF_VALUES, + SCAN_TYPE_LIST_TABLE, + SCAN_TYPE_PATTERN, + SCANNABLE_FIELD_CONTENT_TYPES, +) +from baserow_enterprise.data_scanner.job_types import DataScanResultExportJobType +from baserow_enterprise.data_scanner.models import DataScan, DataScanResult + + +class DataScanSerializer(serializers.ModelSerializer): + workspace_ids = serializers.SerializerMethodField() + list_items = serializers.SerializerMethodField() + source_table_id = serializers.SerializerMethodField() + source_field_id = serializers.SerializerMethodField() + source_workspace_id = serializers.SerializerMethodField() + source_database_id = serializers.SerializerMethodField() + results_count = serializers.SerializerMethodField() + + class Meta: + model = DataScan + fields = [ + "id", + "name", + "scan_type", + "pattern", + "frequency", + "scan_all_workspaces", + "workspace_ids", + "is_running", + "last_run_started_at", + "last_run_finished_at", + "last_error", + "list_items", + "results_count", + "source_table_id", + "source_field_id", + "source_workspace_id", + "source_database_id", + "created_on", + "updated_on", + ] + + def get_workspace_ids(self, obj): + return [ws.id for ws in obj.workspaces.all()] + + def get_list_items(self, 
obj): + return [item.value for item in obj.list_items.all()] + + def get_source_table_id(self, obj): + return obj.source_table_id + + def get_source_field_id(self, obj): + return obj.source_field_id + + def get_source_workspace_id(self, obj): + try: + return obj.source_table.database.workspace_id + except AttributeError: + return None + + def get_source_database_id(self, obj): + try: + return obj.source_table.database_id + except AttributeError: + return None + + def get_results_count(self, obj): + if hasattr(obj, "results_count"): + return obj.results_count + return obj.results.count() + + +class DataScanWriteSerializer(serializers.Serializer): + name = serializers.CharField(max_length=255, required=False) + scan_type = serializers.ChoiceField( + choices=DataScan.SCAN_TYPE_CHOICES, + required=False, + ) + pattern = serializers.CharField( + max_length=100, required=False, allow_blank=True, allow_null=True + ) + frequency = serializers.ChoiceField( + choices=DataScan.FREQUENCY_CHOICES, + required=False, + ) + scan_all_workspaces = serializers.BooleanField(required=False) + workspace_ids = serializers.ListField( + child=serializers.IntegerField(), + required=False, + ) + list_items = serializers.ListField( + child=serializers.CharField(), + required=False, + ) + source_table_id = serializers.IntegerField(required=False, allow_null=True) + source_field_id = serializers.IntegerField(required=False, allow_null=True) + + def validate_source_field_id(self, value): + if value is not None: + try: + field = Field.objects.get(id=value) + except Field.DoesNotExist: + raise serializers.ValidationError( + "The specified source field does not exist." + ) + if field.content_type.model not in SCANNABLE_FIELD_CONTENT_TYPES: + raise serializers.ValidationError( + "The specified source field type is not compatible with data " + "scanning." 
+ ) + return value + + +class DataScanCreateSerializer(DataScanWriteSerializer): + name = serializers.CharField(max_length=255) + scan_type = serializers.ChoiceField( + choices=DataScan.SCAN_TYPE_CHOICES, + ) + workspace_ids = serializers.ListField( + child=serializers.IntegerField(), + default=list, + ) + list_items = serializers.ListField( + child=serializers.CharField(), + default=list, + ) + pattern = serializers.CharField( + max_length=100, required=False, allow_blank=True, default=None + ) + frequency = serializers.ChoiceField( + choices=DataScan.FREQUENCY_CHOICES, + default="manual", + ) + scan_all_workspaces = serializers.BooleanField(default=True) + + def validate(self, data): + scan_type = data.get("scan_type") + if scan_type == SCAN_TYPE_PATTERN and not data.get("pattern"): + raise serializers.ValidationError( + {"pattern": "Pattern is required for pattern scan type."} + ) + if scan_type == SCAN_TYPE_LIST_OF_VALUES and not data.get("list_items"): + raise serializers.ValidationError( + {"list_items": "List items are required for list of values scan type."} + ) + if scan_type == SCAN_TYPE_LIST_TABLE: + if not data.get("source_table_id") or not data.get("source_field_id"): + raise serializers.ValidationError( + { + "source_table_id": "Source table and field are required for list table scan type." 
+ } + ) + return data + + +class DataScanUpdateSerializer(DataScanWriteSerializer): + pass + + +class DataScanResultSerializer(serializers.ModelSerializer): + scan_name = serializers.SerializerMethodField() + workspace_name = serializers.SerializerMethodField() + database_id = serializers.SerializerMethodField() + database_name = serializers.SerializerMethodField() + table_name = serializers.SerializerMethodField() + field_name = serializers.SerializerMethodField() + + class Meta: + model = DataScanResult + fields = [ + "id", + "scan_id", + "scan_name", + "workspace_name", + "database_id", + "database_name", + "table_id", + "table_name", + "field_name", + "row_id", + "matched_value", + "first_identified_on", + "last_identified_on", + ] + + def get_scan_name(self, obj): + return obj.scan.name + + def get_workspace_name(self, obj): + try: + return obj.field.table.database.workspace.name + except AttributeError: + return None + + def get_database_id(self, obj): + return obj.table.database_id + + def get_database_name(self, obj): + try: + return obj.field.table.database.name + except AttributeError: + return None + + def get_table_name(self, obj): + try: + return obj.field.table.name + except AttributeError: + return None + + def get_field_name(self, obj): + try: + return obj.field.name + except AttributeError: + return None + + +class WorkspaceStructureFieldSerializer(serializers.Serializer): + id = serializers.IntegerField() + name = serializers.CharField() + type = serializers.SerializerMethodField() + + def get_type(self, obj): + field_type = obj.content_type.model + if field_type.endswith("field"): + field_type = field_type[: -len("field")] + return field_type + + +class WorkspaceStructureTableSerializer(serializers.Serializer): + id = serializers.IntegerField() + name = serializers.CharField() + + def to_representation(self, instance): + data = super().to_representation(instance) + data["fields"] = WorkspaceStructureFieldSerializer( + instance.field_set.all(), 
many=True + ).data + return data + + +class WorkspaceStructureDatabaseSerializer(serializers.Serializer): + id = serializers.IntegerField() + name = serializers.CharField() + tables = serializers.SerializerMethodField() + + def get_tables(self, obj): + return WorkspaceStructureTableSerializer(obj.table_set.all(), many=True).data + + +DataScanResultExportJobRequestSerializer = job_type_registry.get( + DataScanResultExportJobType.type +).get_serializer_class( + base_class=serializers.Serializer, + request_serializer=True, + meta_ref_name="SingleDataScanResultExportJobRequestSerializer", +) + +DataScanResultExportJobResponseSerializer = job_type_registry.get( + DataScanResultExportJobType.type +).get_serializer_class( + base_class=serializers.Serializer, + meta_ref_name="SingleDataScanResultExportJobResponseSerializer", +) diff --git a/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/urls.py b/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/urls.py new file mode 100644 index 0000000000..738b6faad3 --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/urls.py @@ -0,0 +1,43 @@ +from django.urls import re_path + +from .views import ( + DataScanDetailView, + DataScanListView, + DataScanResultDeleteView, + DataScanResultExportView, + DataScanResultListView, + DataScanTriggerView, + DataScanWorkspaceStructureView, +) + +app_name = "baserow_enterprise.api.admin.data_scanner" + +urlpatterns = [ + re_path(r"^scans/$", DataScanListView.as_view(), name="list"), + re_path( + r"^scans/(?P<scan_id>[0-9]+)/$", + DataScanDetailView.as_view(), + name="detail", + ), + re_path( + r"^scans/(?P<scan_id>[0-9]+)/trigger/$", + DataScanTriggerView.as_view(), + name="trigger", + ), + re_path(r"^results/$", DataScanResultListView.as_view(), name="results"), + re_path( + r"^results/export/$", + DataScanResultExportView.as_view(), + name="results_export", + ), + re_path( + r"^results/(?P<result_id>[0-9]+)/$", + DataScanResultDeleteView.as_view(), +
name="result_delete", + ), + re_path( + r"^workspace-structure/(?P<workspace_id>[0-9]+)/$", + DataScanWorkspaceStructureView.as_view(), + name="workspace_structure", + ), +] diff --git a/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/views.py b/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/views.py new file mode 100644 index 0000000000..ea2989d154 --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/api/admin/data_scanner/views.py @@ -0,0 +1,370 @@ +from django.db import transaction +from django.db.models import Prefetch + +from drf_spectacular.utils import extend_schema +from rest_framework.permissions import IsAdminUser +from rest_framework.response import Response +from rest_framework.status import HTTP_202_ACCEPTED, HTTP_204_NO_CONTENT +from rest_framework.views import APIView + +from baserow.api.admin.views import APIListingView +from baserow.api.decorators import map_exceptions, validate_body +from baserow.api.errors import ERROR_GROUP_DOES_NOT_EXIST +from baserow.api.jobs.errors import ERROR_MAX_JOB_COUNT_EXCEEDED +from baserow.api.jobs.serializers import JobSerializer +from baserow.api.schemas import get_error_schema +from baserow.contrib.database.fields.models import Field +from baserow.contrib.database.models import Database +from baserow.contrib.database.table.models import Table +from baserow.core.action.registries import action_type_registry +from baserow.core.exceptions import WorkspaceDoesNotExist +from baserow.core.jobs.exceptions import MaxJobCountExceeded +from baserow.core.jobs.handler import JobHandler +from baserow.core.jobs.registries import job_type_registry +from baserow.core.models import Workspace +from baserow_enterprise.api.admin.data_scanner.errors import ( + ERROR_DATA_SCAN_ALREADY_RUNNING, + ERROR_DATA_SCAN_DOES_NOT_EXIST, + ERROR_DATA_SCAN_RESULT_DOES_NOT_EXIST, +) +from baserow_enterprise.api.admin.data_scanner.serializers import ( + DataScanCreateSerializer, +
DataScanResultExportJobRequestSerializer, + DataScanResultExportJobResponseSerializer, + DataScanResultSerializer, + DataScanSerializer, + DataScanUpdateSerializer, + WorkspaceStructureDatabaseSerializer, +) +from baserow_enterprise.data_scanner.actions import ( + CreateDataScanActionType, + DeleteDataScanActionType, + UpdateDataScanActionType, +) +from baserow_enterprise.data_scanner.constants import SCANNABLE_FIELD_CONTENT_TYPES +from baserow_enterprise.data_scanner.exceptions import ( + DataScanDoesNotExist, + DataScanIsAlreadyRunning, + DataScanResultDoesNotExist, +) +from baserow_enterprise.data_scanner.handler import DataScannerHandler +from baserow_enterprise.data_scanner.job_types import DataScanResultExportJobType +from baserow_enterprise.data_scanner.models import DataScanResult +from baserow_enterprise.features import DATA_SCANNER +from baserow_premium.license.handler import LicenseHandler + + +class DataScanListView(APIListingView): + permission_classes = (IsAdminUser,) + serializer_class = DataScanSerializer + search_fields = ["name"] + sort_field_mapping = { + "name": "name", + "scan_type": "scan_type", + "frequency": "frequency", + "created_on": "created_on", + } + default_order_by = "created_on" + + def get_queryset(self, request): + return DataScannerHandler.list_scans(request.user) + + @extend_schema( + tags=["Admin data scanner"], + operation_id="admin_data_scanner_list_scans", + description=( + "Lists all data scans configured for this Baserow instance. Data scans " + "allow administrators to search the entire instance for sensitive data " + "matching a pattern, a list of uploaded values, or values from another " + "Baserow table. 
**Enterprise feature.**" + ), + **APIListingView.get_extend_schema_parameters( + "data scans", + DataScanSerializer, + ["name"], + sort_field_mapping, + ), + ) + def get(self, request): + return super().get(request) + + @extend_schema( + tags=["Admin data scanner"], + operation_id="admin_data_scanner_create_scan", + description=( + "Creates a new data scan. A data scan searches the Baserow instance " + "for sensitive data matching a pattern (e.g. credit card numbers), a " + "list of uploaded values, or values sourced from another Baserow table. " + "**Enterprise feature.**" + ), + request=DataScanCreateSerializer, + responses={200: DataScanSerializer}, + ) + @transaction.atomic + @validate_body(DataScanCreateSerializer) + def post(self, request, data): + LicenseHandler.raise_if_user_doesnt_have_feature_instance_wide( + DATA_SCANNER, request.user + ) + scan = action_type_registry.get_by_type(CreateDataScanActionType).do( + user=request.user, + name=data["name"], + scan_type=data["scan_type"], + pattern=data.get("pattern"), + frequency=data.get("frequency", "manual"), + scan_all_workspaces=data.get("scan_all_workspaces", True), + workspace_ids=data.get("workspace_ids", []), + list_items=data.get("list_items", []), + source_table_id=data.get("source_table_id"), + source_field_id=data.get("source_field_id"), + ) + return Response(DataScanSerializer(scan).data) + + +class DataScanDetailView(APIView): + permission_classes = (IsAdminUser,) + + @extend_schema( + tags=["Admin data scanner"], + operation_id="admin_data_scanner_get_scan", + description=( + "Returns a single data scan configuration. 
**Enterprise feature.**" + ), + responses={ + 200: DataScanSerializer, + 404: get_error_schema(["ERROR_DATA_SCAN_DOES_NOT_EXIST"]), + }, + ) + @map_exceptions({DataScanDoesNotExist: ERROR_DATA_SCAN_DOES_NOT_EXIST}) + def get(self, request, scan_id): + scan = DataScannerHandler.get_scan(user=request.user, scan_id=scan_id) + return Response(DataScanSerializer(scan).data) + + @extend_schema( + tags=["Admin data scanner"], + operation_id="admin_data_scanner_update_scan", + description=( + "Updates a data scan configuration. When the scan type, pattern, or " + "list items change, stale results are automatically cleaned up. " + "**Enterprise feature.**" + ), + request=DataScanUpdateSerializer, + responses={ + 200: DataScanSerializer, + 404: get_error_schema(["ERROR_DATA_SCAN_DOES_NOT_EXIST"]), + 409: get_error_schema(["ERROR_DATA_SCAN_ALREADY_RUNNING"]), + }, + ) + @transaction.atomic + @map_exceptions( + { + DataScanDoesNotExist: ERROR_DATA_SCAN_DOES_NOT_EXIST, + DataScanIsAlreadyRunning: ERROR_DATA_SCAN_ALREADY_RUNNING, + } + ) + @validate_body(DataScanUpdateSerializer) + def patch(self, request, scan_id, data): + scan = action_type_registry.get_by_type(UpdateDataScanActionType).do( + user=request.user, + scan_id=scan_id, + **data, + ) + return Response(DataScanSerializer(scan).data) + + @extend_schema( + tags=["Admin data scanner"], + operation_id="admin_data_scanner_delete_scan", + description=( + "Deletes a data scan and all of its results. 
**Enterprise feature.**" + ), + responses={ + 204: None, + 404: get_error_schema(["ERROR_DATA_SCAN_DOES_NOT_EXIST"]), + 409: get_error_schema(["ERROR_DATA_SCAN_ALREADY_RUNNING"]), + }, + ) + @transaction.atomic + @map_exceptions( + { + DataScanDoesNotExist: ERROR_DATA_SCAN_DOES_NOT_EXIST, + DataScanIsAlreadyRunning: ERROR_DATA_SCAN_ALREADY_RUNNING, + } + ) + def delete(self, request, scan_id): + action_type_registry.get_by_type(DeleteDataScanActionType).do( + user=request.user, scan_id=scan_id + ) + return Response(status=HTTP_204_NO_CONTENT) + + +class DataScanTriggerView(APIView): + permission_classes = (IsAdminUser,) + + @extend_schema( + tags=["Admin data scanner"], + operation_id="admin_data_scanner_trigger_scan", + description=( + "Triggers an immediate run of the given data scan. The scan executes " + "asynchronously and searches the configured workspaces for matches. " + "**Enterprise feature.**" + ), + responses={ + 202: DataScanSerializer, + 404: get_error_schema(["ERROR_DATA_SCAN_DOES_NOT_EXIST"]), + 409: get_error_schema(["ERROR_DATA_SCAN_ALREADY_RUNNING"]), + }, + ) + @map_exceptions( + { + DataScanDoesNotExist: ERROR_DATA_SCAN_DOES_NOT_EXIST, + DataScanIsAlreadyRunning: ERROR_DATA_SCAN_ALREADY_RUNNING, + } + ) + def post(self, request, scan_id): + scan = DataScannerHandler.trigger_scan(user=request.user, scan_id=scan_id) + return Response(DataScanSerializer(scan).data, status=HTTP_202_ACCEPTED) + + +class DataScanResultListView(APIListingView): + permission_classes = (IsAdminUser,) + serializer_class = DataScanResultSerializer + search_fields = ["matched_value"] + filters_field_mapping = { + "scan_id": "scan_id", + } + sort_field_mapping = { + "first_identified_on": "first_identified_on", + "last_identified_on": "last_identified_on", + } + default_order_by = "-first_identified_on" + + def get_queryset(self, request): + LicenseHandler.raise_if_user_doesnt_have_feature_instance_wide( + DATA_SCANNER, request.user + ) + return 
DataScanResult.objects.select_related( + "scan", "table__database", "field__table__database__workspace" + ).all() + + @extend_schema( + tags=["Admin data scanner"], + operation_id="admin_data_scanner_list_results", + description=( + "Lists all data scan results across all scans. Results represent " + "individual matches found in database fields during scan execution. " + "Can be filtered by scan_id and searched by matched value. " + "**Enterprise feature.**" + ), + **APIListingView.get_extend_schema_parameters( + "data scan results", + DataScanResultSerializer, + ["matched_value"], + sort_field_mapping, + ), + ) + def get(self, request): + return super().get(request) + + +class DataScanWorkspaceStructureView(APIView): + permission_classes = (IsAdminUser,) + + @extend_schema( + tags=["Admin data scanner"], + operation_id="admin_data_scanner_workspace_structure", + description=( + "Returns the database/table/field structure of a workspace for use " + "in data scan configuration. Only text-compatible fields are included. 
" + "**Enterprise feature.**" + ), + responses={ + 200: WorkspaceStructureDatabaseSerializer(many=True), + 404: get_error_schema(["ERROR_GROUP_DOES_NOT_EXIST"]), + }, + ) + @map_exceptions({WorkspaceDoesNotExist: ERROR_GROUP_DOES_NOT_EXIST}) + def get(self, request, workspace_id): + LicenseHandler.raise_if_user_doesnt_have_feature_instance_wide( + DATA_SCANNER, request.user + ) + + try: + workspace = Workspace.objects.get(id=workspace_id) + except Workspace.DoesNotExist: + raise WorkspaceDoesNotExist() + + databases = ( + Database.objects.filter(workspace=workspace) + .prefetch_related( + Prefetch( + "table_set", + queryset=Table.objects.prefetch_related( + Prefetch( + "field_set", + queryset=Field.objects.filter( + content_type__model__in=SCANNABLE_FIELD_CONTENT_TYPES, + ).select_related("content_type"), + ) + ), + ) + ) + .order_by("order", "id") + ) + + serializer = WorkspaceStructureDatabaseSerializer(databases, many=True) + return Response(serializer.data) + + +class DataScanResultExportView(APIView): + permission_classes = (IsAdminUser,) + + @extend_schema( + tags=["Admin data scanner"], + operation_id="admin_data_scanner_export_results", + description=( + "Creates a job to export data scan results to CSV. The exported file " + "includes scan name, workspace, database, table, field, row ID, matched " + "value, and timestamps for each result. 
**Enterprise feature.**" + ), + request=DataScanResultExportJobRequestSerializer, + responses={ + 202: DataScanResultExportJobResponseSerializer, + 400: get_error_schema(["ERROR_MAX_JOB_COUNT_EXCEEDED"]), + }, + ) + @transaction.atomic + @map_exceptions({MaxJobCountExceeded: ERROR_MAX_JOB_COUNT_EXCEEDED}) + @validate_body(DataScanResultExportJobRequestSerializer, return_validated=True) + def post(self, request, data): + LicenseHandler.raise_if_user_doesnt_have_feature_instance_wide( + DATA_SCANNER, request.user + ) + job = JobHandler().create_and_start_job( + request.user, DataScanResultExportJobType.type, **data + ) + serializer = job_type_registry.get_serializer( + job, JobSerializer, context={"request": request} + ) + return Response(serializer.data, status=HTTP_202_ACCEPTED) + + +class DataScanResultDeleteView(APIView): + permission_classes = (IsAdminUser,) + + @extend_schema( + tags=["Admin data scanner"], + operation_id="admin_data_scanner_delete_result", + description=( + "Deletes (resolves) a single data scan result, marking it as reviewed. 
" + "**Enterprise feature.**" + ), + responses={ + 204: None, + 404: get_error_schema(["ERROR_DATA_SCAN_RESULT_DOES_NOT_EXIST"]), + }, + ) + @transaction.atomic + @map_exceptions({DataScanResultDoesNotExist: ERROR_DATA_SCAN_RESULT_DOES_NOT_EXIST}) + def delete(self, request, result_id): + DataScannerHandler.delete_result(user=request.user, result_id=result_id) + return Response(status=HTTP_204_NO_CONTENT) diff --git a/enterprise/backend/src/baserow_enterprise/api/admin/urls.py b/enterprise/backend/src/baserow_enterprise/api/admin/urls.py index 4b073f1adb..20efb8d2d6 100644 --- a/enterprise/backend/src/baserow_enterprise/api/admin/urls.py +++ b/enterprise/backend/src/baserow_enterprise/api/admin/urls.py @@ -2,10 +2,12 @@ from .audit_log import urls as audit_log_urls from .auth_provider import urls as auth_provider_urls +from .data_scanner import urls as data_scanner_urls app_name = "baserow_enterprise.api.admin" urlpatterns = [ path("auth-provider/", include(auth_provider_urls, namespace="auth_provider")), path("audit-log/", include(audit_log_urls, namespace="audit_log")), + path("data-scanner/", include(data_scanner_urls, namespace="data_scanner")), ] diff --git a/enterprise/backend/src/baserow_enterprise/api/audit_log/serializers.py b/enterprise/backend/src/baserow_enterprise/api/audit_log/serializers.py index a3ef44ab35..87c3caa0a3 100644 --- a/enterprise/backend/src/baserow_enterprise/api/audit_log/serializers.py +++ b/enterprise/backend/src/baserow_enterprise/api/audit_log/serializers.py @@ -8,7 +8,6 @@ from baserow.core.action.registries import action_type_registry from baserow.core.jobs.registries import job_type_registry -from baserow.core.models import Workspace from baserow_enterprise.audit_log.job_types import AuditLogExportJobType from baserow_enterprise.audit_log.models import AuditLogEntry @@ -92,14 +91,6 @@ class Meta: fields = ("id", "value") -class AuditLogWorkspaceSerializer(serializers.ModelSerializer): - value = 
serializers.CharField(source="name") - - class Meta: - model = Workspace - fields = ("id", "value") - - class AuditLogActionTypeSerializer(serializers.Serializer): id = serializers.ChoiceField( choices=lazy(action_type_registry.get_types, list)(), diff --git a/enterprise/backend/src/baserow_enterprise/api/audit_log/urls.py b/enterprise/backend/src/baserow_enterprise/api/audit_log/urls.py index e631b22510..a3a85d1f8b 100755 --- a/enterprise/backend/src/baserow_enterprise/api/audit_log/urls.py +++ b/enterprise/backend/src/baserow_enterprise/api/audit_log/urls.py @@ -5,7 +5,6 @@ AuditLogActionTypeFilterView, AuditLogUserFilterView, AuditLogView, - AuditLogWorkspaceFilterView, ) app_name = "baserow_enterprise.api.audit_log" @@ -13,7 +12,6 @@ urlpatterns = [ re_path(r"^$", AuditLogView.as_view(), name="list"), re_path(r"users/$", AuditLogUserFilterView.as_view(), name="users"), - re_path(r"workspaces/$", AuditLogWorkspaceFilterView.as_view(), name="workspaces"), re_path( r"action-types/$", AuditLogActionTypeFilterView.as_view(), name="action_types" ), diff --git a/enterprise/backend/src/baserow_enterprise/api/audit_log/views.py b/enterprise/backend/src/baserow_enterprise/api/audit_log/views.py index 13ff4a4bad..1410e1702f 100755 --- a/enterprise/backend/src/baserow_enterprise/api/audit_log/views.py +++ b/enterprise/backend/src/baserow_enterprise/api/audit_log/views.py @@ -3,7 +3,7 @@ from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import OpenApiParameter, extend_schema -from rest_framework.permissions import IsAdminUser, IsAuthenticated +from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from rest_framework.status import HTTP_202_ACCEPTED from rest_framework.views import APIView @@ -23,14 +23,12 @@ from baserow.core.jobs.exceptions import MaxJobCountExceeded from baserow.core.jobs.handler import JobHandler from baserow.core.jobs.registries import job_type_registry -from baserow.core.models import 
User, Workspace +from baserow.core.models import User from baserow_enterprise.audit_log.job_types import AuditLogExportJobType from baserow_enterprise.audit_log.models import AuditLogEntry from baserow_enterprise.audit_log.utils import ( check_for_license_and_permissions_or_raise, ) -from baserow_enterprise.features import AUDIT_LOG -from baserow_premium.license.handler import LicenseHandler from .serializers import ( AuditLogActionTypeSerializer, @@ -40,7 +38,6 @@ AuditLogSerializer, AuditLogUserSerializer, AuditLogWorkspaceFilterQueryParamsSerializer, - AuditLogWorkspaceSerializer, serialize_filtered_action_types, ) @@ -244,33 +241,6 @@ def get(self, request, query_params): return super().get(request) -class AuditLogWorkspaceFilterView(APIListingView): - permission_classes = (IsAdminUser,) - serializer_class = AuditLogWorkspaceSerializer - search_fields = ["name"] - default_order_by = "name" - - def get_queryset(self, request): - return Workspace.objects.filter(template__isnull=True) - - @extend_schema( - tags=["Audit log"], - operation_id="audit_log_workspaces", - description=( - "List all distinct workspace names related to an audit log entry." - "\n\nThis is a **enterprise** feature." 
- ), - **APIListingView.get_extend_schema_parameters( - "workspaces", serializer_class, search_fields, {} - ), - ) - def get(self, request): - LicenseHandler.raise_if_user_doesnt_have_feature_instance_wide( - AUDIT_LOG, request.user - ) - return super().get(request) - - class AsyncAuditLogExportView(APIView): permission_classes = (IsAuthenticated,) diff --git a/enterprise/backend/src/baserow_enterprise/apps.py b/enterprise/backend/src/baserow_enterprise/apps.py index 9a482941bb..1c61528d94 100755 --- a/enterprise/backend/src/baserow_enterprise/apps.py +++ b/enterprise/backend/src/baserow_enterprise/apps.py @@ -14,8 +14,12 @@ def ready(self): from baserow_enterprise.audit_log.operations import ( ListWorkspaceAuditLogEntriesOperationType, ) + from baserow_enterprise.data_scanner.job_types import ( + DataScanResultExportJobType, + ) job_type_registry.register(AuditLogExportJobType()) + job_type_registry.register(DataScanResultExportJobType()) from baserow.api.user.registries import member_data_registry from baserow.core.action.registries import ( @@ -261,11 +265,19 @@ def ready(self): data_sync_type_registry.unregister(PostgreSQLDataSyncType.type) data_sync_type_registry.register(PostgreSQLDataSyncType()) + from baserow_enterprise.data_scanner.actions import ( + CreateDataScanActionType, + DeleteDataScanActionType, + UpdateDataScanActionType, + ) from baserow_enterprise.data_sync.actions import ( UpdatePeriodicDataSyncIntervalActionType, ) action_type_registry.register(UpdatePeriodicDataSyncIntervalActionType()) + action_type_registry.register(CreateDataScanActionType()) + action_type_registry.register(UpdateDataScanActionType()) + action_type_registry.register(DeleteDataScanActionType()) from baserow.contrib.database.webhooks.registries import ( webhook_event_type_registry, @@ -303,6 +315,9 @@ def ready(self): connect_to_post_delete_signals_to_cascade_deletion_to_role_assignments() from baserow.core.notifications.registries import notification_type_registry + from 
baserow_enterprise.data_scanner.notification_types import ( + DataScanNewResultsNotificationType, + ) from baserow_enterprise.data_sync.notification_types import ( PeriodicDataSyncDeactivatedNotificationType, TwoWaySyncDeactivatedNotificationType, @@ -314,6 +329,7 @@ def ready(self): ) notification_type_registry.register(TwoWaySyncUpdateFailedNotificationType()) notification_type_registry.register(TwoWaySyncDeactivatedNotificationType()) + notification_type_registry.register(DataScanNewResultsNotificationType()) from baserow_enterprise.views.operations import ( ListenToAllRestrictedViewEventsOperationType, @@ -368,6 +384,7 @@ def ready(self): # which need to be filled first. import baserow_enterprise.assistant.tasks # noqa: F401 import baserow_enterprise.audit_log.signals # noqa: F401 + import baserow_enterprise.data_scanner.tasks # noqa: F401 import baserow_enterprise.ws.signals # noqa: F401 diff --git a/enterprise/backend/src/baserow_enterprise/data_scanner/__init__.py b/enterprise/backend/src/baserow_enterprise/data_scanner/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/enterprise/backend/src/baserow_enterprise/data_scanner/actions.py b/enterprise/backend/src/baserow_enterprise/data_scanner/actions.py new file mode 100644 index 0000000000..e7bf1407b1 --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/data_scanner/actions.py @@ -0,0 +1,94 @@ +import dataclasses + +from django.contrib.auth.models import AbstractUser +from django.utils.translation import gettext_lazy as _ + +from baserow.core.action.registries import ( + ActionScopeStr, + ActionType, + ActionTypeDescription, +) +from baserow.core.action.scopes import RootActionScopeType +from baserow_enterprise.data_scanner.handler import DataScannerHandler + + +class CreateDataScanActionType(ActionType): + type = "create_data_scan" + description = ActionTypeDescription( + _("Create data scan"), + _('Data scan "%(scan_name)s" (%(scan_id)s) created'), + ) + analytics_params = 
["scan_id"] + + @dataclasses.dataclass + class Params: + scan_id: int + scan_name: str + + @classmethod + def do(cls, user: AbstractUser, **kwargs): + scan = DataScannerHandler.create_scan(user=user, **kwargs) + + params = cls.Params(scan.id, scan.name) + cls.register_action(user, params, cls.scope()) + + return scan + + @classmethod + def scope(cls) -> ActionScopeStr: + return RootActionScopeType.value() + + +class UpdateDataScanActionType(ActionType): + type = "update_data_scan" + description = ActionTypeDescription( + _("Update data scan"), + _('Data scan "%(scan_name)s" (%(scan_id)s) updated'), + ) + analytics_params = ["scan_id"] + + @dataclasses.dataclass + class Params: + scan_id: int + scan_name: str + + @classmethod + def do(cls, user: AbstractUser, scan_id: int, **kwargs): + scan = DataScannerHandler.update_scan(user=user, scan_id=scan_id, **kwargs) + + params = cls.Params(scan.id, scan.name) + cls.register_action(user, params, cls.scope()) + + return scan + + @classmethod + def scope(cls) -> ActionScopeStr: + return RootActionScopeType.value() + + +class DeleteDataScanActionType(ActionType): + type = "delete_data_scan" + description = ActionTypeDescription( + _("Delete data scan"), + _('Data scan "%(scan_name)s" (%(scan_id)s) deleted'), + ) + analytics_params = ["scan_id"] + + @dataclasses.dataclass + class Params: + scan_id: int + scan_name: str + + @classmethod + def do(cls, user: AbstractUser, scan_id: int): + scan = DataScannerHandler.get_scan(user=user, scan_id=scan_id) + + scan_name = scan.name + DataScannerHandler.delete_scan(user=user, scan_id=scan_id) + + params = cls.Params(scan_id, scan_name) + cls.register_action(user, params, cls.scope()) + + @classmethod + def scope(cls) -> ActionScopeStr: + return RootActionScopeType.value() diff --git a/enterprise/backend/src/baserow_enterprise/data_scanner/constants.py b/enterprise/backend/src/baserow_enterprise/data_scanner/constants.py new file mode 100644 index 0000000000..d3ea77bdb7 --- /dev/null +++ 
b/enterprise/backend/src/baserow_enterprise/data_scanner/constants.py @@ -0,0 +1,38 @@ +from datetime import timedelta + +from baserow.contrib.database.fields.models import ( + AutonumberField, + EmailField, + NumberField, + PhoneNumberField, + TextField, + URLField, + UUIDField, +) + +# Contains the field types that can be used in the Baserow source table. +SCANNABLE_FIELD_TYPES = [ + TextField, + URLField, + EmailField, + NumberField, + AutonumberField, + PhoneNumberField, + UUIDField, +] + +SCANNABLE_FIELD_CONTENT_TYPES = [ + field._meta.model_name for field in SCANNABLE_FIELD_TYPES +] + +SCAN_TYPE_PATTERN = "pattern" +SCAN_TYPE_LIST_OF_VALUES = "list_of_values" +SCAN_TYPE_LIST_TABLE = "list_table" + +STALE_SCAN_THRESHOLD_HOURS = 2 + +FREQUENCY_INTERVALS = { + "hourly": timedelta(hours=1), + "daily": timedelta(days=1), + "weekly": timedelta(weeks=1), +} diff --git a/enterprise/backend/src/baserow_enterprise/data_scanner/exceptions.py b/enterprise/backend/src/baserow_enterprise/data_scanner/exceptions.py new file mode 100644 index 0000000000..7b6526de32 --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/data_scanner/exceptions.py @@ -0,0 +1,10 @@ +class DataScanDoesNotExist(Exception): + pass + + +class DataScanIsAlreadyRunning(Exception): + pass + + +class DataScanResultDoesNotExist(Exception): + pass diff --git a/enterprise/backend/src/baserow_enterprise/data_scanner/handler.py b/enterprise/backend/src/baserow_enterprise/data_scanner/handler.py new file mode 100644 index 0000000000..e276a7f244 --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/data_scanner/handler.py @@ -0,0 +1,773 @@ +import re +import traceback +from datetime import datetime, timedelta +from typing import Optional + +from django.contrib.auth.models import AbstractUser +from django.contrib.postgres.search import SearchQuery +from django.core.exceptions import PermissionDenied +from django.db.models import Count, QuerySet, TextField +from django.db.models.functions import 
def convert_pattern_to_regex(pattern: str) -> str:
    """
    Translates the scanner's placeholder syntax into a regular expression.

    Placeholder characters:
    - `A` matches a single letter (`[A-Za-z]`)
    - `D` matches a single digit (`[0-9]`)
    - `X` matches any single character (`.`)
    - a backslash makes the next character literal, so `\\A` matches the letter
      `A` itself

    Every other character is matched literally. A backslash that is the very
    last character has nothing to escape and is matched as a literal backslash.

    :param pattern: The placeholder pattern (e.g. `AADDAAAADDDDDDDDDD`).
    :return: The equivalent regex source string.
    """

    placeholders = {
        "A": "[A-Za-z]",
        "D": "[0-9]",
        "X": ".",
    }

    fragments: list[str] = []
    symbols = iter(pattern)
    for symbol in symbols:
        if symbol == "\\":
            # Consume the character after the backslash, if there is one, and
            # emit it as a literal. Otherwise the backslash itself is literal.
            literal = next(symbols, None)
            fragments.append(re.escape(symbol if literal is None else literal))
        elif symbol in placeholders:
            fragments.append(placeholders[symbol])
        else:
            fragments.append(re.escape(symbol))
    return "".join(fragments)
+ :raises PermissionDenied: When the user is not staff. + """ + + LicenseHandler.raise_if_user_doesnt_have_feature_instance_wide(DATA_SCANNER, user) + if not user.is_staff: + raise PermissionDenied() + + +class DataScannerHandler: + @staticmethod + def create_scan( + user: AbstractUser, + name: str, + scan_type: str, + pattern: Optional[str] = None, + frequency: str = "manual", + scan_all_workspaces: bool = True, + workspace_ids: Optional[list[int]] = None, + list_items: Optional[list[str]] = None, + source_table_id: Optional[int] = None, + source_field_id: Optional[int] = None, + ) -> DataScan: + """ + Creates a new data scan configuration. + + :param user: The staff user performing the action. + :param name: Human-readable name for the scan. + :param scan_type: One of `pattern`, `list_of_values`, or `list_table`. + :param pattern: Required when scan_type is `pattern`. + :param frequency: How often the scan runs automatically. + :param scan_all_workspaces: When False, only the given workspace_ids are + scanned. + :param workspace_ids: Workspace IDs to restrict scanning to. + :param list_items: Values to match when scan_type is `list_of_values`. + :param source_table_id: Source table ID when scan_type is `list_table`. + :param source_field_id: Source field ID when scan_type is `list_table`. + :return: The newly created DataScan instance. 
+ """ + + _check_data_scanner_access(user) + + scan = DataScan.objects.create( + name=name, + scan_type=scan_type, + pattern=pattern, + frequency=frequency, + scan_all_workspaces=scan_all_workspaces, + created_by=user, + source_table_id=source_table_id + if scan_type == SCAN_TYPE_LIST_TABLE + else None, + source_field_id=source_field_id + if scan_type == SCAN_TYPE_LIST_TABLE + else None, + ) + + if not scan_all_workspaces and workspace_ids: + workspaces = Workspace.objects.filter(id__in=workspace_ids) + scan.workspaces.set(workspaces) + + if scan_type == SCAN_TYPE_LIST_OF_VALUES and list_items: + DataScanListItem.objects.bulk_create( + [DataScanListItem(scan=scan, value=v) for v in list_items] + ) + + return scan + + @staticmethod + def update_scan(user: AbstractUser, scan_id: int, **kwargs) -> DataScan: + """ + Updates an existing data scan and cleans up stale results when the + configuration changes in a way that invalidates them. + + :param user: The staff user performing the action. + :param scan_id: Primary key of the scan to update. + :param kwargs: Fields to update (name, scan_type, pattern, frequency, + scan_all_workspaces, workspace_ids, list_items, source_table_id, + source_field_id). + :return: The updated DataScan instance. + :raises DataScanDoesNotExist: When the scan is not found. 
+ """ + + _check_data_scanner_access(user) + + try: + scan = DataScan.objects.select_for_update(of=("self",)).get(id=scan_id) + except DataScan.DoesNotExist: + raise DataScanDoesNotExist() + + if scan.is_running: + raise DataScanIsAlreadyRunning() + + simple_fields = [ + "name", + "scan_type", + "pattern", + "frequency", + "scan_all_workspaces", + "source_table_id", + "source_field_id", + ] + for field_name in simple_fields: + if field_name in kwargs: + setattr(scan, field_name, kwargs[field_name]) + scan.save() + + if "workspace_ids" in kwargs: + if scan.scan_all_workspaces: + scan.workspaces.clear() + else: + workspaces = Workspace.objects.filter(id__in=kwargs["workspace_ids"]) + scan.workspaces.set(workspaces) + + if "list_items" in kwargs: + scan.list_items.all().delete() + items = kwargs["list_items"] + if items: + DataScanListItem.objects.bulk_create( + [DataScanListItem(scan=scan, value=v) for v in items] + ) + + DataScannerHandler._cleanup_stale_results(scan, kwargs) + + return scan + + @staticmethod + def _cleanup_stale_results(scan: DataScan, kwargs: dict) -> None: + """ + Removes results that are no longer valid after a scan update. For + example, if the pattern or scan type changed, all existing results are + cleared. If list items changed, only results whose matched value is no + longer in the list are removed. + + :param scan: The scan whose results may need pruning. + :param kwargs: The update kwargs that were applied to the scan. 
+ """ + + if "scan_type" in kwargs: + scan.results.all().delete() + return + + if "pattern" in kwargs and scan.scan_type == SCAN_TYPE_PATTERN: + scan.results.all().delete() + return + + if "list_items" in kwargs and scan.scan_type == SCAN_TYPE_LIST_OF_VALUES: + new_items = set(kwargs["list_items"] or []) + if not new_items: + scan.results.all().delete() + else: + scan.results.exclude(matched_value__in=new_items).delete() + + @staticmethod + def delete_scan(user: AbstractUser, scan_id: int) -> None: + """ + Deletes a data scan and all of its related objects. + + :param user: The staff user performing the action. + :param scan_id: Primary key of the scan to delete. + :raises DataScanDoesNotExist: When the scan is not found. + """ + + _check_data_scanner_access(user) + + try: + scan = DataScan.objects.select_for_update(of=("self",)).get(id=scan_id) + except DataScan.DoesNotExist: + raise DataScanDoesNotExist() + + if scan.is_running: + raise DataScanIsAlreadyRunning() + + scan.delete() + + @staticmethod + def list_scans(user: AbstractUser) -> QuerySet[DataScan]: + """ + Returns all data scans. Requires an enterprise license and staff + access. + + :param user: The staff user performing the action. + :return: A queryset of all DataScan instances. + """ + + _check_data_scanner_access(user) + return ( + DataScan.objects.annotate(results_count=Count("results")) + .prefetch_related( + "workspaces", + "list_items", + ) + .select_related( + "created_by", + "source_table__database__workspace", + ) + ) + + @staticmethod + def get_scan(user: AbstractUser, scan_id: int) -> DataScan: + """ + Returns a single data scan by its primary key. + + :param user: The staff user performing the action. + :param scan_id: Primary key of the scan. + :return: The DataScan instance. + :raises DataScanDoesNotExist: When the scan is not found. 
    @staticmethod
    def delete_result(user: AbstractUser, result_id: int) -> None:
        """
        Deletes (resolves) a single data scan result.

        :param user: The staff user performing the action.
        :param result_id: Primary key of the result to delete.
        :raises DataScanResultDoesNotExist: When the result is not found.
        """

        # Enforces the enterprise feature and staff requirement before any
        # database access.
        _check_data_scanner_access(user)

        try:
            result = DataScanResult.objects.get(id=result_id)
        except DataScanResult.DoesNotExist:
            # Translate the ORM exception into the module's own exception type.
            raise DataScanResultDoesNotExist()

        result.delete()
+ """ + + try: + scan = DataScan.objects.get(id=scan_id) + except DataScan.DoesNotExist: + return + + now = timezone.now() + scan.is_running = True + scan.last_run_started_at = now + scan.last_error = None + scan.save(update_fields=["is_running", "last_run_started_at", "last_error"]) + + new_results_count = 0 + try: + if not LicenseHandler.instance_has_feature(DATA_SCANNER): + scan.last_error = "Enterprise license no longer active" + return + + if scan.scan_all_workspaces: + workspace_ids = list( + Workspace.objects.filter(trashed=False).values_list("id", flat=True) + ) + else: + workspace_ids = list( + scan.workspaces.filter(trashed=False).values_list("id", flat=True) + ) + + pre_computed: dict = {} + + if scan.scan_type == SCAN_TYPE_PATTERN: + regex = convert_pattern_to_regex(scan.pattern) + pre_computed["regex"] = regex + pre_computed["compiled"] = re.compile(regex, re.IGNORECASE) + + elif scan.scan_type == SCAN_TYPE_LIST_OF_VALUES: + pre_computed["values"] = list( + DataScanListItem.objects.filter(scan=scan).values_list( + "value", flat=True + ) + ) + + elif scan.scan_type == SCAN_TYPE_LIST_TABLE: + if not scan.source_table or not scan.source_field: + pre_computed["skip"] = True + else: + source_table = scan.source_table + source_field = scan.source_field + model = source_table.get_model() + field_name = source_field.db_column + values = list( + model.objects.values_list(field_name, flat=True).distinct() + ) + pre_computed["values"] = [str(v) for v in values if v] + pre_computed["exclude_table_id"] = source_table.id + + if not pre_computed.get("skip"): + # Compute trashed field exclusions once for all workspaces. Three + # separate indexed queries (one per trashed column) are combined with + # set union in Python, avoiding an OR that would prevent index usage. 
+ trashed_field_ids = set( + Field.objects_and_trash.filter(trashed=True).values_list( + "id", flat=True + ) + ) + trashed_field_ids |= set( + Field.objects_and_trash.filter(table__trashed=True).values_list( + "id", flat=True + ) + ) + trashed_field_ids |= set( + Field.objects_and_trash.filter( + table__database__trashed=True + ).values_list("id", flat=True) + ) + + for workspace_id in workspace_ids: + if not SearchHandler.workspace_search_table_exists(workspace_id): + continue + + search_model = SearchHandler.get_workspace_search_table_model( + workspace_id + ) + + if scan.scan_type == SCAN_TYPE_PATTERN: + matches = ( + search_model.objects.annotate( + text_value=Cast("value", TextField()) + ) + .filter(text_value__iregex=pre_computed["regex"]) + .values_list("field_id", "row_id", "text_value") + ) + new_results_count += ( + DataScannerHandler._process_pattern_matches( + scan, + matches, + pre_computed["compiled"], + now, + trashed_field_ids, + ) + ) + elif scan.scan_type == SCAN_TYPE_LIST_OF_VALUES: + new_results_count += DataScannerHandler._run_list_scan( + scan, + search_model, + pre_computed["values"], + now, + trashed_field_ids, + ) + elif scan.scan_type == SCAN_TYPE_LIST_TABLE: + new_results_count += DataScannerHandler._run_list_scan( + scan, + search_model, + pre_computed["values"], + now, + trashed_field_ids, + exclude_table_id=pre_computed["exclude_table_id"], + ) + + scan.results.filter(last_identified_on__lt=now).delete() + + except Exception: + scan.last_error = traceback.format_exc() + finally: + scan.is_running = False + scan.last_run_finished_at = timezone.now() + scan.save( + update_fields=[ + "is_running", + "last_run_finished_at", + "last_error", + ] + ) + + if new_results_count > 0 and not scan.last_error: + from baserow_enterprise.data_scanner.notification_types import ( + DataScanNewResultsNotificationType, + ) + + DataScanNewResultsNotificationType.notify_instance_admins( + scan, new_results_count + ) + + @staticmethod + def _run_list_scan( + 
scan: DataScan, + search_model, + values: list[str], + now: datetime, + trashed_field_ids: set[int], + exclude_table_id: Optional[int] = None, + ) -> int: + """ + Searches the workspace search table for rows matching any of the given + values using PostgreSQL full-text search. Processes values in batches + and bulk-upserts results. + + :param scan: The scan being executed. + :param search_model: The Django model for the workspace search table. + :param values: The list of string values to search for. + :param now: The current timestamp used for result bookkeeping. + :param trashed_field_ids: Set of field IDs to exclude because the + field, table, or database is trashed. + :param exclude_table_id: When set, fields belonging to this table are + excluded from results (used for list_table scans to avoid matching + the source table itself). + :return: The number of newly created results. + """ + + excluded_field_ids: set[int] = set() + if exclude_table_id is not None: + excluded_field_ids = set( + Field.objects.filter(table_id=exclude_table_id).values_list( + "id", flat=True + ) + ) + + all_matches: list[tuple[int, int, str]] = [] + batch_size = 100 + for i in range(0, len(values), batch_size): + batch = values[i : i + batch_size] + + # Build a list of (sanitized_query, original_value) pairs, skipping values + # that produce an empty sanitized string. + sanitized_pairs: list[tuple[str, str]] = [] + for search_value in batch: + sanitized = SearchHandler.escape_postgres_query(search_value) + if sanitized: + sanitized_pairs.append((sanitized, search_value)) + + if not sanitized_pairs: + continue + + # Combine all sanitized values into a single OR tsquery so we + # execute one database query per batch instead of one per value. + # Each individual tsquery is wrapped in parentheses to preserve + # the phrase (<->) operator precedence within each value. 
+ combined_raw = " | ".join(f"({s})" for s, _ in sanitized_pairs) + combined_query = SearchQuery( + combined_raw, + search_type="raw", + config=SearchHandler.search_config(), + ) + matches = ( + search_model.objects.filter(value=combined_query) + .annotate(text_value=Cast("value", TextField())) + .values_list("field_id", "row_id", "text_value") + ) + for field_id, row_id, text_value in matches: + if field_id in excluded_field_ids: + continue + matched_value = DataScannerHandler._find_list_match( + text_value, sanitized_pairs + ) + all_matches.append((field_id, row_id, matched_value)) + + return DataScannerHandler._bulk_upsert_results( + scan, all_matches, now, trashed_field_ids + ) + + @staticmethod + def _find_list_match( + tsvector_text: str, + sanitized_pairs: list[tuple[str, str]], + ) -> str: + """ + Given a tsvector text representation and the list of (sanitized_query, + original_value) pairs used to build the combined OR query, determines which + original value matched. Extracts tokens from the tsvector and checks which + query's terms are all present. + + :param tsvector_text: The text representation of a tsvector value. + :param sanitized_pairs: List of (sanitized_query, original_value). + :return: The original value that matched, or the first value as + fallback. + """ + + tokens = {m.group(1) for m in re.finditer(r"'([^']*)'", tsvector_text)} + for sanitized, original in sanitized_pairs: + # Extract bare terms from the sanitized tsquery, stripping dollar-quoting, + # positional operators, and wildcards. + terms = re.findall(r"\$\$([^$]+)\$\$", sanitized) + if terms and all(term.lower() in tokens for term in terms): + return original + return sanitized_pairs[0][1] + + @staticmethod + def _extract_matching_token(tsvector_text: str, compiled_regex: re.Pattern) -> str: + """ + Extracts the first matching token from a tsvector text representation. + + A tsvector cast to text looks like `'nl23ingb0001234321':2 'test':1,3`. 
+ Each token is a single-quoted string followed by `:` and position info. + We test each token against the compiled pattern regex and return the + first match. The token is already lowercased by PostgreSQL. + + :param tsvector_text: The text representation of a tsvector value. + :param compiled_regex: A compiled regex to match tokens against. + :return: The first matching token, or the raw tsvector_text as fallback. + """ + + for m in re.finditer(r"'([^']*)'", tsvector_text): + token = m.group(1) + if compiled_regex.search(token): + return token + return tsvector_text + + @staticmethod + def _process_pattern_matches( + scan: DataScan, + matches, + compiled_regex: re.Pattern, + now: datetime, + trashed_field_ids: set[int], + ) -> int: + """ + Processes raw pattern matches from the database and bulk-upserts results. + + :param scan: The scan being executed. + :param matches: An iterable of (field_id, row_id, text_value) tuples. + :param compiled_regex: The compiled pattern regex. + :param now: The current timestamp used for result bookkeeping. + :param trashed_field_ids: Set of field IDs to exclude because the + field, table, or database is trashed. + :return: The number of newly created results. + """ + + all_matches: list[tuple[int, int, str]] = [] + for field_id, row_id, text_value in matches: + matched = DataScannerHandler._extract_matching_token( + text_value, compiled_regex + ) + all_matches.append((field_id, row_id, matched)) + + return DataScannerHandler._bulk_upsert_results( + scan, all_matches, now, trashed_field_ids + ) + + @staticmethod + def _bulk_upsert_results( + scan: DataScan, + matches: list[tuple[int, int, str]], + now: datetime, + trashed_field_ids: set[int], + ) -> int: + """ + Bulk-upserts DataScanResult rows for a list of matches. + + :param scan: The scan the results belong to. + :param matches: A list of (field_id, row_id, matched_value) tuples. + :param now: The current timestamp. 
+ :param trashed_field_ids: Set of field IDs to exclude because the + field, table, or database is trashed. + :return: The number of newly created results. + """ + + if not matches: + return 0 + + # Build field_id -> table_id and table_id -> Table mappings in a single query + # using select_related to avoid per-table lookups later. + field_ids = {field_id for field_id, _, _ in matches} + field_to_table: dict[int, int] = {} + table_by_id: dict[int, Table] = {} + for field_obj in Field.objects_and_trash.filter( + id__in=field_ids + ).select_related("table"): + field_to_table[field_obj.id] = field_obj.table_id + table_by_id[field_obj.table_id] = field_obj.table + + # Filter out matches for fields that no longer exist or where the field, + # table, or database is trashed. This is done in Python against a small + # blocklist rather than adding a large `field_id__in` filter to the search + # query, which would be slower on instances with millions of fields. + valid_matches = [ + (field_id, row_id, matched_value) + for field_id, row_id, matched_value in matches + if field_id in field_to_table and field_id not in trashed_field_ids + ] + + # Filter out trashed rows. Query each table once for its (typically + # small) set of trashed row IDs. The intersection with matched row IDs + # is done in Python to avoid a potentially huge `id__in` clause. 
+ trashed_row_ids: set[tuple[int, int]] = set() + for table_id, table in table_by_id.items(): + model = table.get_model() + trashed_ids = set( + model.objects_and_trash.filter(trashed=True).values_list( + "id", flat=True + ) + ) + for row_id in trashed_ids: + trashed_row_ids.add((table_id, row_id)) + + if trashed_row_ids: + valid_matches = [ + (field_id, row_id, matched_value) + for field_id, row_id, matched_value in valid_matches + if (field_to_table[field_id], row_id) not in trashed_row_ids + ] + + if not valid_matches: + return 0 + + count_before = DataScanResult.objects.filter(scan=scan).count() + + batch_size = 500 + for i in range(0, len(valid_matches), batch_size): + batch = valid_matches[i : i + batch_size] + objects = [ + DataScanResult( + scan=scan, + table_id=field_to_table[field_id], + field_id=field_id, + row_id=row_id, + matched_value=str(matched_value), + first_identified_on=now, + last_identified_on=now, + ) + for field_id, row_id, matched_value in batch + ] + DataScanResult.objects.bulk_create( + objects, + update_conflicts=True, + unique_fields=["scan", "table", "row_id", "field"], + update_fields=["matched_value", "last_identified_on"], + ) + + count_after = DataScanResult.objects.filter(scan=scan).count() + return count_after - count_before + + @staticmethod + def check_scans_due() -> None: + """ + Periodic check that resets stale running scans and dispatches any + scheduled scans whose interval has elapsed. Called by the + `check_data_scans_due` Celery beat task. 
+ """ + + now = timezone.now() + + stale_threshold = now - timedelta(hours=STALE_SCAN_THRESHOLD_HOURS) + DataScan.objects.filter( + is_running=True, + last_run_started_at__lt=stale_threshold, + ).update( + is_running=False, + last_error="Scan timed out and was automatically reset", + ) + + if not LicenseHandler.instance_has_feature(DATA_SCANNER): + return + + scans = DataScan.objects.filter(is_running=False).exclude(frequency="manual") + for scan in scans: + interval = FREQUENCY_INTERVALS.get(scan.frequency) + if not interval: + continue + + if scan.last_run_started_at is None or ( + now - scan.last_run_started_at >= interval + ): + run_data_scan.delay(scan.id) diff --git a/enterprise/backend/src/baserow_enterprise/data_scanner/job_types.py b/enterprise/backend/src/baserow_enterprise/data_scanner/job_types.py new file mode 100644 index 0000000000..061ebb1d1f --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/data_scanner/job_types.py @@ -0,0 +1,279 @@ +from collections import OrderedDict +from uuid import uuid4 + +from django.core.paginator import Paginator +from django.db.models import QuerySet +from django.utils.translation import gettext as _ + +import unicodecsv as csv +from loguru import logger +from rest_framework import serializers + +from baserow.contrib.database.api.export.serializers import ( + SUPPORTED_CSV_COLUMN_SEPARATORS, + SUPPORTED_EXPORT_CHARSETS, + DisplayChoiceField, + ExportedFileURLSerializerMixin, +) +from baserow.contrib.database.export.handler import ( + ExportHandler, + _create_storage_dir_if_missing_and_open, +) +from baserow.core.jobs.registries import JobType +from baserow.core.storage import get_default_storage +from baserow.core.utils import ChildProgressBuilder, Progress +from baserow_enterprise.features import DATA_SCANNER +from baserow_premium.license.handler import LicenseHandler + +from .models import DataScanResult, DataScanResultExportJob + +DATA_SCAN_RESULT_CSV_COLUMN_NAMES = OrderedDict( + { + "scan_name": { + 
class DataScanResultExportJobType(JobType):
    """
    Job type that exports data scan results to a CSV file in the default
    storage. The request accepts CSV formatting options and an optional scan ID
    filter. The serialized job additionally exposes the creation date, the
    exported file name and a download URL.
    """

    type = "data_scan_result_export"
    model_class = DataScanResultExportJob
    max_count = 1

    serializer_mixins = [ExportedFileURLSerializerMixin]
    request_serializer_field_names = [
        "csv_column_separator",
        "csv_first_row_header",
        "export_charset",
        "filter_scan_id",
    ]

    serializer_field_names = [
        *request_serializer_field_names,
        "created_on",
        "exported_file_name",
        "url",
    ]
    base_serializer_field_overrides = {
        "export_charset": DisplayChoiceField(
            choices=SUPPORTED_EXPORT_CHARSETS,
            default="utf-8",
            help_text="The character set to use when creating the export file.",
        ),
        "csv_column_separator": DisplayChoiceField(
            choices=SUPPORTED_CSV_COLUMN_SEPARATORS,
            default=",",
            help_text="The value used to separate columns in the resulting csv file.",
        ),
        "csv_first_row_header": serializers.BooleanField(
            default=True,
            help_text="Whether or not to generate a header row at the top of the csv file.",
        ),
        "filter_scan_id": serializers.IntegerField(
            min_value=0,
            required=False,
            help_text="Optional: Filter results by scan ID.",
        ),
    }
    request_serializer_field_overrides = {
        **base_serializer_field_overrides,
    }
    serializer_field_overrides = {
        **base_serializer_field_overrides,
        "created_on": serializers.DateTimeField(
            read_only=True,
            help_text="The date and time when the export job was created.",
        ),
        "exported_file_name": serializers.CharField(
            read_only=True,
            help_text="The name of the file that was created by the export job.",
        ),
        "url": serializers.SerializerMethodField(
            help_text="The URL to download the exported file.",
        ),
    }

    def before_delete(self, job: DataScanResultExportJob) -> None:
        """
        Deletes the exported CSV file from storage before the job row is
        removed. A file that is already gone is logged and otherwise ignored.

        :param job: The export job about to be deleted.
        """

        if not job.exported_file_name:
            # The job never produced a file, so there is nothing to clean up.
            return

        storage = get_default_storage()
        storage_location = ExportHandler.export_file_path(job.exported_file_name)
        try:
            storage.delete(storage_location)
        except FileNotFoundError:
            # loguru interpolates positional arguments with `str.format`, so
            # the placeholders must be `{}`. `%s` would be logged verbatim.
            logger.error(
                "Could not delete file {} for 'data_scan_result_export' job {}",
                storage_location,
                job.id,
            )

    @staticmethod
    def _safe_attr(obj, *attrs, default="") -> str:
        """
        Traverses a chain of attributes, returning `default` if any link
        raises AttributeError (for example because an intermediate value is
        None).

        :param obj: The object to start the traversal from.
        :param attrs: Attribute names to follow, in order.
        :param default: Value returned when the chain cannot be followed.
        :return: The value of the last attribute in the chain, or `default`.
        """

        try:
            for attr in attrs:
                obj = getattr(obj, attr)
            return obj
        except AttributeError:
            return default

    def _get_row_data(self, result: DataScanResult) -> dict:
        """
        Extracts a dict of field values from a DataScanResult, with
        AttributeError protection for nullable nested relations.

        :param result: A DataScanResult instance.
        :return: A dict keyed by CSV column name with string/int/datetime values.
        """

        return {
            "scan_name": self._safe_attr(result, "scan", "name"),
            "workspace_name": self._safe_attr(
                result, "field", "table", "database", "workspace", "name"
            ),
            "database_name": self._safe_attr(
                result, "field", "table", "database", "name"
            ),
            "table_name": self._safe_attr(result, "field", "table", "name"),
            "field_name": self._safe_attr(result, "field", "name"),
            "row_id": result.row_id,
            "matched_value": result.matched_value,
            "first_identified_on": result.first_identified_on,
            "last_identified_on": result.last_identified_on,
        }

    def write_rows(
        self,
        job: DataScanResultExportJob,
        file,
        queryset: QuerySet,
        progress,
    ) -> None:
        """
        Writes all result rows from the queryset to the CSV file, one page of
        2000 rows at a time, advancing the progress once per page.

        :param job: The export job with CSV formatting options.
        :param file: A writable binary file handle.
        :param queryset: The queryset of DataScanResult rows to export.
        :param progress: Progress tracker for reporting export advancement.
        """

        # add BOM to support utf-8 CSVs in MS Excel (for Windows only)
        if job.export_charset == "utf-8":
            file.write(b"\xef\xbb\xbf")

        field_header_mapping = {
            k: v["descr"] for (k, v) in DATA_SCAN_RESULT_CSV_COLUMN_NAMES.items()
        }

        # NOTE(review): the second positional argument of the writer is passed
        # the header labels here -- confirm this is the intended use of that
        # parameter. Left unchanged to preserve the current output.
        writer = csv.writer(
            file,
            field_header_mapping.values(),
            encoding=job.export_charset,
            delimiter=job.csv_column_separator,
        )

        if job.csv_first_row_header:
            writer.writerow(field_header_mapping.values())

        fields = [v["field"] for v in DATA_SCAN_RESULT_CSV_COLUMN_NAMES.values()]
        paginator = Paginator(queryset.all(), 2000)
        export_progress = ChildProgressBuilder.build(
            progress.create_child_builder(represents_progress=progress.total),
            paginator.num_pages,
        )

        for page in paginator.page_range:
            rows = []
            for result in paginator.page(page).object_list:
                row_data = self._get_row_data(result)
                rows.append([row_data[field] for field in fields])
            writer.writerows(rows)
            export_progress.increment()

    def get_filtered_queryset(self, job: DataScanResultExportJob) -> QuerySet:
        """
        Returns the queryset of DataScanResult rows to export, applying
        any filters configured on the job.

        :param job: The export job whose filters should be applied.
        :return: A filtered and ordered queryset of DataScanResult instances.
        """

        # Many results share the same `first_identified_on` (every result
        # created during one run gets the same timestamp). The primary key is
        # added as a tiebreaker so the order is total and the paginated export
        # cannot repeat or skip rows between pages.
        queryset = DataScanResult.objects.select_related(
            "scan", "table__database", "field__table__database__workspace"
        ).order_by("-first_identified_on", "-pk")

        if job.filter_scan_id is not None:
            queryset = queryset.filter(scan_id=job.filter_scan_id)

        return queryset

    def run(self, job: DataScanResultExportJob, progress: Progress) -> None:
        """
        Export the filtered data scan results to a CSV file and store the
        generated file name on the job.

        :param job: The job that is currently being executed.
        :param progress: The progress object that can be used to update the
            progress bar.
        """

        LicenseHandler.raise_if_user_doesnt_have_feature_instance_wide(
            DATA_SCANNER, job.user
        )

        queryset = self.get_filtered_queryset(job)

        # A short random suffix keeps file names of separate exports distinct.
        filename = f"data_scan_results_{uuid4().hex[:8]}.csv"
        storage_location = ExportHandler.export_file_path(filename)
        with _create_storage_dir_if_missing_and_open(storage_location) as file:
            self.write_rows(job, file, queryset, progress)

        job.exported_file_name = filename
        job.save()
import Workspace +from baserow_enterprise.data_scanner.constants import ( + SCAN_TYPE_LIST_OF_VALUES, + SCAN_TYPE_LIST_TABLE, + SCAN_TYPE_PATTERN, +) + + +class DataScan(models.Model): + SCAN_TYPE_CHOICES = [ + (SCAN_TYPE_PATTERN, "Pattern"), + (SCAN_TYPE_LIST_OF_VALUES, "List of values"), + (SCAN_TYPE_LIST_TABLE, "List Table"), + ] + + FREQUENCY_CHOICES = [ + ("manual", "Manual"), + ("hourly", "Hourly"), + ("daily", "Daily"), + ("weekly", "Weekly"), + ] + + name = models.CharField(max_length=255) + scan_type = models.CharField(max_length=20, choices=SCAN_TYPE_CHOICES) + pattern = models.TextField(null=True, blank=True) + frequency = models.CharField( + max_length=10, choices=FREQUENCY_CHOICES, default="manual" + ) + scan_all_workspaces = models.BooleanField(default=True) + workspaces = models.ManyToManyField(Workspace, blank=True) + is_running = models.BooleanField(default=False) + last_run_started_at = models.DateTimeField(null=True, blank=True) + last_run_finished_at = models.DateTimeField(null=True, blank=True) + last_error = models.TextField(null=True, blank=True) + created_by = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.SET_NULL, + null=True, + blank=True, + ) + created_on = models.DateTimeField(auto_now_add=True) + updated_on = models.DateTimeField(auto_now=True) + source_table = models.ForeignKey( + Table, on_delete=models.SET_NULL, null=True, blank=True + ) + source_field = models.ForeignKey( + Field, on_delete=models.SET_NULL, null=True, blank=True + ) + + class Meta: + ordering = ["-created_on"] + + def __str__(self): + return self.name + + +class DataScanListItem(models.Model): + scan = models.ForeignKey( + DataScan, on_delete=models.CASCADE, related_name="list_items" + ) + value = models.TextField() + + def __str__(self): + return self.value + + +class DataScanResult(models.Model): + scan = models.ForeignKey(DataScan, on_delete=models.CASCADE, related_name="results") + table = models.ForeignKey(Table, on_delete=models.CASCADE) + 
field = models.ForeignKey(Field, on_delete=models.CASCADE) + row_id = models.IntegerField() + matched_value = models.TextField() + first_identified_on = models.DateTimeField(db_index=True) + last_identified_on = models.DateTimeField() + + class Meta: + unique_together = [("scan", "table", "row_id", "field")] + ordering = ["-first_identified_on"] + indexes = [ + models.Index(fields=["scan", "first_identified_on"]), + ] + + def __str__(self): + return f"Result(scan={self.scan_id}, table={self.table_id}, row={self.row_id})" + + +class DataScanResultExportJob(Job): + export_charset = models.CharField( + max_length=32, + choices=SUPPORTED_EXPORT_CHARSETS, + default="utf-8", + ) + csv_column_separator = models.CharField( + max_length=32, + choices=SUPPORTED_CSV_COLUMN_SEPARATORS, + default=",", + ) + csv_first_row_header = models.BooleanField(default=True) + filter_scan_id = models.PositiveIntegerField(null=True) + exported_file_name = models.TextField(null=True) diff --git a/enterprise/backend/src/baserow_enterprise/data_scanner/notification_types.py b/enterprise/backend/src/baserow_enterprise/data_scanner/notification_types.py new file mode 100644 index 0000000000..4d261d3763 --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/data_scanner/notification_types.py @@ -0,0 +1,97 @@ +from dataclasses import asdict, dataclass +from typing import List, Optional +from urllib.parse import urlencode, urljoin + +from django.conf import settings +from django.contrib.auth import get_user_model +from django.utils.translation import ngettext + +from baserow.core.notifications.handler import NotificationHandler +from baserow.core.notifications.models import Notification, NotificationRecipient +from baserow.core.notifications.registries import ( + EmailNotificationTypeMixin, + NotificationType, +) + +from .models import DataScan + +User = get_user_model() + + +@dataclass +class DataScanNewResultsData: + scan_id: int + scan_name: str + new_results_count: int + + @classmethod + 
def from_scan(cls, scan: DataScan, new_results_count: int): + return cls( + scan_id=scan.id, + scan_name=scan.name, + new_results_count=new_results_count, + ) + + +class DataScanNewResultsNotificationType(EmailNotificationTypeMixin, NotificationType): + type = "data_scan_new_results" + has_web_frontend_route = True + + def get_web_frontend_url(self, notification: Notification) -> str: + base_url = settings.BASEROW_EMBEDDED_SHARE_URL + query = urlencode( + { + "scan_id": notification.data.get("scan_id", ""), + "scan_name": notification.data.get("scan_name", ""), + } + ) + return urljoin(base_url, f"/admin/data-scanner/results?{query}") + + @classmethod + def notify_instance_admins( + cls, scan: DataScan, new_results_count: int + ) -> Optional[List[NotificationRecipient]]: + """ + Sends a notification to all instance admins (staff users) informing + them that new data scan results have been found. + + :param scan: The data scan that produced new results. + :param new_results_count: The number of new results found in this run. + :return: The list of created notification recipients, or None. 
+ """ + + admins = User.objects.filter(is_staff=True, is_active=True) + if not admins.exists(): + return None + + return NotificationHandler.create_direct_notification_for_users( + notification_type=cls.type, + recipients=list(admins), + data=asdict(DataScanNewResultsData.from_scan(scan, new_results_count)), + sender=None, + workspace=None, + ) + + @classmethod + def get_notification_title_for_email(cls, notification, context) -> str: + count = notification.data.get("new_results_count", 0) + scan_name = notification.data.get("scan_name", "") + return ngettext( + "%(count)d new result found for %(scan_name)s", + "%(count)d new results found for %(scan_name)s", + count, + ) % {"count": count, "scan_name": scan_name} + + @classmethod + def get_notification_description_for_email( + cls, notification, context + ) -> Optional[str]: + count = notification.data.get("new_results_count", 0) + scan_name = notification.data.get("scan_name", "") + return ngettext( + 'The data scanner "%(scan_name)s" found %(count)d new match ' + "during its latest run. Review the results in the admin panel.", + 'The data scanner "%(scan_name)s" found %(count)d new matches ' + "during its latest run. 
Review the results in the admin panel.", + count, + ) % {"count": count, "scan_name": scan_name} diff --git a/enterprise/backend/src/baserow_enterprise/data_scanner/tasks.py b/enterprise/backend/src/baserow_enterprise/data_scanner/tasks.py new file mode 100644 index 0000000000..b978916ab6 --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/data_scanner/tasks.py @@ -0,0 +1,56 @@ +from datetime import timedelta + +from celery_singleton import Singleton + +from baserow.config.celery import app +from baserow_enterprise.data_scanner.constants import STALE_SCAN_THRESHOLD_HOURS + +SCAN_TIME_LIMIT = STALE_SCAN_THRESHOLD_HOURS * 3600 # 2 hours in seconds +CHECK_TIME_LIMIT = 15 * 60 # 15 minutes in seconds + + +@app.task( + bind=True, + queue="export", + base=Singleton, + unique_on="scan_id", + raise_on_duplicate=False, + lock_expiry=SCAN_TIME_LIMIT, + soft_time_limit=SCAN_TIME_LIMIT, + time_limit=SCAN_TIME_LIMIT, +) +def run_data_scan(self, scan_id: int) -> None: + """ + Celery task that executes a single data scan. + + :param scan_id: Primary key of the DataScan to run. + """ + + from baserow_enterprise.data_scanner.handler import DataScannerHandler + + DataScannerHandler.run_scan(scan_id) + + +@app.task( + bind=True, + queue="export", + base=Singleton, + raise_on_duplicate=False, + lock_expiry=CHECK_TIME_LIMIT, + soft_time_limit=CHECK_TIME_LIMIT, + time_limit=CHECK_TIME_LIMIT, +) +def check_data_scans_due(self) -> None: + """ + Periodic Celery task that checks for scheduled scans whose interval has + elapsed and dispatches them. Also resets stale running scans. 
+ """ + + from baserow_enterprise.data_scanner.handler import DataScannerHandler + + DataScannerHandler.check_scans_due() + + +@app.on_after_finalize.connect +def setup_periodic_data_scanner_tasks(sender, **kwargs) -> None: + sender.add_periodic_task(timedelta(minutes=15), check_data_scans_due.s()) diff --git a/enterprise/backend/src/baserow_enterprise/features.py b/enterprise/backend/src/baserow_enterprise/features.py index 9dc59be75d..fef1e0f013 100644 --- a/enterprise/backend/src/baserow_enterprise/features.py +++ b/enterprise/backend/src/baserow_enterprise/features.py @@ -16,3 +16,4 @@ BUILDER_CUSTOM_CODE = "builder_custom_code" DATE_DEPENDENCY = "date_dependency" +DATA_SCANNER = "data_scanner" diff --git a/enterprise/backend/src/baserow_enterprise/license_types.py b/enterprise/backend/src/baserow_enterprise/license_types.py index f78f9e7d45..8bd750c5bf 100755 --- a/enterprise/backend/src/baserow_enterprise/license_types.py +++ b/enterprise/backend/src/baserow_enterprise/license_types.py @@ -8,6 +8,7 @@ BUILDER_FILE_INPUT, BUILDER_NO_BRANDING, BUILDER_SSO, + DATA_SCANNER, DATA_SYNC, DATE_DEPENDENCY, ENTERPRISE_SETTINGS, @@ -102,6 +103,7 @@ class EnterpriseWithoutSupportLicenseType(AdvancedLicenseType): *COMMON_ADVANCED_FEATURES, ENTERPRISE_SETTINGS, SECURE_FILE_SERVE, + DATA_SCANNER, ] def handle_seat_overflow(self, seats_taken: int, license_object: License): diff --git a/enterprise/backend/src/baserow_enterprise/locale/en/LC_MESSAGES/django.po b/enterprise/backend/src/baserow_enterprise/locale/en/LC_MESSAGES/django.po index 2d9b9d358c..25fb169057 100644 --- a/enterprise/backend/src/baserow_enterprise/locale/en/LC_MESSAGES/django.po +++ b/enterprise/backend/src/baserow_enterprise/locale/en/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-03-16 14:50+0000\n" +"POT-Creation-Date: 2026-03-16 22:06+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 
"Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -187,6 +187,87 @@ msgstr "" msgid "REDONE" msgstr "" +#: src/baserow_enterprise/data_scanner/actions.py:18 +msgid "Create data scan" +msgstr "" + +#: src/baserow_enterprise/data_scanner/actions.py:19 +#, python-format +msgid "Data scan \"%(scan_name)s\" (%(scan_id)s) created" +msgstr "" + +#: src/baserow_enterprise/data_scanner/actions.py:45 +msgid "Update data scan" +msgstr "" + +#: src/baserow_enterprise/data_scanner/actions.py:46 +#, python-format +msgid "Data scan \"%(scan_name)s\" (%(scan_id)s) updated" +msgstr "" + +#: src/baserow_enterprise/data_scanner/actions.py:72 +msgid "Delete data scan" +msgstr "" + +#: src/baserow_enterprise/data_scanner/actions.py:73 +#, python-format +msgid "Data scan \"%(scan_name)s\" (%(scan_id)s) deleted" +msgstr "" + +#: src/baserow_enterprise/data_scanner/job_types.py:34 +msgid "Scan Name" +msgstr "" + +#: src/baserow_enterprise/data_scanner/job_types.py:38 +msgid "Workspace Name" +msgstr "" + +#: src/baserow_enterprise/data_scanner/job_types.py:42 +msgid "Database Name" +msgstr "" + +#: src/baserow_enterprise/data_scanner/job_types.py:46 +msgid "Table Name" +msgstr "" + +#: src/baserow_enterprise/data_scanner/job_types.py:50 +msgid "Field Name" +msgstr "" + +#: src/baserow_enterprise/data_scanner/job_types.py:54 +msgid "Row ID" +msgstr "" + +#: src/baserow_enterprise/data_scanner/job_types.py:58 +msgid "Matched Value" +msgstr "" + +#: src/baserow_enterprise/data_scanner/job_types.py:62 +msgid "First Identified On" +msgstr "" + +#: src/baserow_enterprise/data_scanner/job_types.py:66 +msgid "Last Identified On" +msgstr "" + +#: src/baserow_enterprise/data_scanner/notification_types.py:80 +#, python-format +msgid "%(count)d new result found for %(scan_name)s" +msgid_plural "%(count)d new results found for %(scan_name)s" +msgstr[0] "" +msgstr[1] "" + +#: src/baserow_enterprise/data_scanner/notification_types.py:92 +#, python-format +msgid "" +"The data scanner 
\"%(scan_name)s\" found %(count)d new match during its " +"latest run. Review the results in the admin panel." +msgid_plural "" +"The data scanner \"%(scan_name)s\" found %(count)d new matches during its " +"latest run. Review the results in the admin panel." +msgstr[0] "" +msgstr[1] "" + #: src/baserow_enterprise/data_sync/actions.py:21 msgid "Update periodic data sync interval" msgstr "" diff --git a/enterprise/backend/src/baserow_enterprise/migrations/0059_datascanresultexportjob_datascan_datascanlistitem_and_more.py b/enterprise/backend/src/baserow_enterprise/migrations/0059_datascanresultexportjob_datascan_datascanlistitem_and_more.py new file mode 100644 index 0000000000..ae49782d08 --- /dev/null +++ b/enterprise/backend/src/baserow_enterprise/migrations/0059_datascanresultexportjob_datascan_datascanlistitem_and_more.py @@ -0,0 +1,246 @@ +# Generated by Django 5.2.12 on 2026-03-24 09:07 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("baserow_enterprise", "0058_assistantchat_message_history"), + ("core", "0113_alter_notification_options_and_more"), + ("database", "0206_rowhistory_database_ro_action__6ea699_idx"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="DataScanResultExportJob", + fields=[ + ( + "job_ptr", + models.OneToOneField( + auto_created=True, + on_delete=django.db.models.deletion.CASCADE, + parent_link=True, + primary_key=True, + serialize=False, + to="core.job", + ), + ), + ( + "export_charset", + models.CharField( + choices=[ + ("utf-8", "utf-8"), + ("iso-8859-6", "iso-8859-6"), + ("windows-1256", "windows-1256"), + ("iso-8859-4", "iso-8859-4"), + ("windows-1257", "windows-1257"), + ("iso-8859-14", "iso-8859-14"), + ("iso-8859-2", "iso-8859-2"), + ("windows-1250", "windows-1250"), + ("gbk", "gbk"), + ("gb18030", "gb18030"), + ("big5", "big5"), + 
("koi8-r", "koi8-r"), + ("koi8-u", "koi8-u"), + ("iso-8859-5", "iso-8859-5"), + ("windows-1251", "windows-1251"), + ("x-mac-cyrillic", "mac-cyrillic"), + ("iso-8859-7", "iso-8859-7"), + ("windows-1253", "windows-1253"), + ("iso-8859-8", "iso-8859-8"), + ("windows-1255", "windows-1255"), + ("euc-jp", "euc-jp"), + ("iso-2022-jp", "iso-2022-jp"), + ("shift-jis", "shift-jis"), + ("euc-kr", "euc-kr"), + ("macintosh", "macintosh"), + ("iso-8859-10", "iso-8859-10"), + ("iso-8859-16", "iso-8859-16"), + ("windows-874", "cp874"), + ("windows-1254", "windows-1254"), + ("windows-1258", "windows-1258"), + ("iso-8859-1", "iso-8859-1"), + ("windows-1252", "windows-1252"), + ("iso-8859-3", "iso-8859-3"), + ], + default="utf-8", + max_length=32, + ), + ), + ( + "csv_column_separator", + models.CharField( + choices=[ + (",", ","), + (";", ";"), + ("|", "|"), + ("tab", "\t"), + ("record_separator", "\x1e"), + ("unit_separator", "\x1f"), + ], + default=",", + max_length=32, + ), + ), + ("csv_first_row_header", models.BooleanField(default=True)), + ("filter_scan_id", models.PositiveIntegerField(null=True)), + ("exported_file_name", models.TextField(null=True)), + ], + options={ + "abstract": False, + }, + bases=("core.job",), + ), + migrations.CreateModel( + name="DataScan", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(max_length=255)), + ( + "scan_type", + models.CharField( + choices=[ + ("pattern", "Pattern"), + ("list_of_values", "List of values"), + ("list_table", "List Table"), + ], + max_length=20, + ), + ), + ("pattern", models.TextField(blank=True, null=True)), + ( + "frequency", + models.CharField( + choices=[ + ("manual", "Manual"), + ("hourly", "Hourly"), + ("daily", "Daily"), + ("weekly", "Weekly"), + ], + default="manual", + max_length=10, + ), + ), + ("scan_all_workspaces", models.BooleanField(default=True)), + ("is_running", 
models.BooleanField(default=False)), + ("last_run_started_at", models.DateTimeField(blank=True, null=True)), + ("last_run_finished_at", models.DateTimeField(blank=True, null=True)), + ("last_error", models.TextField(blank=True, null=True)), + ("created_on", models.DateTimeField(auto_now_add=True)), + ("updated_on", models.DateTimeField(auto_now=True)), + ( + "created_by", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to=settings.AUTH_USER_MODEL, + ), + ), + ( + "source_field", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="database.field", + ), + ), + ( + "source_table", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="database.table", + ), + ), + ("workspaces", models.ManyToManyField(blank=True, to="core.workspace")), + ], + options={ + "ordering": ["-created_on"], + }, + ), + migrations.CreateModel( + name="DataScanListItem", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("value", models.TextField()), + ( + "scan", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="list_items", + to="baserow_enterprise.datascan", + ), + ), + ], + ), + migrations.CreateModel( + name="DataScanResult", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("row_id", models.IntegerField()), + ("matched_value", models.TextField()), + ("first_identified_on", models.DateTimeField(db_index=True)), + ("last_identified_on", models.DateTimeField()), + ( + "field", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="database.field" + ), + ), + ( + "scan", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="results", + to="baserow_enterprise.datascan", + ), + ), + ( + "table", + 
models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="database.table" + ), + ), + ], + options={ + "ordering": ["-first_identified_on"], + "indexes": [ + models.Index( + fields=["scan", "first_identified_on"], + name="baserow_ent_scan_id_694735_idx", + ) + ], + "unique_together": {("scan", "table", "row_id", "field")}, + }, + ), + ] diff --git a/enterprise/backend/tests/baserow_enterprise_tests/api/admin/data_scanner/test_data_scanner_views.py b/enterprise/backend/tests/baserow_enterprise_tests/api/admin/data_scanner/test_data_scanner_views.py new file mode 100644 index 0000000000..7e9f384236 --- /dev/null +++ b/enterprise/backend/tests/baserow_enterprise_tests/api/admin/data_scanner/test_data_scanner_views.py @@ -0,0 +1,1703 @@ +from io import BytesIO +from unittest.mock import MagicMock, patch + +from django.db import connection +from django.shortcuts import reverse +from django.test.utils import CaptureQueriesContext, override_settings +from django.utils import timezone + +import pytest +from rest_framework.status import ( + HTTP_200_OK, + HTTP_202_ACCEPTED, + HTTP_204_NO_CONTENT, + HTTP_400_BAD_REQUEST, + HTTP_401_UNAUTHORIZED, + HTTP_402_PAYMENT_REQUIRED, + HTTP_403_FORBIDDEN, + HTTP_404_NOT_FOUND, + HTTP_409_CONFLICT, +) + +from baserow.core.jobs.constants import JOB_FINISHED +from baserow.core.jobs.handler import JobHandler +from baserow.test_utils.helpers import AnyStr +from baserow_enterprise.data_scanner.job_types import DataScanResultExportJobType +from baserow_enterprise.data_scanner.models import DataScan, DataScanResult + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_scans_unauthenticated(api_client): + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:list"), + format="json", + ) + assert response.status_code == HTTP_401_UNAUTHORIZED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def 
test_list_scans_non_staff(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=False) + + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:list"), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_scans_without_enterprise_license(api_client, enterprise_data_fixture): + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:list"), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_402_PAYMENT_REQUIRED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_scans(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + scan = DataScan.objects.create( + name="Scan 1", scan_type="pattern", pattern="AA", created_by=user + ) + DataScan.objects.create(name="Scan 2", scan_type="list_of_values", created_by=user) + + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:list"), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + data = response.json() + assert data["count"] == 2 + assert data["results"][0] == { + "id": scan.id, + "name": "Scan 1", + "scan_type": "pattern", + "pattern": "AA", + "frequency": "manual", + "scan_all_workspaces": True, + "workspace_ids": [], + "is_running": False, + "last_run_started_at": None, + "last_run_finished_at": None, + "last_error": None, + "list_items": [], + "results_count": 0, + "source_table_id": None, + "source_field_id": None, + "source_workspace_id": None, + "source_database_id": None, + 
"created_on": AnyStr(), + "updated_on": AnyStr(), + } + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_scans_search(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + DataScan.objects.create( + name="IBAN Scanner", scan_type="pattern", pattern="AA", created_by=user + ) + DataScan.objects.create( + name="Email Scanner", scan_type="pattern", pattern="99", created_by=user + ) + + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:list"), + {"search": "IBAN"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + assert response.json()["count"] == 1 + assert response.json()["results"][0]["name"] == "IBAN Scanner" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_scans_query_count_is_constant(api_client, enterprise_data_fixture): + """ + The number of queries when listing scans must not grow with the number of + scans (no N+1). Adding more scans should not increase the query count. 
+ """ + + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + workspace = enterprise_data_fixture.create_workspace(user=user) + + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["value"]], + ) + + DataScan.objects.create( + name="Scan 1", scan_type="pattern", pattern="AA", created_by=user + ) + scan2 = DataScan.objects.create( + name="Scan 2", scan_type="list_of_values", created_by=user + ) + scan2.workspaces.add(workspace) + + DataScanResult.objects.create( + scan=scan2, + table=table, + field=fields[0], + row_id=1, + matched_value="test", + first_identified_on=timezone.now(), + last_identified_on=timezone.now(), + ) + + url = reverse("api:enterprise:admin:data_scanner:list") + + with CaptureQueriesContext(connection) as captured_2_scans: + response = api_client.get(url, format="json", HTTP_AUTHORIZATION=f"JWT {token}") + assert response.status_code == HTTP_200_OK + assert response.json()["count"] == 2 + + num_queries_2 = len(captured_2_scans) + + for i in range(3, 8): + DataScan.objects.create( + name=f"Scan {i}", scan_type="pattern", pattern="DD", created_by=user + ) + + with CaptureQueriesContext(connection) as captured_7_scans: + response = api_client.get(url, format="json", HTTP_AUTHORIZATION=f"JWT {token}") + assert response.status_code == HTTP_200_OK + assert response.json()["count"] == 7 + + num_queries_7 = len(captured_7_scans) + + assert num_queries_7 == num_queries_2, ( + f"Query count grew from {num_queries_2} to {num_queries_7} when adding " + f"more scans — likely an N+1 problem." 
+ ) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_unauthenticated(api_client): + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:list"), + {"name": "Test", "scan_type": "pattern", "pattern": "AA"}, + format="json", + ) + assert response.status_code == HTTP_401_UNAUTHORIZED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_non_staff(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=False) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:list"), + {"name": "Test", "scan_type": "pattern", "pattern": "AA"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_without_license(api_client, enterprise_data_fixture): + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:list"), + {"name": "Test", "scan_type": "pattern", "pattern": "AA"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_402_PAYMENT_REQUIRED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_pattern(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:list"), + { + "name": "IBAN Scan", + "scan_type": "pattern", + "pattern": "AADDAAAADDDDDDDDDD", + "frequency": "daily", + }, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + data = 
response.json() + assert data["name"] == "IBAN Scan" + assert data["scan_type"] == "pattern" + assert data["pattern"] == "AADDAAAADDDDDDDDDD" + assert data["frequency"] == "daily" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_list_of_values(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:list"), + { + "name": "List Scan", + "scan_type": "list_of_values", + "list_items": ["val1", "val2"], + }, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + data = response.json() + assert data["scan_type"] == "list_of_values" + assert data["list_items"] == ["val1", "val2"] + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_list_table(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["test"]], + ) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:list"), + { + "name": "Table Scan", + "scan_type": "list_table", + "source_table_id": table.id, + "source_field_id": fields[0].id, + }, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + data = response.json() + assert data["source_table_id"] == table.id + assert data["source_field_id"] == fields[0].id + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_with_specific_workspaces(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = 
enterprise_data_fixture.create_user_and_token(is_staff=True) + ws = enterprise_data_fixture.create_workspace(user=user) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:list"), + { + "name": "WS Scan", + "scan_type": "pattern", + "pattern": "AA", + "scan_all_workspaces": False, + "workspace_ids": [ws.id], + }, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + data = response.json() + assert data["scan_all_workspaces"] is False + assert data["workspace_ids"] == [ws.id] + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_pattern_missing_pattern(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:list"), + {"name": "Missing Pattern", "scan_type": "pattern"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_400_BAD_REQUEST + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_list_of_values_missing_items(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:list"), + {"name": "Missing Items", "scan_type": "list_of_values"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_400_BAD_REQUEST + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_list_table_missing_source(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.post( + 
reverse("api:enterprise:admin:data_scanner:list"), + {"name": "Missing Source", "scan_type": "list_table"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_400_BAD_REQUEST + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_list_table_incompatible_source_field( + api_client, enterprise_data_fixture +): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table = enterprise_data_fixture.create_database_table(user=user) + boolean_field = enterprise_data_fixture.create_boolean_field( + table=table, name="Active" + ) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:list"), + { + "name": "Boolean Scan", + "scan_type": "list_table", + "source_table_id": table.id, + "source_field_id": boolean_field.id, + }, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_400_BAD_REQUEST + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_workspace_structure_excludes_incompatible_fields( + api_client, enterprise_data_fixture +): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table = enterprise_data_fixture.create_database_table(user=user) + text_field = enterprise_data_fixture.create_text_field(table=table, name="Name") + enterprise_data_fixture.create_boolean_field(table=table, name="Active") + workspace = table.database.workspace + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:workspace_structure", + kwargs={"workspace_id": workspace.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + data = response.json() + fields = data[0]["tables"][0]["fields"] + field_names = {f["name"] for f in fields} + assert "Name" in field_names + 
assert "Active" not in field_names + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_get_scan_unauthenticated(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=user + ) + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + format="json", + ) + assert response.status_code == HTTP_401_UNAUTHORIZED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_get_scan_non_staff(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + _, token = enterprise_data_fixture.create_user_and_token(is_staff=False) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=admin + ) + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_get_scan_without_license(api_client, enterprise_data_fixture): + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": 1}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_402_PAYMENT_REQUIRED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_get_scan(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = 
enterprise_data_fixture.create_user_and_token(is_staff=True) + + scan = DataScan.objects.create( + name="Test Scan", scan_type="pattern", pattern="AA", created_by=user + ) + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + assert response.json() == { + "id": scan.id, + "name": "Test Scan", + "scan_type": "pattern", + "pattern": "AA", + "frequency": "manual", + "scan_all_workspaces": True, + "workspace_ids": [], + "is_running": False, + "last_run_started_at": None, + "last_run_finished_at": None, + "last_error": None, + "list_items": [], + "results_count": 0, + "source_table_id": None, + "source_field_id": None, + "source_workspace_id": None, + "source_database_id": None, + "created_on": AnyStr(), + "updated_on": AnyStr(), + } + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_get_scan_not_found(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": 99999}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_404_NOT_FOUND + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_unauthenticated(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=user + ) + + response = api_client.patch( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + {"name": "Updated"}, + format="json", + ) + assert response.status_code == 
HTTP_401_UNAUTHORIZED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_non_staff(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + _, token = enterprise_data_fixture.create_user_and_token(is_staff=False) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=admin + ) + + response = api_client.patch( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + {"name": "Updated"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_without_license(api_client, enterprise_data_fixture): + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.patch( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": 1}, + ), + {"name": "Updated"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_402_PAYMENT_REQUIRED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + scan = DataScan.objects.create( + name="Original", scan_type="pattern", pattern="AA", created_by=user + ) + + response = api_client.patch( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + {"name": "Updated", "frequency": "weekly"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + assert response.json()["name"] == "Updated" + assert response.json()["frequency"] == "weekly" + + 
+@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_not_found(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.patch( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": 99999}, + ), + {"name": "Updated"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_404_NOT_FOUND + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_already_running(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + scan = DataScan.objects.create( + name="Running", + scan_type="pattern", + pattern="AA", + created_by=user, + is_running=True, + ) + + response = api_client.patch( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + {"name": "Updated"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_409_CONFLICT + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan_unauthenticated(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=user + ) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + format="json", + ) + assert response.status_code == HTTP_401_UNAUTHORIZED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan_non_staff(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = 
enterprise_data_fixture.create_user(is_staff=True) + _, token = enterprise_data_fixture.create_user_and_token(is_staff=False) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=admin + ) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan_without_license(api_client, enterprise_data_fixture): + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": 1}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_402_PAYMENT_REQUIRED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + scan = DataScan.objects.create( + name="To Delete", scan_type="pattern", pattern="AA", created_by=user + ) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_204_NO_CONTENT + assert DataScan.objects.filter(id=scan.id).count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan_not_found(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:detail", + 
kwargs={"scan_id": 99999}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_404_NOT_FOUND + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan_already_running(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + scan = DataScan.objects.create( + name="Running", + scan_type="pattern", + pattern="AA", + created_by=user, + is_running=True, + ) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:detail", + kwargs={"scan_id": scan.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_409_CONFLICT + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_trigger_scan_unauthenticated(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=user + ) + + response = api_client.post( + reverse( + "api:enterprise:admin:data_scanner:trigger", + kwargs={"scan_id": scan.id}, + ), + format="json", + ) + assert response.status_code == HTTP_401_UNAUTHORIZED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_trigger_scan_non_staff(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + _, token = enterprise_data_fixture.create_user_and_token(is_staff=False) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=admin + ) + + response = api_client.post( + reverse( + "api:enterprise:admin:data_scanner:trigger", + kwargs={"scan_id": scan.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", 
+ ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_trigger_scan_without_license(api_client, enterprise_data_fixture): + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.post( + reverse( + "api:enterprise:admin:data_scanner:trigger", + kwargs={"scan_id": 1}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_402_PAYMENT_REQUIRED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_trigger_scan(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + scan = DataScan.objects.create( + name="Trigger Test", scan_type="pattern", pattern="AA", created_by=user + ) + + with pytest.MonkeyPatch.context() as m: + m.setattr( + "baserow_enterprise.data_scanner.tasks.run_data_scan.delay", + lambda scan_id: None, + ) + response = api_client.post( + reverse( + "api:enterprise:admin:data_scanner:trigger", + kwargs={"scan_id": scan.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_202_ACCEPTED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_trigger_scan_not_found(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.post( + reverse( + "api:enterprise:admin:data_scanner:trigger", + kwargs={"scan_id": 9999999}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_404_NOT_FOUND + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_trigger_scan_already_running(api_client, enterprise_data_fixture): + 
enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + scan = DataScan.objects.create( + name="Running", + scan_type="pattern", + pattern="AA", + created_by=user, + is_running=True, + ) + + response = api_client.post( + reverse( + "api:enterprise:admin:data_scanner:trigger", + kwargs={"scan_id": scan.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_409_CONFLICT + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_results_unauthenticated(api_client): + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:results"), + format="json", + ) + assert response.status_code == HTTP_401_UNAUTHORIZED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_results_non_staff(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=False) + + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:results"), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_results_without_license(api_client, enterprise_data_fixture): + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:results"), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_402_PAYMENT_REQUIRED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_results(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table, fields, _ 
= enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["test"]], + ) + + scan = DataScan.objects.create( + name="Results Test", scan_type="pattern", pattern="AA", created_by=user + ) + now = timezone.now() + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="test value", + first_identified_on=now, + last_identified_on=now, + ) + + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:results"), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + data = response.json() + assert data["count"] == 1 + result = data["results"][0] + assert result["matched_value"] == "test value" + assert result["scan_name"] == "Results Test" + assert result["table_name"] == table.name + assert result["field_name"] == fields[0].name + assert result["workspace_name"] == table.database.workspace.name + assert result["database_name"] == table.database.name + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_results_filter_by_scan(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["test"]], + ) + + scan1 = DataScan.objects.create( + name="Scan 1", scan_type="pattern", pattern="AA", created_by=user + ) + scan2 = DataScan.objects.create( + name="Scan 2", scan_type="pattern", pattern="99", created_by=user + ) + now = timezone.now() + DataScanResult.objects.create( + scan=scan1, + table=table, + field=fields[0], + row_id=1, + matched_value="match1", + first_identified_on=now, + last_identified_on=now, + ) + DataScanResult.objects.create( + scan=scan2, + table=table, + field=fields[0], + row_id=2, + matched_value="match2", + first_identified_on=now, + 
last_identified_on=now, + ) + + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:results"), + {"scan_id": scan1.id}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + data = response.json() + assert data["count"] == 1 + assert data["results"][0]["scan_name"] == "Scan 1" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_results_search(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["test"]], + ) + + scan = DataScan.objects.create( + name="Search Test", scan_type="pattern", pattern="AA", created_by=user + ) + now = timezone.now() + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="NL12ABCD0123456789", + first_identified_on=now, + last_identified_on=now, + ) + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=2, + matched_value="something-else", + first_identified_on=now, + last_identified_on=now, + ) + + response = api_client.get( + reverse("api:enterprise:admin:data_scanner:results"), + {"search": "NL12"}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + assert response.json()["count"] == 1 + assert response.json()["results"][0]["matched_value"] == "NL12ABCD0123456789" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_results_query_count_is_constant(api_client, enterprise_data_fixture): + """ + The number of queries when listing results must not grow with the number of + results (no N+1). Adding more results should not increase the query count. 
+ """ + + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["test"]], + ) + + scan = DataScan.objects.create( + name="Query Count Test", scan_type="pattern", pattern="AA", created_by=user + ) + now = timezone.now() + for i in range(1, 3): + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=i, + matched_value=f"match{i}", + first_identified_on=now, + last_identified_on=now, + ) + + url = reverse("api:enterprise:admin:data_scanner:results") + + with CaptureQueriesContext(connection) as captured_2_results: + response = api_client.get(url, format="json", HTTP_AUTHORIZATION=f"JWT {token}") + assert response.status_code == HTTP_200_OK + assert response.json()["count"] == 2 + + num_queries_2 = len(captured_2_results) + + for i in range(3, 8): + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=i, + matched_value=f"match{i}", + first_identified_on=now, + last_identified_on=now, + ) + + with CaptureQueriesContext(connection) as captured_7_results: + response = api_client.get(url, format="json", HTTP_AUTHORIZATION=f"JWT {token}") + assert response.status_code == HTTP_200_OK + assert response.json()["count"] == 7 + + num_queries_7 = len(captured_7_results) + + assert num_queries_7 == num_queries_2, ( + f"Query count grew from {num_queries_2} to {num_queries_7} when adding " + f"more results — likely an N+1 problem." 
+ ) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_result_unauthenticated(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=user + ) + now = timezone.now() + result = DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="test", + first_identified_on=now, + last_identified_on=now, + ) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:result_delete", + kwargs={"result_id": result.id}, + ), + format="json", + ) + assert response.status_code == HTTP_401_UNAUTHORIZED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_result_non_staff(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + _, token = enterprise_data_fixture.create_user_and_token(is_staff=False) + table, fields, _ = enterprise_data_fixture.build_table( + user=admin, columns=[("Name", "text")], rows=[["test"]] + ) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=admin + ) + now = timezone.now() + result = DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="test", + first_identified_on=now, + last_identified_on=now, + ) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:result_delete", + kwargs={"result_id": result.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.data_scanner +@pytest.mark.django_db 
+@override_settings(DEBUG=True) +def test_delete_result_without_license(api_client, enterprise_data_fixture): + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:result_delete", + kwargs={"result_id": 1}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_402_PAYMENT_REQUIRED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_result(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=user + ) + now = timezone.now() + result = DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="test", + first_identified_on=now, + last_identified_on=now, + ) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:result_delete", + kwargs={"result_id": result.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_204_NO_CONTENT + assert DataScanResult.objects.filter(id=result.id).count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_result_not_found(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.delete( + reverse( + "api:enterprise:admin:data_scanner:result_delete", + kwargs={"result_id": 99999}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_404_NOT_FOUND + + +@pytest.mark.data_scanner 
+@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_workspace_structure_unauthenticated(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + ws = enterprise_data_fixture.create_workspace(user=user) + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:workspace_structure", + kwargs={"workspace_id": ws.id}, + ), + format="json", + ) + assert response.status_code == HTTP_401_UNAUTHORIZED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_workspace_structure_non_staff(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + _, token = enterprise_data_fixture.create_user_and_token(is_staff=False) + ws = enterprise_data_fixture.create_workspace(user=admin) + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:workspace_structure", + kwargs={"workspace_id": ws.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_workspace_structure_without_license(api_client, enterprise_data_fixture): + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:workspace_structure", + kwargs={"workspace_id": 1}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_402_PAYMENT_REQUIRED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_workspace_structure(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table, fields, _ = 
enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text"), ("Notes", "text")], + rows=[["test", "note"]], + ) + workspace = table.database.workspace + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:workspace_structure", + kwargs={"workspace_id": workspace.id}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_200_OK + data = response.json() + assert len(data) == 1 + db = data[0] + assert db["name"] == table.database.name + assert len(db["tables"]) == 1 + tbl = db["tables"][0] + assert tbl["name"] == table.name + assert len(tbl["fields"]) >= 2 + field_names = {f["name"] for f in tbl["fields"]} + assert "Name" in field_names + assert "Notes" in field_names + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_workspace_structure_not_found(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.get( + reverse( + "api:enterprise:admin:data_scanner:workspace_structure", + kwargs={"workspace_id": 99999}, + ), + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_404_NOT_FOUND + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_export_results_unauthenticated(api_client): + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:results_export"), + {}, + format="json", + ) + assert response.status_code == HTTP_401_UNAUTHORIZED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_export_results_non_staff(api_client, enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + _, token = enterprise_data_fixture.create_user_and_token(is_staff=False) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:results_export"), + {}, + 
format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_403_FORBIDDEN + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_export_results_without_license(api_client, enterprise_data_fixture): + _, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + response = api_client.post( + reverse("api:enterprise:admin:data_scanner:results_export"), + {}, + format="json", + HTTP_AUTHORIZATION=f"JWT {token}", + ) + assert response.status_code == HTTP_402_PAYMENT_REQUIRED + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +@patch("baserow.core.storage.get_default_storage") +def test_export_results_csv(get_storage_mock, api_client, enterprise_data_fixture): + storage_mock = MagicMock() + get_storage_mock.return_value = storage_mock + + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + scan = DataScan.objects.create( + name="Export Test", scan_type="pattern", pattern="AA", created_by=user + ) + now = timezone.now() + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="matched", + first_identified_on=now, + last_identified_on=now, + ) + + stub_file = BytesIO() + storage_mock.open.return_value = stub_file + close = stub_file.close + stub_file.close = lambda: None + + csv_export_job = JobHandler().create_and_start_job( + user, + DataScanResultExportJobType.type, + csv_column_separator=",", + csv_first_row_header=True, + export_charset="utf-8", + sync=True, + ) + csv_export_job.refresh_from_db() + assert csv_export_job.state == JOB_FINISHED + + data = stub_file.getvalue().decode("utf-8") + assert "Export Test" in data + assert "matched" in data + assert "Scan Name" in data + + close() + + 
+@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +@patch("baserow.core.storage.get_default_storage") +def test_export_results_csv_without_header( + get_storage_mock, api_client, enterprise_data_fixture +): + storage_mock = MagicMock() + get_storage_mock.return_value = storage_mock + + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + scan = DataScan.objects.create( + name="NoHeader", scan_type="pattern", pattern="AA", created_by=user + ) + now = timezone.now() + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="val", + first_identified_on=now, + last_identified_on=now, + ) + + stub_file = BytesIO() + storage_mock.open.return_value = stub_file + close = stub_file.close + stub_file.close = lambda: None + + csv_export_job = JobHandler().create_and_start_job( + user, + DataScanResultExportJobType.type, + csv_column_separator=",", + csv_first_row_header=False, + export_charset="utf-8", + sync=True, + ) + csv_export_job.refresh_from_db() + assert csv_export_job.state == JOB_FINISHED + + data = stub_file.getvalue().decode("utf-8") + assert "Scan Name" not in data + assert "NoHeader" in data + + close() + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +@patch("baserow.core.storage.get_default_storage") +def test_export_results_csv_filter_by_scan( + get_storage_mock, api_client, enterprise_data_fixture +): + storage_mock = MagicMock() + get_storage_mock.return_value = storage_mock + + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + scan1 = DataScan.objects.create( + 
name="Scan A", scan_type="pattern", pattern="AA", created_by=user + ) + scan2 = DataScan.objects.create( + name="Scan B", scan_type="pattern", pattern="99", created_by=user + ) + now = timezone.now() + DataScanResult.objects.create( + scan=scan1, + table=table, + field=fields[0], + row_id=1, + matched_value="a_match", + first_identified_on=now, + last_identified_on=now, + ) + DataScanResult.objects.create( + scan=scan2, + table=table, + field=fields[0], + row_id=2, + matched_value="b_match", + first_identified_on=now, + last_identified_on=now, + ) + + stub_file = BytesIO() + storage_mock.open.return_value = stub_file + close = stub_file.close + stub_file.close = lambda: None + + csv_export_job = JobHandler().create_and_start_job( + user, + DataScanResultExportJobType.type, + csv_column_separator=",", + csv_first_row_header=True, + export_charset="utf-8", + filter_scan_id=scan1.id, + sync=True, + ) + csv_export_job.refresh_from_db() + assert csv_export_job.state == JOB_FINISHED + + data = stub_file.getvalue().decode("utf-8") + assert "a_match" in data + assert "b_match" not in data + + close() + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +@patch("baserow.core.storage.get_default_storage") +def test_export_results_deleting_job_deletes_file( + get_storage_mock, api_client, enterprise_data_fixture +): + storage_mock = MagicMock() + get_storage_mock.return_value = storage_mock + + enterprise_data_fixture.enable_enterprise() + user, token = enterprise_data_fixture.create_user_and_token(is_staff=True) + + stub_file = BytesIO() + storage_mock.open.return_value = stub_file + close = stub_file.close + stub_file.close = lambda: None + + csv_export_job = JobHandler().create_and_start_job( + user, + DataScanResultExportJobType.type, + csv_column_separator=",", + csv_first_row_header=True, + export_charset="utf-8", + sync=True, + ) + csv_export_job.refresh_from_db() + assert csv_export_job.state == JOB_FINISHED + assert 
csv_export_job.exported_file_name is not None + + close() + + from baserow.contrib.database.export.handler import ExportHandler + + with patch( + "baserow_enterprise.data_scanner.job_types.get_default_storage" + ) as mock_storage: + mock_storage.return_value = storage_mock + DataScanResultExportJobType().before_delete(csv_export_job) + storage_mock.delete.assert_called_once_with( + ExportHandler.export_file_path(csv_export_job.exported_file_name) + ) diff --git a/enterprise/backend/tests/baserow_enterprise_tests/api/audit_log/test_audit_log_admin_views.py b/enterprise/backend/tests/baserow_enterprise_tests/api/audit_log/test_audit_log_admin_views.py index 6a764bfea7..de5fb73142 100755 --- a/enterprise/backend/tests/baserow_enterprise_tests/api/audit_log/test_audit_log_admin_views.py +++ b/enterprise/backend/tests/baserow_enterprise_tests/api/audit_log/test_audit_log_admin_views.py @@ -52,7 +52,7 @@ def test_admins_cannot_access_audit_log_endpoints_without_an_enterprise_license( @pytest.mark.django_db -@pytest.mark.parametrize("url_name", ["users", "workspaces", "action_types", "list"]) +@pytest.mark.parametrize("url_name", ["users", "action_types", "list"]) @override_settings(DEBUG=True) def test_non_admins_cannot_access_audit_log_endpoints( api_client, enterprise_data_fixture, url_name @@ -129,54 +129,6 @@ def test_audit_log_user_filter_returns_users_correctly( } -@pytest.mark.django_db -@override_settings(DEBUG=True) -def test_audit_log_workspace_filter_returns_workspaces_correctly( - api_client, enterprise_data_fixture -): - ( - admin_user, - admin_token, - ) = enterprise_data_fixture.create_enterprise_admin_user_and_token() - workspace_1 = enterprise_data_fixture.create_workspace( - name="workspace 1", user=admin_user - ) - workspace_2 = enterprise_data_fixture.create_workspace( - name="workspace 2", user=admin_user - ) - - # no search query should return all workspaces - response = api_client.get( - reverse("api:enterprise:audit_log:workspaces"), - 
format="json", - HTTP_AUTHORIZATION=f"JWT {admin_token}", - ) - assert response.status_code == HTTP_200_OK - assert response.json() == { - "count": 2, - "next": None, - "previous": None, - "results": [ - {"id": workspace_1.id, "value": workspace_1.name}, - {"id": workspace_2.id, "value": workspace_2.name}, - ], - } - - # searching by name should return only the correct workspace - response = api_client.get( - reverse("api:enterprise:audit_log:workspaces") + "?search=1", - format="json", - HTTP_AUTHORIZATION=f"JWT {admin_token}", - ) - assert response.status_code == HTTP_200_OK - assert response.json() == { - "count": 1, - "next": None, - "previous": None, - "results": [{"id": workspace_1.id, "value": workspace_1.name}], - } - - @pytest.mark.django_db @override_settings(DEBUG=True) def test_audit_log_action_type_filter_returns_action_types_correctly( diff --git a/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/__init__.py b/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/conftest.py b/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/conftest.py new file mode 100644 index 0000000000..aa69ec6d02 --- /dev/null +++ b/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/conftest.py @@ -0,0 +1,36 @@ +from django.contrib.postgres.search import SearchVector +from django.db.models import Value +from django.utils import timezone + +import pytest + +from baserow.contrib.database.search.handler import SearchHandler + + +@pytest.fixture +def populate_search_table(): + """ + Returns a helper that creates a workspace search table and inserts + tsvector rows for every non-empty cell in the given rows / field. 
+ """ + + def _populate(table, field, rows): + workspace = table.database.workspace + SearchHandler.create_workspace_search_table_if_not_exists(workspace.id) + search_model = SearchHandler.get_workspace_search_table_model(workspace.id) + + model = table.get_model() + for row in rows: + row_obj = model.objects.get(id=row.id) + cell_value = getattr(row_obj, field.db_column) + if cell_value: + search_model.objects.create( + row_id=row_obj.id, + field_id=field.id, + updated_on=timezone.now(), + value=SearchVector(Value(str(cell_value))), + ) + + return search_model + + return _populate diff --git a/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/test_data_scanner_handler.py b/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/test_data_scanner_handler.py new file mode 100644 index 0000000000..900a4ef81e --- /dev/null +++ b/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/test_data_scanner_handler.py @@ -0,0 +1,1655 @@ +import re +from datetime import timedelta +from unittest.mock import patch + +from django.core.exceptions import PermissionDenied +from django.test.utils import override_settings +from django.utils import timezone + +import pytest + +from baserow_enterprise.data_scanner.exceptions import ( + DataScanDoesNotExist, + DataScanIsAlreadyRunning, +) +from baserow_enterprise.data_scanner.handler import ( + DataScannerHandler, + convert_pattern_to_regex, +) +from baserow_enterprise.data_scanner.models import ( + DataScan, + DataScanListItem, + DataScanResult, +) +from baserow_premium.license.exceptions import FeaturesNotAvailableError + + +@pytest.mark.data_scanner +def test_convert_pattern_to_regex_alpha_token(): + assert convert_pattern_to_regex("A") == "[A-Za-z]" + assert convert_pattern_to_regex("AA") == "[A-Za-z][A-Za-z]" + + +@pytest.mark.data_scanner +def test_convert_pattern_to_regex_digit_token(): + assert convert_pattern_to_regex("D") == "[0-9]" + assert convert_pattern_to_regex("DD") == "[0-9][0-9]" + + 
+@pytest.mark.data_scanner +def test_convert_pattern_to_regex_any_char_token(): + assert convert_pattern_to_regex("X") == "." + assert convert_pattern_to_regex("XX") == ".." + + +@pytest.mark.data_scanner +def test_convert_pattern_to_regex_escaped_literals(): + assert convert_pattern_to_regex("\\N\\L") == "NL" + assert convert_pattern_to_regex("\\-") == "\\-" + assert convert_pattern_to_regex("\\.") == "\\." + assert convert_pattern_to_regex("\\D") == "D" + + +@pytest.mark.data_scanner +def test_convert_pattern_to_regex_mixed(): + assert convert_pattern_to_regex("AADD") == "[A-Za-z][A-Za-z][0-9][0-9]" + assert ( + convert_pattern_to_regex("AA\\-DD\\-AA") + == "[A-Za-z][A-Za-z]\\-[0-9][0-9]\\-[A-Za-z][A-Za-z]" + ) + + +@pytest.mark.data_scanner +def test_convert_pattern_to_regex_iban_pattern(): + assert ( + convert_pattern_to_regex("AADDAAAADDDDDDDDDD") + == "[A-Za-z][A-Za-z][0-9][0-9][A-Za-z][A-Za-z][A-Za-z][A-Za-z]" + "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" + ) + + +@pytest.mark.data_scanner +def test_convert_pattern_to_regex_dutch_iban_with_literals(): + assert ( + convert_pattern_to_regex("\\N\\LDDAAAADDDDDDDDDD") + == "NL[0-9][0-9][A-Za-z][A-Za-z][A-Za-z][A-Za-z]" + "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" + ) + + +@pytest.mark.data_scanner +def test_convert_pattern_to_regex_empty(): + assert convert_pattern_to_regex("") == "" + + +@pytest.mark.data_scanner +def test_convert_pattern_to_regex_trailing_backslash(): + # A trailing backslash with nothing after it is treated as a literal backslash + assert convert_pattern_to_regex("A\\") == "[A-Za-z]\\\\" + + +@pytest.mark.data_scanner +def test_extract_matching_token(): + compiled = re.compile(r"[A-Za-z][A-Za-z][0-9][0-9]", re.IGNORECASE) + + token = DataScannerHandler._extract_matching_token( + "'ab12':1 'something':2", compiled + ) + assert token == "ab12" + + +@pytest.mark.data_scanner +def test_extract_matching_token_no_match_returns_raw(): + compiled = 
re.compile(r"[A-Za-z][A-Za-z][0-9][0-9]", re.IGNORECASE) + + token = DataScannerHandler._extract_matching_token("'hello':1", compiled) + assert token == "'hello':1" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_pattern(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="IBAN Scanner", + scan_type="pattern", + pattern="AADDAAAADDDDDDDDDD", + frequency="daily", + ) + + assert scan.name == "IBAN Scanner" + assert scan.scan_type == "pattern" + assert scan.pattern == "AADDAAAADDDDDDDDDD" + assert scan.frequency == "daily" + assert scan.scan_all_workspaces is True + assert scan.created_by == user + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_list_of_values(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="Blacklist Scan", + scan_type="list_of_values", + list_items=["value1", "value2", "value3"], + ) + + assert scan.scan_type == "list_of_values" + assert scan.list_items.count() == 3 + assert list(scan.list_items.values_list("value", flat=True)) == [ + "value1", + "value2", + "value3", + ] + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_list_table(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["test"]], + ) + field = fields[0] + + scan = DataScannerHandler.create_scan( + user=user, + name="Table Scan", + scan_type="list_table", + source_table_id=table.id, + source_field_id=field.id, + ) + + assert scan.scan_type 
== "list_table" + assert scan.source_table_id == table.id + assert scan.source_field_id == field.id + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_with_specific_workspaces(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + workspace1 = enterprise_data_fixture.create_workspace(user=user) + workspace2 = enterprise_data_fixture.create_workspace(user=user) + + scan = DataScannerHandler.create_scan( + user=user, + name="Workspace Scan", + scan_type="pattern", + pattern="AADD", + scan_all_workspaces=False, + workspace_ids=[workspace1.id, workspace2.id], + ) + + assert scan.scan_all_workspaces is False + assert set(scan.workspaces.values_list("id", flat=True)) == { + workspace1.id, + workspace2.id, + } + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_all_workspaces(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="All Workspaces Scan", + scan_type="pattern", + pattern="99", + scan_all_workspaces=True, + ) + + assert scan.scan_all_workspaces is True + assert scan.workspaces.count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_without_enterprise_license(enterprise_data_fixture): + user = enterprise_data_fixture.create_user(is_staff=True) + + with pytest.raises(FeaturesNotAvailableError): + DataScannerHandler.create_scan( + user=user, + name="Test", + scan_type="pattern", + pattern="AA", + ) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_create_scan_non_staff_user(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=False) + + with 
pytest.raises(PermissionDenied): + DataScannerHandler.create_scan( + user=user, + name="Test", + scan_type="pattern", + pattern="AA", + ) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="Original", + scan_type="pattern", + pattern="AA", + ) + + updated = DataScannerHandler.update_scan( + user=user, + scan_id=scan.id, + name="Updated", + frequency="weekly", + pattern="99", + ) + + assert updated.name == "Updated" + assert updated.frequency == "weekly" + assert updated.pattern == "99" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_without_license(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, name="Test", scan_type="pattern", pattern="AA" + ) + + from baserow.core.cache import local_cache + from baserow_premium.license.models import License + + License.objects.all().delete() + local_cache.clear() + + with pytest.raises(FeaturesNotAvailableError): + DataScannerHandler.update_scan(user=user, scan_id=scan.id, name="New") + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_non_staff(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + regular = enterprise_data_fixture.create_user(is_staff=False) + + scan = DataScannerHandler.create_scan( + user=admin, name="Test", scan_type="pattern", pattern="AA" + ) + + with pytest.raises(PermissionDenied): + DataScannerHandler.update_scan(user=regular, scan_id=scan.id, name="New") + + +@pytest.mark.data_scanner +@pytest.mark.django_db 
+@override_settings(DEBUG=True) +def test_update_scan_not_found(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + with pytest.raises(DataScanDoesNotExist): + DataScannerHandler.update_scan(user=user, scan_id=99999, name="New") + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_already_running(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="Running Scan", + scan_type="pattern", + pattern="AA", + ) + scan.is_running = True + scan.save() + + with pytest.raises(DataScanIsAlreadyRunning): + DataScannerHandler.update_scan(user=user, scan_id=scan.id, name="New Name") + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_workspaces(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + ws1 = enterprise_data_fixture.create_workspace(user=user) + ws2 = enterprise_data_fixture.create_workspace(user=user) + + scan = DataScannerHandler.create_scan( + user=user, + name="Test", + scan_type="pattern", + pattern="AA", + scan_all_workspaces=False, + workspace_ids=[ws1.id], + ) + assert set(scan.workspaces.values_list("id", flat=True)) == {ws1.id} + + updated = DataScannerHandler.update_scan( + user=user, scan_id=scan.id, workspace_ids=[ws2.id] + ) + assert set(updated.workspaces.values_list("id", flat=True)) == {ws2.id} + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_list_items(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="List Scan", + 
scan_type="list_of_values", + list_items=["a", "b"], + ) + assert scan.list_items.count() == 2 + + updated = DataScannerHandler.update_scan( + user=user, scan_id=scan.id, list_items=["x", "y", "z"] + ) + assert updated.list_items.count() == 3 + assert set(updated.list_items.values_list("value", flat=True)) == {"x", "y", "z"} + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_table_source(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table1, fields1, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Col1", "text")], rows=[["v"]] + ) + table2, fields2, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Col2", "text")], rows=[["v"]] + ) + + scan = DataScannerHandler.create_scan( + user=user, + name="Table Scan", + scan_type="list_table", + source_table_id=table1.id, + source_field_id=fields1[0].id, + ) + assert scan.source_table_id == table1.id + + DataScannerHandler.update_scan( + user=user, + scan_id=scan.id, + source_table_id=table2.id, + source_field_id=fields2[0].id, + ) + scan.refresh_from_db() + assert scan.source_table_id == table2.id + assert scan.source_field_id == fields2[0].id + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_update_scan_clears_workspaces_when_scan_all(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + ws = enterprise_data_fixture.create_workspace(user=user) + + scan = DataScannerHandler.create_scan( + user=user, + name="Test", + scan_type="pattern", + pattern="AA", + scan_all_workspaces=False, + workspace_ids=[ws.id], + ) + assert scan.workspaces.count() == 1 + + DataScannerHandler.update_scan( + user=user, + scan_id=scan.id, + scan_all_workspaces=True, + workspace_ids=[ws.id], + ) + scan.refresh_from_db() + assert scan.workspaces.count() 
== 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_cleanup_stale_results_on_type_change(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + + scan = DataScannerHandler.create_scan( + user=user, name="Test", scan_type="pattern", pattern="AA" + ) + now = timezone.now() + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="test", + first_identified_on=now, + last_identified_on=now, + ) + assert scan.results.count() == 1 + + DataScannerHandler.update_scan( + user=user, scan_id=scan.id, scan_type="list_of_values", list_items=["x"] + ) + assert scan.results.count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_cleanup_stale_results_on_pattern_change(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + + scan = DataScannerHandler.create_scan( + user=user, name="Test", scan_type="pattern", pattern="AA" + ) + now = timezone.now() + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="test", + first_identified_on=now, + last_identified_on=now, + ) + assert scan.results.count() == 1 + + DataScannerHandler.update_scan(user=user, scan_id=scan.id, pattern="DD") + assert scan.results.count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_cleanup_stale_results_on_list_items_change(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, 
fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + + scan = DataScannerHandler.create_scan( + user=user, + name="Test", + scan_type="list_of_values", + list_items=["keep", "remove"], + ) + now = timezone.now() + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="keep", + first_identified_on=now, + last_identified_on=now, + ) + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=2, + matched_value="remove", + first_identified_on=now, + last_identified_on=now, + ) + assert scan.results.count() == 2 + + DataScannerHandler.update_scan(user=user, scan_id=scan.id, list_items=["keep"]) + assert scan.results.count() == 1 + assert scan.results.first().matched_value == "keep" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_cleanup_stale_results_on_empty_list(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + + scan = DataScannerHandler.create_scan( + user=user, name="Test", scan_type="list_of_values", list_items=["val"] + ) + now = timezone.now() + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="val", + first_identified_on=now, + last_identified_on=now, + ) + + DataScannerHandler.update_scan(user=user, scan_id=scan.id, list_items=[]) + assert scan.results.count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="To Delete", + scan_type="list_of_values", + 
list_items=["val1"], + ) + scan_id = scan.id + + DataScannerHandler.delete_scan(user=user, scan_id=scan_id) + + assert DataScan.objects.filter(id=scan_id).count() == 0 + assert DataScanListItem.objects.filter(scan_id=scan_id).count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan_without_license(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, name="Test", scan_type="pattern", pattern="AA" + ) + + from baserow.core.cache import local_cache + from baserow_premium.license.models import License + + License.objects.all().delete() + local_cache.clear() + + with pytest.raises(FeaturesNotAvailableError): + DataScannerHandler.delete_scan(user=user, scan_id=scan.id) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan_non_staff(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + regular = enterprise_data_fixture.create_user(is_staff=False) + + scan = DataScannerHandler.create_scan( + user=admin, name="Test", scan_type="pattern", pattern="AA" + ) + + with pytest.raises(PermissionDenied): + DataScannerHandler.delete_scan(user=regular, scan_id=scan.id) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan_not_found(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + with pytest.raises(DataScanDoesNotExist): + DataScannerHandler.delete_scan(user=user, scan_id=99999) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_delete_scan_already_running(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = 
enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="Running Scan", + scan_type="pattern", + pattern="AA", + ) + scan.is_running = True + scan.save() + + with pytest.raises(DataScanIsAlreadyRunning): + DataScannerHandler.delete_scan(user=user, scan_id=scan.id) + + # Verify the scan was not deleted. + assert DataScan.objects.filter(id=scan.id).exists() + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_scans_without_license(enterprise_data_fixture): + user = enterprise_data_fixture.create_user(is_staff=True) + + with pytest.raises(FeaturesNotAvailableError): + DataScannerHandler.list_scans(user=user) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_scans_non_staff(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=False) + + with pytest.raises(PermissionDenied): + DataScannerHandler.list_scans(user=user) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_get_scan(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, name="Get Me", scan_type="pattern", pattern="AA" + ) + + fetched = DataScannerHandler.get_scan(user, scan.id) + assert fetched.id == scan.id + assert fetched.name == "Get Me" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_get_scan_not_found(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + with pytest.raises(DataScanDoesNotExist): + DataScannerHandler.get_scan(user, 99999) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def 
test_trigger_scan_without_license(enterprise_data_fixture): + user = enterprise_data_fixture.create_user(is_staff=True) + + with pytest.raises(FeaturesNotAvailableError): + DataScannerHandler.trigger_scan(user=user, scan_id=999) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_trigger_scan_non_staff(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + regular = enterprise_data_fixture.create_user(is_staff=False) + + scan = DataScannerHandler.create_scan( + user=admin, name="Test", scan_type="pattern", pattern="AA" + ) + + with pytest.raises(PermissionDenied): + DataScannerHandler.trigger_scan(user=regular, scan_id=scan.id) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_trigger_scan_not_found(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + with pytest.raises(DataScanDoesNotExist): + DataScannerHandler.trigger_scan(user=user, scan_id=99999) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_trigger_scan_already_running(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="Running Scan", + scan_type="pattern", + pattern="AA", + ) + scan.is_running = True + scan.save() + + with pytest.raises(DataScanIsAlreadyRunning): + DataScannerHandler.trigger_scan(user=user, scan_id=scan.id) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_trigger_scan_dispatches_task(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, name="Test", scan_type="pattern", 
pattern="AA" + ) + + with patch( + "baserow_enterprise.data_scanner.tasks.run_data_scan.delay" + ) as mock_delay: + DataScannerHandler.trigger_scan(user=user, scan_id=scan.id) + mock_delay.assert_called_once_with(scan.id) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_not_found(enterprise_data_fixture): + DataScannerHandler.run_scan(99999) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_pattern_scan(enterprise_data_fixture, populate_search_table): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("Code", "text")], + rows=[["AB12CD345678901234"], ["not matching"], ["XY99ZZ111111111111"]], + ) + field = fields[0] + + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=user, + name="Pattern Test", + scan_type="pattern", + pattern="AADDAADDDDDDDDDDDD", + ) + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.is_running is False + assert scan.last_run_finished_at is not None + assert scan.last_error is None or scan.last_error == "" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_pattern_scan_dutch_iban_with_escaped_literals( + enterprise_data_fixture, populate_search_table +): + """ + Verifies that a pattern with escaped literal characters (e.g. \\N\\L for + the fixed "NL" prefix) correctly matches values in the search table. + PostgreSQL lowercases tsvector tokens, so the regex must match + case-insensitively. 
+ """ + + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("IBAN", "text")], + rows=[ + ["NL23INGB0007704001"], + ["not an iban"], + ["NL91ABNA0417164300"], + ], + ) + field = fields[0] + + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=user, + name="Dutch IBAN Scanner", + scan_type="pattern", + pattern="\\N\\LDDAAAADDDDDDDDDD", + ) + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.is_running is False + assert scan.last_error is None or scan.last_error == "" + + results = list( + scan.results.order_by("row_id").values_list("row_id", "matched_value") + ) + assert len(results) == 2 + # tsvector tokens are lowercased by PostgreSQL + assert results[0][0] == rows[0].id + assert "nl23ingb0007704001" in results[0][1].lower() + assert results[1][0] == rows[2].id + assert "nl91abna0417164300" in results[1][1].lower() + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_list_of_values_scan(enterprise_data_fixture, populate_search_table): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["secret123"], ["innocent"], ["secret456"]], + ) + field = fields[0] + workspace = table.database.workspace + + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=user, + name="List of Values Test", + scan_type="list_of_values", + list_items=["secret123"], + scan_all_workspaces=False, + workspace_ids=[workspace.id], + ) + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.is_running is False + assert scan.last_error is None or scan.last_error == "" + assert 
scan.results.filter(matched_value="secret123").exists() + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_without_license_records_error(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="License Test", + scan_type="pattern", + pattern="AA", + ) + + from baserow.core.cache import local_cache + from baserow_premium.license.models import License + + License.objects.all().delete() + local_cache.clear() + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.is_running is False + assert "Enterprise license no longer active" in scan.last_error + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_with_no_search_table(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + workspace = enterprise_data_fixture.create_workspace(user=user) + + scan = DataScannerHandler.create_scan( + user=user, + name="No Search Table", + scan_type="pattern", + pattern="AA", + scan_all_workspaces=False, + workspace_ids=[workspace.id], + ) + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.is_running is False + assert scan.last_error is None or scan.last_error == "" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_removes_stale_results(enterprise_data_fixture): + """Results from a previous run that are not re-identified get deleted.""" + + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + workspace = table.database.workspace + + scan = DataScannerHandler.create_scan( + user=user, + 
name="Stale Results Test", + scan_type="pattern", + pattern="AA", + scan_all_workspaces=False, + workspace_ids=[workspace.id], + ) + old_time = timezone.now() - timedelta(days=1) + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="old_match", + first_identified_on=old_time, + last_identified_on=old_time, + ) + assert scan.results.count() == 1 + + DataScannerHandler.run_scan(scan.id) + + assert scan.results.count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_removes_result_when_cell_cleared( + enterprise_data_fixture, populate_search_table +): + """ + When a cell that previously matched is emptied, the next scan run should + remove the corresponding result because it is no longer re-identified. + """ + + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["secret123"], ["innocent"]], + ) + field = fields[0] + workspace = table.database.workspace + + search_model = populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=user, + name="Cell Cleared Test", + scan_type="list_of_values", + list_items=["secret123"], + scan_all_workspaces=False, + workspace_ids=[workspace.id], + ) + + # First run: the value is present, so we expect a result. + DataScannerHandler.run_scan(scan.id) + scan.refresh_from_db() + assert scan.last_error is None or scan.last_error == "" + assert scan.results.count() == 1 + assert scan.results.filter(matched_value="secret123").exists() + + # Clear the cell and remove the search table entry to simulate the user + # emptying the cell. 
+ model = table.get_model() + row = model.objects.get(id=rows[0].id) + setattr(row, field.db_column, "") + row.save() + search_model.objects.filter(row_id=rows[0].id, field_id=field.id).delete() + + # Second run: the value is gone, so the stale result should be removed. + DataScannerHandler.run_scan(scan.id) + scan.refresh_from_db() + assert scan.last_error is None or scan.last_error == "" + assert scan.results.count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_upsert_result_updates_existing(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + field = fields[0] + + scan = DataScannerHandler.create_scan( + user=user, name="Upsert Test", scan_type="pattern", pattern="AA" + ) + t1 = timezone.now() - timedelta(hours=1) + DataScanResult.objects.create( + scan=scan, + table=table, + field=field, + row_id=1, + matched_value="old", + first_identified_on=t1, + last_identified_on=t1, + ) + + t2 = timezone.now() + DataScannerHandler._bulk_upsert_results(scan, [(field.id, 1, "new")], t2, set()) + + result = DataScanResult.objects.get(scan=scan, row_id=1, field=field) + assert result.matched_value == "new" + assert result.first_identified_on == t1 + assert result.last_identified_on == t2 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_list_table_scan_excludes_source_table( + enterprise_data_fixture, populate_search_table +): + """ + When running a list_table scan, the source table itself must not appear in + the results. 
+ """ + + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + source_table, source_fields, source_rows = enterprise_data_fixture.build_table( + user=user, + columns=[("Keyword", "text")], + rows=[["secret123"]], + ) + source_field = source_fields[0] + + target_table, target_fields, target_rows = enterprise_data_fixture.build_table( + user=user, + database=source_table.database, + columns=[("Notes", "text")], + rows=[["contains secret123 inside"], ["nothing here"]], + ) + target_field = target_fields[0] + + populate_search_table(source_table, source_field, source_rows) + populate_search_table(target_table, target_field, target_rows) + + workspace = source_table.database.workspace + scan = DataScannerHandler.create_scan( + user=user, + name="List Table Exclusion Test", + scan_type="list_table", + source_table_id=source_table.id, + source_field_id=source_field.id, + scan_all_workspaces=False, + workspace_ids=[workspace.id], + ) + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.is_running is False + assert scan.last_error is None or scan.last_error == "" + + results = DataScanResult.objects.filter(scan=scan) + assert not results.filter(table=source_table).exists() + assert results.filter(table=target_table).exists() + target_result = results.get(table=target_table) + assert target_result.field_id == target_field.id + assert target_result.matched_value == "secret123" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_stale_running_scan_reset(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=user, + name="Stale Scan", + scan_type="pattern", + pattern="AA", + frequency="daily", + ) + scan.is_running = True + scan.last_run_started_at = timezone.now() - timedelta(hours=7) + scan.save() + + 
DataScannerHandler.check_scans_due() + + scan.refresh_from_db() + assert scan.is_running is False + assert scan.last_error == "Scan timed out and was automatically reset" + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_check_scans_due_without_license(enterprise_data_fixture): + """Without a license, stale scans are still reset but no new scans are dispatched.""" + + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScan.objects.create( + name="Scheduled", + scan_type="pattern", + pattern="AA", + frequency="hourly", + created_by=user, + is_running=False, + last_run_started_at=None, + ) + + with patch( + "baserow_enterprise.data_scanner.tasks.run_data_scan.delay" + ) as mock_delay: + DataScannerHandler.check_scans_due() + mock_delay.assert_not_called() + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_check_scans_due_dispatches_scheduled_scan(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScan.objects.create( + name="Hourly", + scan_type="pattern", + pattern="AA", + frequency="hourly", + created_by=user, + is_running=False, + last_run_started_at=timezone.now() - timedelta(hours=2), + ) + + with patch( + "baserow_enterprise.data_scanner.tasks.run_data_scan.delay" + ) as mock_delay: + DataScannerHandler.check_scans_due() + mock_delay.assert_called_once_with(scan.id) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_check_scans_due_skips_recently_run_scan(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + DataScan.objects.create( + name="Recently Run", + scan_type="pattern", + pattern="AA", + frequency="daily", + created_by=user, + is_running=False, + last_run_started_at=timezone.now() - timedelta(hours=1), + ) + + with 
patch( + "baserow_enterprise.data_scanner.tasks.run_data_scan.delay" + ) as mock_delay: + DataScannerHandler.check_scans_due() + mock_delay.assert_not_called() + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_check_scans_due_skips_manual_scans(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + DataScan.objects.create( + name="Manual", + scan_type="pattern", + pattern="AA", + frequency="manual", + created_by=user, + is_running=False, + last_run_started_at=None, + ) + + with patch( + "baserow_enterprise.data_scanner.tasks.run_data_scan.delay" + ) as mock_delay: + DataScannerHandler.check_scans_due() + mock_delay.assert_not_called() + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_check_scans_due_dispatches_never_run_scan(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScan.objects.create( + name="Never Run", + scan_type="pattern", + pattern="AA", + frequency="weekly", + created_by=user, + is_running=False, + last_run_started_at=None, + ) + + with patch( + "baserow_enterprise.data_scanner.tasks.run_data_scan.delay" + ) as mock_delay: + DataScannerHandler.check_scans_due() + mock_delay.assert_called_once_with(scan.id) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_result_deleted_when_table_deleted(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["test"]], + ) + + scan = DataScannerHandler.create_scan( + user=user, + name="Cascade Test", + scan_type="pattern", + pattern="AA", + ) + + DataScanResult.objects.create( + scan=scan, + table=table, + 
field=fields[0], + row_id=1, + matched_value="test", + first_identified_on=timezone.now(), + last_identified_on=timezone.now(), + ) + + assert DataScanResult.objects.filter(scan=scan).count() == 1 + + table.delete() + + assert DataScanResult.objects.filter(scan=scan).count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_result_deleted_when_field_deleted(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["test"]], + ) + field = fields[0] + + scan = DataScannerHandler.create_scan( + user=user, + name="Field Cascade Test", + scan_type="pattern", + pattern="AA", + ) + + DataScanResult.objects.create( + scan=scan, + table=table, + field=field, + row_id=1, + matched_value="test", + first_identified_on=timezone.now(), + last_identified_on=timezone.now(), + ) + + assert DataScanResult.objects.filter(scan=scan).count() == 1 + + field.delete() + + assert DataScanResult.objects.filter(scan=scan).count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_result_deleted_when_scan_deleted(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + + scan = DataScannerHandler.create_scan( + user=user, name="Test", scan_type="pattern", pattern="AA" + ) + + DataScanResult.objects.create( + scan=scan, + table=table, + field=fields[0], + row_id=1, + matched_value="test", + first_identified_on=timezone.now(), + last_identified_on=timezone.now(), + ) + + scan_id = scan.id + DataScannerHandler.delete_scan(user=user, scan_id=scan_id) + assert DataScanResult.objects.filter(scan_id=scan_id).count() == 0 + + 
+@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_source_table_set_null_when_table_deleted(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + + scan = DataScannerHandler.create_scan( + user=user, + name="Table Scan", + scan_type="list_table", + source_table_id=table.id, + source_field_id=fields[0].id, + ) + + assert scan.source_table_id == table.id + + table.delete() + + scan.refresh_from_db() + assert scan.source_table is None + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_source_field_set_null_when_field_deleted(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, _ = enterprise_data_fixture.build_table( + user=user, columns=[("Name", "text")], rows=[["test"]] + ) + + scan = DataScannerHandler.create_scan( + user=user, + name="Table Scan", + scan_type="list_table", + source_table_id=table.id, + source_field_id=fields[0].id, + ) + + fields[0].delete() + + scan.refresh_from_db() + assert scan.source_field is None + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_excludes_trashed_field( + enterprise_data_fixture, populate_search_table +): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["secret123"]], + ) + field = fields[0] + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=user, + name="Trashed Field Test", + scan_type="list_of_values", + list_items=["secret123"], + ) + + field.trashed = True + field.save() + + 
DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.last_error is None or scan.last_error == "" + assert scan.results.count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_excludes_trashed_table( + enterprise_data_fixture, populate_search_table +): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["secret123"]], + ) + field = fields[0] + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=user, + name="Trashed Table Test", + scan_type="list_of_values", + list_items=["secret123"], + ) + + table.trashed = True + table.save() + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.last_error is None or scan.last_error == "" + assert scan.results.count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_excludes_trashed_database( + enterprise_data_fixture, populate_search_table +): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["secret123"]], + ) + field = fields[0] + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=user, + name="Trashed Database Test", + scan_type="list_of_values", + list_items=["secret123"], + ) + + database = table.database + database.trashed = True + database.save() + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.last_error is None or scan.last_error == "" + assert scan.results.count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def 
test_run_scan_excludes_trashed_row(enterprise_data_fixture, populate_search_table): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["secret123"], ["secret456"]], + ) + field = fields[0] + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=user, + name="Trashed Row Test", + scan_type="list_of_values", + list_items=["secret123", "secret456"], + ) + + model = table.get_model() + model.objects_and_trash.filter(id=rows[0].id).update(trashed=True) + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.last_error is None or scan.last_error == "" + # Only the second (non-trashed) row should appear. + assert scan.results.count() == 1 + assert scan.results.filter(row_id=rows[1].id).exists() + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_excludes_trashed_workspace( + enterprise_data_fixture, populate_search_table +): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("Name", "text")], + rows=[["secret123"]], + ) + field = fields[0] + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=user, + name="Trashed Workspace Test", + scan_type="list_of_values", + list_items=["secret123"], + ) + + workspace = table.database.workspace + workspace.trashed = True + workspace.save() + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.last_error is None or scan.last_error == "" + assert scan.results.count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_pattern_scan_excludes_trashed_field( + enterprise_data_fixture, 
populate_search_table +): + enterprise_data_fixture.enable_enterprise() + user = enterprise_data_fixture.create_user(is_staff=True) + table, fields, rows = enterprise_data_fixture.build_table( + user=user, + columns=[("IBAN", "text")], + rows=[["NL23INGB0007704001"]], + ) + field = fields[0] + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=user, + name="Pattern Trash Test", + scan_type="pattern", + pattern="\\N\\LDDAAAADDDDDDDDDD", + ) + + field.trashed = True + field.save() + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.last_error is None or scan.last_error == "" + assert scan.results.count() == 0 diff --git a/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/test_data_scanner_notification_types.py b/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/test_data_scanner_notification_types.py new file mode 100644 index 0000000000..1d29734970 --- /dev/null +++ b/enterprise/backend/tests/baserow_enterprise_tests/data_scanner/test_data_scanner_notification_types.py @@ -0,0 +1,277 @@ +from django.test.utils import override_settings + +import pytest + +from baserow.core.notifications.models import Notification, NotificationRecipient +from baserow_enterprise.data_scanner.handler import DataScannerHandler +from baserow_enterprise.data_scanner.models import DataScan +from baserow_enterprise.data_scanner.notification_types import ( + DataScanNewResultsNotificationType, +) + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_notify_instance_admins_creates_notification(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScan.objects.create( + name="Test Scan", scan_type="pattern", pattern="AA", created_by=admin + ) + + recipients = DataScanNewResultsNotificationType.notify_instance_admins(scan, 5) + + assert recipients is not None + 
assert len(recipients) == 1 + assert recipients[0].recipient == admin + + notification = recipients[0].notification + assert notification.type == "data_scan_new_results" + assert notification.data["scan_id"] == scan.id + assert notification.data["scan_name"] == "Test Scan" + assert notification.data["new_results_count"] == 5 + assert notification.workspace is None + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_notify_instance_admins_sends_to_all_staff(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin1 = enterprise_data_fixture.create_user(is_staff=True) + admin2 = enterprise_data_fixture.create_user(is_staff=True) + enterprise_data_fixture.create_user(is_staff=False) + + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=admin1 + ) + + recipients = DataScanNewResultsNotificationType.notify_instance_admins(scan, 3) + + assert len(recipients) == 2 + recipient_users = {r.recipient_id for r in recipients} + assert recipient_users == {admin1.id, admin2.id} + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_notify_instance_admins_skips_inactive_staff(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + active_admin = enterprise_data_fixture.create_user(is_staff=True) + inactive_admin = enterprise_data_fixture.create_user(is_staff=True) + inactive_admin.is_active = False + inactive_admin.save() + + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=active_admin + ) + + recipients = DataScanNewResultsNotificationType.notify_instance_admins(scan, 1) + + assert len(recipients) == 1 + assert recipients[0].recipient == active_admin + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_notify_instance_admins_returns_none_when_no_staff(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + 
user = enterprise_data_fixture.create_user(is_staff=False) + + scan = DataScan.objects.create( + name="Test", scan_type="pattern", pattern="AA", created_by=user + ) + + result = DataScanNewResultsNotificationType.notify_instance_admins(scan, 1) + + assert result is None + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_email_title_singular(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + + notification = Notification( + type="data_scan_new_results", + data={"scan_id": 1, "scan_name": "IBAN Scanner", "new_results_count": 1}, + ) + + title = DataScanNewResultsNotificationType.get_notification_title_for_email( + notification, {} + ) + assert "1 new result found for IBAN Scanner" in title + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_email_title_plural(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + + notification = Notification( + type="data_scan_new_results", + data={"scan_id": 1, "scan_name": "IBAN Scanner", "new_results_count": 10}, + ) + + title = DataScanNewResultsNotificationType.get_notification_title_for_email( + notification, {} + ) + assert "10 new results found for IBAN Scanner" in title + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_email_description(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + + notification = Notification( + type="data_scan_new_results", + data={"scan_id": 1, "scan_name": "IBAN Scanner", "new_results_count": 3}, + ) + + desc = DataScanNewResultsNotificationType.get_notification_description_for_email( + notification, {} + ) + assert "IBAN Scanner" in desc + assert "3 new matches" in desc + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_sends_notification_on_new_results( + enterprise_data_fixture, populate_search_table +): + 
enterprise_data_fixture.enable_enterprise() + admin1 = enterprise_data_fixture.create_user(is_staff=True) + admin2 = enterprise_data_fixture.create_user(is_staff=True) + enterprise_data_fixture.create_user(is_staff=False) + + table, fields, rows = enterprise_data_fixture.build_table( + user=admin1, + columns=[("Name", "text")], + rows=[["secret123"]], + ) + field = fields[0] + workspace = table.database.workspace + + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=admin1, + name="Secret Scanner", + scan_type="list_of_values", + list_items=["secret123"], + scan_all_workspaces=False, + workspace_ids=[workspace.id], + ) + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.last_error is None or scan.last_error == "" + + notifications = Notification.objects.filter(type="data_scan_new_results") + assert notifications.count() == 1 + + notification = notifications.first() + assert notification.data["scan_id"] == scan.id + assert notification.data["scan_name"] == "Secret Scanner" + assert notification.data["new_results_count"] > 0 + + admin_recipients = NotificationRecipient.objects.filter(notification=notification) + recipient_ids = set(admin_recipients.values_list("recipient_id", flat=True)) + assert recipient_ids == {admin1.id, admin2.id} + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_no_notification_when_no_new_results(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + workspace = enterprise_data_fixture.create_workspace(user=admin) + + scan = DataScannerHandler.create_scan( + user=admin, + name="Empty Scanner", + scan_type="pattern", + pattern="AA", + scan_all_workspaces=False, + workspace_ids=[workspace.id], + ) + + DataScannerHandler.run_scan(scan.id) + + assert Notification.objects.filter(type="data_scan_new_results").count() == 0 + + 
+@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_no_notification_when_only_existing_results( + enterprise_data_fixture, populate_search_table +): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + + table, fields, rows = enterprise_data_fixture.build_table( + user=admin, + columns=[("Name", "text")], + rows=[["secret123"]], + ) + field = fields[0] + workspace = table.database.workspace + + populate_search_table(table, field, rows) + + scan = DataScannerHandler.create_scan( + user=admin, + name="Test", + scan_type="list_of_values", + list_items=["secret123"], + scan_all_workspaces=False, + workspace_ids=[workspace.id], + ) + + DataScannerHandler.run_scan(scan.id) + assert Notification.objects.filter(type="data_scan_new_results").count() == 1 + + Notification.objects.all().delete() + + DataScannerHandler.run_scan(scan.id) + assert Notification.objects.filter(type="data_scan_new_results").count() == 0 + + +@pytest.mark.data_scanner +@pytest.mark.django_db +@override_settings(DEBUG=True) +def test_run_scan_no_notification_on_error(enterprise_data_fixture): + enterprise_data_fixture.enable_enterprise() + admin = enterprise_data_fixture.create_user(is_staff=True) + + scan = DataScannerHandler.create_scan( + user=admin, + name="License Test", + scan_type="pattern", + pattern="AA", + ) + + from baserow.core.cache import local_cache + from baserow_premium.license.models import License + + License.objects.all().delete() + local_cache.clear() + + DataScannerHandler.run_scan(scan.id) + + scan.refresh_from_db() + assert scan.last_error is not None + assert Notification.objects.filter(type="data_scan_new_results").count() == 0 diff --git a/enterprise/web-frontend/modules/baserow_enterprise/adminTypes.js b/enterprise/web-frontend/modules/baserow_enterprise/adminTypes.js index 1c9c996baa..7cee5e2d53 100644 --- 
a/enterprise/web-frontend/modules/baserow_enterprise/adminTypes.js +++ b/enterprise/web-frontend/modules/baserow_enterprise/adminTypes.js @@ -3,6 +3,7 @@ import EnterpriseFeatures from '@baserow_enterprise/features' import PaidFeaturesModal from '@baserow_premium/components/PaidFeaturesModal' import { AuditLogPaidFeature, + DataScannerPaidFeature, SSOPaidFeature, } from '@baserow_enterprise/paidFeatures' @@ -79,3 +80,37 @@ export class AuditLogType extends EnterpriseAdminType { ] } } + +export class DataScannerType extends EnterpriseAdminType { + static getType() { + return 'data-scanner' + } + + getIconClass() { + return 'iconoir-search' + } + + getName() { + const { $i18n } = this.app + return $i18n.t('adminType.DataScanner') + } + + getRouteName() { + return 'admin-data-scanner' + } + + getOrder() { + return 120 + } + + isDeactivated() { + return !this.app.$hasFeature(EnterpriseFeatures.DATA_SCANNER) + } + + getDeactivatedModal() { + return [ + PaidFeaturesModal, + { 'initial-selected-type': DataScannerPaidFeature.getType() }, + ] + } +} diff --git a/enterprise/web-frontend/modules/baserow_enterprise/assets/scss/components/all.scss b/enterprise/web-frontend/modules/baserow_enterprise/assets/scss/components/all.scss index 36ff700ccf..533258413e 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/assets/scss/components/all.scss +++ b/enterprise/web-frontend/modules/baserow_enterprise/assets/scss/components/all.scss @@ -24,3 +24,4 @@ @import 'assistant'; @import 'assistant_onboarding'; @import 'date_dependency'; +@import 'data_scanner'; diff --git a/enterprise/web-frontend/modules/baserow_enterprise/assets/scss/components/data_scanner.scss b/enterprise/web-frontend/modules/baserow_enterprise/assets/scss/components/data_scanner.scss new file mode 100644 index 0000000000..f1aed2cbef --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/assets/scss/components/data_scanner.scss @@ -0,0 +1,52 @@ +.data-scanner__filters { + display: grid; + 
grid-template-columns: 3fr max-content; + gap: 12px; + padding: 16px 42px 25px; + max-width: 600px; +} + +.data-scanner__clear-filters-button { + margin-top: auto; +} + +.data-scanner__tag-items { + display: flex; + flex-wrap: wrap; + list-style: none; + margin: 0; + padding: 0; +} + +.data-scanner__tag-item { + padding: 0 5px; + margin-bottom: 5px; + background-color: $color-primary-100; + display: flex; + max-width: 100%; + + @include fixed-height(22px, 13px); + @include rounded($rounded); + + &:not(:last-child) { + margin-right: 5px; + } +} + +.data-scanner__tag-name { + @extend %ellipsis; + + max-width: 100%; + color: $color-neutral-900; +} + +.data-scanner__tag-remove { + color: $color-neutral-900; + margin-left: 5px; + font-size: 11px; + padding: 0 2px; + + &:hover { + color: $color-neutral-500; + } +} diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanActionsContext.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanActionsContext.vue new file mode 100644 index 0000000000..0a2653cf93 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanActionsContext.vue @@ -0,0 +1,117 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanForm.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanForm.vue new file mode 100644 index 0000000000..3914aa3c0a --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanForm.vue @@ -0,0 +1,514 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanFrequencyField.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanFrequencyField.vue new file mode 100644 index 0000000000..82465f7328 --- /dev/null +++ 
b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanFrequencyField.vue @@ -0,0 +1,28 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanLastRunField.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanLastRunField.vue new file mode 100644 index 0000000000..8ea523934f --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanLastRunField.vue @@ -0,0 +1,28 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanResolveField.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanResolveField.vue new file mode 100644 index 0000000000..9f627d01d1 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanResolveField.vue @@ -0,0 +1,56 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanResultsCountField.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanResultsCountField.vue new file mode 100644 index 0000000000..3e044cae7a --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanResultsCountField.vue @@ -0,0 +1,31 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanRowLinkField.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanRowLinkField.vue new file mode 100644 index 0000000000..d0210a7d6b --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanRowLinkField.vue @@ -0,0 +1,46 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanStatusField.vue 
b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanStatusField.vue new file mode 100644 index 0000000000..f10d55b630 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanStatusField.vue @@ -0,0 +1,33 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanTypeField.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanTypeField.vue new file mode 100644 index 0000000000..1980e582a6 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScanTypeField.vue @@ -0,0 +1,27 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScannerResultsTab.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScannerResultsTab.vue new file mode 100644 index 0000000000..fc8583766c --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScannerResultsTab.vue @@ -0,0 +1,183 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScannerScansTab.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScannerScansTab.vue new file mode 100644 index 0000000000..37a90b7e2e --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DataScannerScansTab.vue @@ -0,0 +1,234 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DeleteDataScanModal.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DeleteDataScanModal.vue new file mode 100644 index 0000000000..f0f44138f7 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/dataScanner/DeleteDataScanModal.vue @@ -0,0 +1,66 @@ + + + diff --git 
a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/forms/DataScanExportForm.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/forms/DataScanExportForm.vue new file mode 100644 index 0000000000..c7dd3049a7 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/forms/DataScanExportForm.vue @@ -0,0 +1,28 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/modals/AuditLogExportModal.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/modals/AuditLogExportModal.vue index 05cd407e77..de447b0b06 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/modals/AuditLogExportModal.vue +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/modals/AuditLogExportModal.vue @@ -89,7 +89,7 @@ export default { lastFinishedJobs: [], } }, - async fetch() { + async mounted() { this.loading = true const jobs = await AuditLogAdminService(this.$client).getLastExportJobs( MAX_EXPORT_FILES @@ -105,7 +105,6 @@ export default { this.loading = false } }, - fetchOnServer: false, methods: { loadRunningJob() { const runningJob = this.$store.getters['job/getUnfinishedJobs'].find( diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/modals/DataScanExportModal.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/modals/DataScanExportModal.vue new file mode 100644 index 0000000000..db622d325b --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/modals/DataScanExportModal.vue @@ -0,0 +1,180 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/admin/modals/DataScanModal.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/modals/DataScanModal.vue new file mode 100644 index 0000000000..bf254a36c3 --- /dev/null +++ 
b/enterprise/web-frontend/modules/baserow_enterprise/components/admin/modals/DataScanModal.vue @@ -0,0 +1,106 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/components/notifications/DataScanNewResultsNotification.vue b/enterprise/web-frontend/modules/baserow_enterprise/components/notifications/DataScanNewResultsNotification.vue new file mode 100644 index 0000000000..25fdd9ce20 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/components/notifications/DataScanNewResultsNotification.vue @@ -0,0 +1,43 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/features.js b/enterprise/web-frontend/modules/baserow_enterprise/features.js index 776e68e59f..03f89b719a 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/features.js +++ b/enterprise/web-frontend/modules/baserow_enterprise/features.js @@ -13,6 +13,7 @@ const EnterpriseFeatures = { ADVANCED_WEBHOOKS: 'ADVANCED_WEBHOOKS', FIELD_LEVEL_PERMISSIONS: 'FIELD_LEVEL_PERMISSIONS', DATE_DEPENDENCY: 'DATE_DEPENDENCY', + DATA_SCANNER: 'DATA_SCANNER', } export default EnterpriseFeatures diff --git a/enterprise/web-frontend/modules/baserow_enterprise/jobTypes.js b/enterprise/web-frontend/modules/baserow_enterprise/jobTypes.js index 3827e8a2f1..4ad18d44da 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/jobTypes.js +++ b/enterprise/web-frontend/modules/baserow_enterprise/jobTypes.js @@ -9,3 +9,13 @@ export class AuditLogExportJobType extends JobType { return 'audit_log_export' } } + +export class DataScanResultExportJobType extends JobType { + static getType() { + return 'data_scan_result_export' + } + + getName() { + return 'data_scan_result_export' + } +} diff --git a/enterprise/web-frontend/modules/baserow_enterprise/licenseTypes.js b/enterprise/web-frontend/modules/baserow_enterprise/licenseTypes.js index dcf91db63f..d5f6966f92 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/licenseTypes.js +++ 
b/enterprise/web-frontend/modules/baserow_enterprise/licenseTypes.js @@ -104,6 +104,7 @@ export class EnterpriseWithoutSupportLicenseType extends AdvancedLicenseType { return [ ...commonAdvancedFeatures, EnterpriseFeaturesObject.ENTERPRISE_SETTINGS, + EnterpriseFeaturesObject.DATA_SCANNER, ] } diff --git a/enterprise/web-frontend/modules/baserow_enterprise/locales/en.json b/enterprise/web-frontend/modules/baserow_enterprise/locales/en.json index 361e46b3f6..c8c23bc0fe 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/locales/en.json +++ b/enterprise/web-frontend/modules/baserow_enterprise/locales/en.json @@ -61,7 +61,92 @@ }, "adminType": { "AuditLog": "Audit log", - "Authentication": "Authentication" + "Authentication": "Authentication", + "DataScanner": "Data scanner" + }, + "dataScanner": { + "title": "Data scanner", + "scansTab": "Scans", + "resultsTab": "Results", + "createScan": "Create data scanner", + "editScan": "Edit scan", + "nameColumn": "Name", + "typeColumn": "Type", + "frequencyColumn": "Frequency", + "statusColumn": "Status", + "runningSince": "Running since {time}", + "idle": "Idle", + "neverRun": "Never run", + "runNow": "Run now", + "edit": "Edit", + "delete": "Delete", + "nameLabel": "Name", + "namePlaceholder": "Enter scan name", + "scanTypeLabel": "Scan type", + "patternLabel": "Pattern", + "patternPlaceholder": "e.g. \\N\\LDDAAAADDDDDDDDDD", + "patternHelp": "A = any letter, D = digit, X = any character. Use \\ to escape a literal (e.g. \\N for the letter N). Example: \\N\\LDDAAAADDDDDDDDDD for Dutch IBAN (NL00BANK0000000000).", + "listItemsLabel": "List values", + "listItemsPlaceholder": "Enter one value per line", + "listItemsHelp": "Enter one value per line. 
Each value will be searched for across all scanned workspaces.", + "sourceTableLabel": "Source table", + "selectWorkspace": "Select workspace", + "selectDatabase": "Select database", + "selectTable": "Select table", + "selectField": "Select field", + "noCompatibleFieldsTitle": "No compatible fields", + "noCompatibleFieldsDescription": "This table has no compatible field types. Only text, URL, email, number, autonumber, phone number, and UUID fields can be used as a source.", + "frequencyLabel": "Frequency", + "frequencyManual": "Manual", + "frequencyHourly": "Hourly", + "frequencyDaily": "Daily", + "frequencyWeekly": "Weekly", + "hourlyWarning": "Hourly scans can put significant load on your Baserow instance.", + "workspaceScopeLabel": "Workspaces to scan", + "scanAllWorkspaces": "Scan all workspaces", + "filterByScan": "Filter by scan", + "allScans": "All scans", + "scanNameColumn": "Scan", + "workspaceColumn": "Workspace", + "databaseColumn": "Database", + "tableColumn": "Table", + "fieldColumn": "Field", + "rowIdColumn": "Row ID", + "matchedValueColumn": "Matched value", + "firstIdentifiedColumn": "First identified", + "clearFilters": "Clear filters", + "addWorkspace": "Add a workspace", + "scanTypePattern": "Pattern", + "scanTypeListOfValues": "List of values", + "scanTypeListTable": "Baserow table", + "lastRunColumn": "Last run", + "resultsCountColumn": "Results", + "viewResults": "View results", + "results": "View {count} result|View {count} results", + "noResults": "No results yet", + "resolveResult": "Resolve", + "resultResolved": "Resolved", + "exportToCsv": "Export to CSV", + "exportModalTitle": "Export results to CSV", + "exportFilename": "data_scan_results_{date}.csv", + "exportFailedTitle": "Export failed", + "exportFailedDescription": "The export job failed. 
Please try again.", + "exportCancelledTitle": "Export cancelled", + "exportCancelledDescription": "The export job was cancelled.", + "emptyTitle": "No data scans yet", + "emptyDescription": "A data scanner allows you to automatically scan the whole instance or specific workspaces to check if patterns (like IBAN) or predefined text (like medical IDs) are being used anywhere.", + "emptyResultsTitle": "No results found", + "emptyResultsDescription": "There are no scan results yet. Results will appear here once a data scan has been run and matches have been found." + }, + "deleteDataScanModal": { + "title": "Delete data scan", + "confirmation": "Are you sure you want to delete this data scan? All scan results will be permanently deleted.", + "delete": "Delete scan" + }, + "dataScanNewResultsNotification": { + "titleSingular": "{count} new result found for {scanName}", + "titlePlural": "{count} new results found for {scanName}", + "description": "Review the results in the admin data scanner." }, "auditLog": { "adminTitle": "Audit log", @@ -327,7 +412,9 @@ "builderCustomCode": "Custom code for applications", "builderCustomCodeContent": "You can add custom CSS/JavaScript code to further customize the look and behaviour of your application. For example, you can integrate external services like analytics or social media.", "dateDependency": "Date dependency", - "dateDependencyContent": "You can define a dependency between two dates and a duration as start/end date and duration. With date dependency, if one value change, other values will be adjusted accordingly." + "dateDependencyContent": "You can define a dependency between two dates and a duration as start/end date and duration. With date dependency, if one value changes, other values will be adjusted accordingly.", + "dataScanner": "Data scanner", + "dataScannerContent": "Scan all data in your Baserow instance for sensitive patterns like IBAN numbers or medical IDs, or match against known value lists. 
Configure automatic scanning schedules and view detailed results." }, "assistantSidebarItem": { "title": "Kuma AI" diff --git a/enterprise/web-frontend/modules/baserow_enterprise/notificationTypes.js b/enterprise/web-frontend/modules/baserow_enterprise/notificationTypes.js index b9f4311e98..7e674671c2 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/notificationTypes.js +++ b/enterprise/web-frontend/modules/baserow_enterprise/notificationTypes.js @@ -1,5 +1,6 @@ import { NotificationType } from '@baserow/modules/core/notificationTypes' +import DataScanNewResultsNotification from '@baserow_enterprise/components/notifications/DataScanNewResultsNotification' import PeriodicDataSyncDeactivatedNotification from '@baserow_enterprise/components/notifications/PeriodicDataSyncDeactivatedNotification' import TwoWaySyncUpdateFailedNotification from '@baserow_enterprise/components/notifications/TwoWaySyncUpdateFailedNotification' import TwoWaySyncDeactivatedNotification from '@baserow_enterprise/components/notifications/TwoWaySyncDeactivatedNotification' @@ -81,3 +82,27 @@ export class TwoWaySyncDeactivatedNotificationType extends NotificationType { ) } } + +export class DataScanNewResultsNotificationType extends NotificationType { + static getType() { + return 'data_scan_new_results' + } + + getIconComponent() { + return null + } + + getContentComponent() { + return DataScanNewResultsNotification + } + + getRoute(notificationData) { + return { + name: 'admin-data-scanner-results', + query: { + scan_id: notificationData.scan_id, + scan_name: notificationData.scan_name, + }, + } + } +} diff --git a/enterprise/web-frontend/modules/baserow_enterprise/pages/admin/dataScanner.vue b/enterprise/web-frontend/modules/baserow_enterprise/pages/admin/dataScanner.vue new file mode 100644 index 0000000000..334825a409 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/pages/admin/dataScanner.vue @@ -0,0 +1,53 @@ + + + diff --git 
a/enterprise/web-frontend/modules/baserow_enterprise/pages/admin/dataScanner/results.vue b/enterprise/web-frontend/modules/baserow_enterprise/pages/admin/dataScanner/results.vue new file mode 100644 index 0000000000..f9ed186624 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/pages/admin/dataScanner/results.vue @@ -0,0 +1,22 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/pages/admin/dataScanner/scans.vue b/enterprise/web-frontend/modules/baserow_enterprise/pages/admin/dataScanner/scans.vue new file mode 100644 index 0000000000..a804288e8d --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/pages/admin/dataScanner/scans.vue @@ -0,0 +1,20 @@ + + + diff --git a/enterprise/web-frontend/modules/baserow_enterprise/paidFeatures.js b/enterprise/web-frontend/modules/baserow_enterprise/paidFeatures.js index 58c94d99ad..4fbc4b4013 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/paidFeatures.js +++ b/enterprise/web-frontend/modules/baserow_enterprise/paidFeatures.js @@ -286,6 +286,32 @@ export class BuilderCustomCodePaidFeature extends PaidFeature { } } +export class DataScannerPaidFeature extends PaidFeature { + static getType() { + return 'data_scanner' + } + + getPlan() { + return 'Enterprise' + } + + getIconClass() { + return 'iconoir-search' + } + + getName() { + return this.app.$i18n.t('enterpriseFeatures.dataScanner') + } + + getImage() { + return '/img/features/data_scanner.png' + } + + getContent() { + return this.app.$i18n.t('enterpriseFeatures.dataScannerContent') + } +} + export class DateDependencyPaidFeature extends PaidFeature { static getType() { return 'date_dependency' diff --git a/enterprise/web-frontend/modules/baserow_enterprise/plugin.js b/enterprise/web-frontend/modules/baserow_enterprise/plugin.js index a4bd79f25d..d738a012ef 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/plugin.js +++ b/enterprise/web-frontend/modules/baserow_enterprise/plugin.js @@ 
-1,10 +1,17 @@ -import { AuditLogExportJobType } from '@baserow_enterprise/jobTypes' +import { + AuditLogExportJobType, + DataScanResultExportJobType, +} from '@baserow_enterprise/jobTypes' import { registerRealtimeEvents } from '@baserow_enterprise/realtime' import { RolePermissionManagerType, WriteFieldValuesPermissionManagerType, } from '@baserow_enterprise/permissionManagerTypes' -import { AuditLogType, AuthProvidersType } from '@baserow_enterprise/adminTypes' +import { + AuditLogType, + AuthProvidersType, + DataScannerType, +} from '@baserow_enterprise/adminTypes' import authProviderAdminStore from '@baserow_enterprise/store/authProviderAdmin' import assistantStore from '@baserow_enterprise/store/assistant' import { PasswordAuthProviderType as CorePasswordAuthProviderType } from '@baserow/modules/core/authProviderTypes' @@ -56,6 +63,7 @@ import { } from '@baserow_enterprise/dataSyncTypes' import { PeriodicIntervalFieldsConfigureDataSyncType } from '@baserow_enterprise/configureDataSyncTypes' import { + DataScanNewResultsNotificationType, PeriodicDataSyncDeactivatedNotificationType, TwoWayDataSyncUpdateFiledNotificationType, TwoWaySyncDeactivatedNotificationType, @@ -68,6 +76,7 @@ import { BuilderCustomCodePaidFeature, BuilderFileInputElementPaidFeature, CoBrandingPaidFeature, + DataScannerPaidFeature, DataSyncPaidFeature, DateDependencyPaidFeature, FieldLevelPermissionsPaidFeature, @@ -124,6 +133,7 @@ export default defineNuxtPlugin({ ) $registry.register('admin', new AuditLogType(context)) + $registry.register('admin', new DataScannerType(context)) $registry.register('plugin', new EnterprisePlugin(context)) $registry.register( @@ -137,6 +147,7 @@ export default defineNuxtPlugin({ ) $registry.register('job', new AuditLogExportJobType(context)) + $registry.register('job', new DataScanResultExportJobType(context)) $registry.register('license', new AdvancedLicenseType(context)) $registry.register( @@ -190,6 +201,10 @@ export default defineNuxtPlugin({ 
'notification', new TwoWaySyncDeactivatedNotificationType(context) ) + $registry.register( + 'notification', + new DataScanNewResultsNotificationType(context) + ) $registry.register( 'configureDataSync', @@ -219,6 +234,7 @@ export default defineNuxtPlugin({ new BuilderFileInputElementPaidFeature(context) ) + $registry.register('paidFeature', new DataScannerPaidFeature(context)) $registry.register('paidFeature', new DateDependencyPaidFeature(context)) $registry.register( 'timelineFieldRules', diff --git a/enterprise/web-frontend/modules/baserow_enterprise/routes.js b/enterprise/web-frontend/modules/baserow_enterprise/routes.js index 85b527efa8..72bd380c3c 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/routes.js +++ b/enterprise/web-frontend/modules/baserow_enterprise/routes.js @@ -12,6 +12,24 @@ export const rootChildRoutes = [ path: '/admin/audit-log', file: path.resolve(__dirname, 'pages/auditLog.vue'), }, + { + name: 'admin-data-scanner', + path: '/admin/data-scanner', + redirect: '/admin/data-scanner/scans', + file: path.resolve(__dirname, 'pages/admin/dataScanner.vue'), + children: [ + { + name: 'admin-data-scanner-scans', + path: 'scans', + file: path.resolve(__dirname, 'pages/admin/dataScanner/scans.vue'), + }, + { + name: 'admin-data-scanner-results', + path: 'results', + file: path.resolve(__dirname, 'pages/admin/dataScanner/results.vue'), + }, + ], + }, { name: 'workspace-audit-log', path: '/workspace/:workspaceId/audit-log', diff --git a/enterprise/web-frontend/modules/baserow_enterprise/services/adminWorkspaces.js b/enterprise/web-frontend/modules/baserow_enterprise/services/adminWorkspaces.js new file mode 100644 index 0000000000..309004dd95 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/services/adminWorkspaces.js @@ -0,0 +1 @@ +export const ADMIN_WORKSPACE_OPTIONS_URL = '/admin/workspaces/options/' diff --git a/enterprise/web-frontend/modules/baserow_enterprise/services/auditLog.js 
b/enterprise/web-frontend/modules/baserow_enterprise/services/auditLog.js index bca3c6b006..4024b664d2 100644 --- a/enterprise/web-frontend/modules/baserow_enterprise/services/auditLog.js +++ b/enterprise/web-frontend/modules/baserow_enterprise/services/auditLog.js @@ -1,5 +1,6 @@ import baseService from '@baserow/modules/core/crudTable/baseService' import jobService from '@baserow/modules/core/services/job' +import { ADMIN_WORKSPACE_OPTIONS_URL } from '@baserow_enterprise/services/adminWorkspaces' export default (client) => { return Object.assign(baseService(client, `/audit-log/`), { @@ -13,10 +14,12 @@ export default (client) => { return userPaginatedService.fetch(usersUrl, page, search, [], filters) }, fetchWorkspaces(page, search) { - const workspacesUrl = `/audit-log/workspaces/` - const workspacePaginatedService = baseService(client, workspacesUrl) + const workspacePaginatedService = baseService( + client, + ADMIN_WORKSPACE_OPTIONS_URL + ) return workspacePaginatedService.fetch( - workspacesUrl, + ADMIN_WORKSPACE_OPTIONS_URL, page, search, [], diff --git a/enterprise/web-frontend/modules/baserow_enterprise/services/dataScanner.js b/enterprise/web-frontend/modules/baserow_enterprise/services/dataScanner.js new file mode 100644 index 0000000000..1394122552 --- /dev/null +++ b/enterprise/web-frontend/modules/baserow_enterprise/services/dataScanner.js @@ -0,0 +1,48 @@ +import baseService from '@baserow/modules/core/crudTable/baseService' +import jobService from '@baserow/modules/core/services/job' + +export const DataScannerScansService = (client) => { + const url = '/admin/data-scanner/scans/' + return Object.assign(baseService(client, url), { + create(data) { + return client.post(url, data) + }, + get(scanId) { + return client.get(`${url}${scanId}/`) + }, + update(scanId, data) { + return client.patch(`${url}${scanId}/`, data) + }, + delete(scanId) { + return client.delete(`${url}${scanId}/`) + }, + trigger(scanId) { + return 
client.post(`${url}${scanId}/trigger/`) + }, + fetchWorkspaceStructure(workspaceId) { + return client.get( + `/admin/data-scanner/workspace-structure/${workspaceId}/` + ) + }, + }) +} + +export const DataScannerResultsService = (client) => { + return Object.assign(baseService(client, '/admin/data-scanner/results/'), { + startExportCsvJob(data) { + return client.post('/admin/data-scanner/results/export/', data) + }, + async getLastExportJobs(maxCount = 4) { + const { data } = await jobService(client).fetchAll({ + states: ['!failed'], + }) + const jobs = data.jobs || [] + return jobs + .filter((job) => job.type === 'data_scan_result_export') + .slice(0, maxCount) + }, + deleteResult(resultId) { + return client.delete(`/admin/data-scanner/results/${resultId}/`) + }, + }) +} diff --git a/premium/backend/src/baserow_premium/locale/en/LC_MESSAGES/django.po b/premium/backend/src/baserow_premium/locale/en/LC_MESSAGES/django.po index 0a79fbb0e6..0c9632227a 100644 --- a/premium/backend/src/baserow_premium/locale/en/LC_MESSAGES/django.po +++ b/premium/backend/src/baserow_premium/locale/en/LC_MESSAGES/django.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2026-03-16 14:50+0000\n" +"POT-Creation-Date: 2026-03-16 21:52+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" diff --git a/web-frontend/modules/automation/components/AutomationHeader.vue b/web-frontend/modules/automation/components/AutomationHeader.vue index 31ed3cd679..6bc45b5770 100644 --- a/web-frontend/modules/automation/components/AutomationHeader.vue +++ b/web-frontend/modules/automation/components/AutomationHeader.vue @@ -1,7 +1,6 @@