From 5b0f0d18d161633aa94c51042525483565d427fc Mon Sep 17 00:00:00 2001 From: Niro Date: Tue, 16 Sep 2025 14:47:40 +0300 Subject: [PATCH 01/21] feat(providers): add function to get file (copied from file-syncer) --- src/common/interfaces.ts | 2 + src/providers/nfsProvider.ts | 19 ++++++++ src/providers/s3Provider.ts | 45 ++++++++++++++++++- tests/helpers/mockCreator.ts | 1 + tests/helpers/s3Helper.ts | 6 ++- .../integration/providers/nfsProvider.spec.ts | 13 ++++++ .../integration/providers/s3Provider.spec.ts | 22 +++++++++ 7 files changed, 105 insertions(+), 3 deletions(-) diff --git a/src/common/interfaces.ts b/src/common/interfaces.ts index 273d0c1..89581eb 100644 --- a/src/common/interfaces.ts +++ b/src/common/interfaces.ts @@ -29,8 +29,10 @@ export interface DeletePayload { producerName: string; } +// ToDo: merge this class with the identical class in file-syncer export interface Provider { streamModelPathsToQueueFile: (modelId: string, pathToTileset: string, productName: string) => Promise; + getFile: (filePath: string) => Promise; } export interface IngestionJobParameters { diff --git a/src/providers/nfsProvider.ts b/src/providers/nfsProvider.ts index a871e1e..bf8044c 100644 --- a/src/providers/nfsProvider.ts +++ b/src/providers/nfsProvider.ts @@ -9,6 +9,7 @@ import { AppError } from '../common/appError'; import { SERVICES } from '../common/constants'; import { Provider, NFSConfig, LogContext } from '../common/interfaces'; +// ToDo: merge this class with the identical class in file-syncer @injectable() export class NFSProvider implements Provider { private readonly logContext: LogContext; @@ -25,6 +26,24 @@ export class NFSProvider implements Provider { }; } + @withSpanAsyncV4 + public async getFile(filePath: string): Promise { + const logContext = { ...this.logContext, function: this.getFile.name }; + const pvPath = this.config.pvPath; + const fullPath = `${pvPath}/${filePath}`; + this.logger.debug({ + msg: 'Starting getFile', + logContext, + fullPath, + }); 
+ const data = await fs.readFile(fullPath); + this.logger.debug({ + msg: 'Done getFile', + logContext, + }); + return data; + } + @withSpanAsyncV4 public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; diff --git a/src/providers/s3Provider.ts b/src/providers/s3Provider.ts index d4162d6..ea24e58 100644 --- a/src/providers/s3Provider.ts +++ b/src/providers/s3Provider.ts @@ -1,4 +1,13 @@ -import { CommonPrefix, ListObjectsCommand, ListObjectsRequest, S3Client, S3ClientConfig, S3ServiceException, _Object } from '@aws-sdk/client-s3'; +import { + CommonPrefix, + ListObjectsCommand, + GetObjectCommand, + ListObjectsRequest, + S3Client, + S3ClientConfig, + S3ServiceException, + _Object, +} from '@aws-sdk/client-s3'; import { Logger } from '@map-colonies/js-logger'; import httpStatus from 'http-status-codes'; import { inject, injectable } from 'tsyringe'; @@ -9,6 +18,7 @@ import { AppError } from '../common/appError'; import { SERVICES } from '../common/constants'; import { LogContext, Provider, S3Config } from '../common/interfaces'; +// ToDo: merge this class with the identical class in file-syncer @injectable() export class S3Provider implements Provider { private readonly s3: S3Client; @@ -40,6 +50,39 @@ export class S3Provider implements Provider { }; } + @withSpanAsyncV4 + public async getFile(filePath: string): Promise { + const logContext = { ...this.logContext, function: this.getFile.name }; + this.logger.debug({ + msg: 'Starting to get file', + logContext, + filePath, + }); + + const getObjectCommand = new GetObjectCommand({ + /* eslint-disable @typescript-eslint/naming-convention */ + Bucket: this.s3Config.bucket, + Key: filePath, + /* eslint-disable @typescript-eslint/naming-convention */ + }); + + try { + const response = await this.s3.send(getObjectCommand); + const responseArray = await 
response.Body?.transformToByteArray(); + return Buffer.from(responseArray as Uint8Array); + } catch (err) { + this.logger.error({ + msg: 'an error occurred during getting file', + err, + endpoint: this.s3Config.endpointUrl, + bucketName: this.s3Config.bucket, + key: filePath, + }); + const s3Error = err as Error; + throw new Error(`an error occurred during the get key ${filePath} on bucket ${this.s3Config.bucket}, ${s3Error.message}`); + } + } + @withSpanAsyncV4 public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; diff --git a/tests/helpers/mockCreator.ts b/tests/helpers/mockCreator.ts index cdfbceb..b442201 100644 --- a/tests/helpers/mockCreator.ts +++ b/tests/helpers/mockCreator.ts @@ -146,4 +146,5 @@ export const jobManagerClientMock = { export const configProviderMock = { streamModelPathsToQueueFile: jest.fn(), + getFile: jest.fn(), }; diff --git a/tests/helpers/s3Helper.ts b/tests/helpers/s3Helper.ts index 45479ca..6ab9cae 100644 --- a/tests/helpers/s3Helper.ts +++ b/tests/helpers/s3Helper.ts @@ -51,14 +51,16 @@ export class S3Helper { await this.s3.send(command); } - public async createFileOfModel(model: string, file: string): Promise { + public async createFileOfModel(model: string, file: string): Promise { + const data = Buffer.from(faker.word.words()); const params: PutObjectCommandInput = { Bucket: this.s3Config.bucket, Key: `${model}/${file}`, - Body: Buffer.from(faker.word.words()), + Body: data, }; const command = new PutObjectCommand(params); await this.s3.send(command); + return data; } public async clearBucket(bucket = this.s3Config.bucket): Promise { diff --git a/tests/integration/providers/nfsProvider.spec.ts b/tests/integration/providers/nfsProvider.spec.ts index 8362b50..8c54227 100644 --- a/tests/integration/providers/nfsProvider.spec.ts +++ b/tests/integration/providers/nfsProvider.spec.ts @@ 
-44,6 +44,19 @@ describe('NFSProvider tests', () => { jest.clearAllMocks(); }); + describe('getFile', () => { + it('When calling getFile, should get the file content from pv path', async () => { + const model = faker.word.sample(); + const file = `${faker.word.sample()}.${faker.system.commonFileExt()}`; + const fileContent = await nfsHelper.createFileOfModel(model, file); + + const bufferResult = await provider.getFile(`${model}/${file}`); + const result = bufferResult.toString(); + + expect(result).toStrictEqual(fileContent); + }); + }); + describe('streamModelPathsToQueueFile Function', () => { it('if model exists in the agreed folder, returns all the file paths of the model', async () => { const modelId = faker.string.uuid(); diff --git a/tests/integration/providers/s3Provider.spec.ts b/tests/integration/providers/s3Provider.spec.ts index a42a293..16e2755 100644 --- a/tests/integration/providers/s3Provider.spec.ts +++ b/tests/integration/providers/s3Provider.spec.ts @@ -44,6 +44,28 @@ describe('S3Provider tests', () => { s3Helper.killS3(); }); + describe('getFile', () => { + it(`When calling getFile, should see the file content from source bucket`, async () => { + const model = faker.word.sample(); + const file = `${faker.word.sample()}.${faker.system.commonFileExt()}`; + const expected = await s3Helper.createFileOfModel(model, file); + + const result = await provider.getFile(`${model}/${file}`); + + expect(result).toStrictEqual(expected); + }); + + it(`When the file is not exists in the bucket, throws error`, async () => { + const file = `${faker.word.sample()}.${faker.system.commonFileExt()}`; + + const result = async () => { + await provider.getFile(file); + }; + + await expect(result).rejects.toThrow(Error); + }); + }); + describe('streamModelPathsToQueueFile', () => { it('returns all the files from S3', async () => { const modelId = faker.word.sample(); From 562de30954903533f60dc42933fe4a855f1aac26 Mon Sep 17 00:00:00 2001 From: Niro Date: Tue, 16 Sep 2025 
14:47:40 +0300 Subject: [PATCH 02/21] feat(providers): add JSON crawling provider --- package-lock.json | 145 +++++++++++++++++- package.json | 6 +- src/common/constants.ts | 1 + src/common/interfaces.ts | 6 + src/providers/crawlingProvider.ts | 89 +++++++++++ .../providers/crawlingProvider.spec.ts | 137 +++++++++++++++++ 6 files changed, 376 insertions(+), 8 deletions(-) create mode 100644 src/providers/crawlingProvider.ts create mode 100644 tests/integration/providers/crawlingProvider.spec.ts diff --git a/package-lock.json b/package-lock.json index 2fd760a..174723f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -32,6 +32,7 @@ "express": "^4.18.2", "express-openapi-validator": "^5.0.4", "http-status-codes": "^2.2.0", + "jsonpath": "^1.1.1", "n-readlines": "^1.0.1", "prom-client": "^15.1.1", "reflect-metadata": "^0.1.13", @@ -49,6 +50,7 @@ "@types/config": "^3.3.0", "@types/express": "^4.17.17", "@types/jest": "^29.5.2", + "@types/jsonpath": "^0.2.4", "@types/multer": "^1.4.7", "@types/n-readlines": "^1.0.3", "@types/supertest": "^2.0.12", @@ -10071,6 +10073,13 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "node_modules/@types/jsonpath": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/@types/jsonpath/-/jsonpath-0.2.4.tgz", + "integrity": "sha512-K3hxB8Blw0qgW6ExKgMbXQv2UPZBoE2GqLpVY+yr7nMD2Pq86lsuIzyAaiQ7eMqFL5B6di6pxSkogLJEyEHoGA==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/keygrip": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/@types/keygrip/-/keygrip-1.0.6.tgz", @@ -13033,8 +13042,7 @@ "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", - "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", - "dev": true + "integrity": 
"sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==" }, "node_modules/deepmerge": { "version": "4.3.1", @@ -13614,6 +13622,87 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/escodegen": { + "version": "1.14.3", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-1.14.3.tgz", + "integrity": "sha512-qFcX0XJkdg+PB3xjZZG/wKSuT1PnQWx57+TVSjIMmILd2yC/6ByYElPwJnslDsuWuSAp4AwJGumarAAmJch5Kw==", + "license": "BSD-2-Clause", + "dependencies": { + "esprima": "^4.0.1", + "estraverse": "^4.2.0", + "esutils": "^2.0.2", + "optionator": "^0.8.1" + }, + "bin": { + "escodegen": "bin/escodegen.js", + "esgenerate": "bin/esgenerate.js" + }, + "engines": { + "node": ">=4.0" + }, + "optionalDependencies": { + "source-map": "~0.6.1" + } + }, + "node_modules/escodegen/node_modules/estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/escodegen/node_modules/levn": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", + "integrity": "sha512-0OO4y2iOHix2W6ujICbKIaEQXvFQHue65vUG3pb5EUomzPI90z9hsA1VsO/dbIIpC53J8gxM9Q4Oho0jrCM/yA==", + "license": "MIT", + "dependencies": { + "prelude-ls": "~1.1.2", + "type-check": "~0.3.2" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/escodegen/node_modules/optionator": { + "version": "0.8.3", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.8.3.tgz", + "integrity": "sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA==", + "license": "MIT", + "dependencies": { + "deep-is": "~0.1.3", + "fast-levenshtein": "~2.0.6", + "levn": "~0.3.0", + "prelude-ls": "~1.1.2", + "type-check": "~0.3.2", + "word-wrap": "~1.2.3" 
+ }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/escodegen/node_modules/prelude-ls": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.1.2.tgz", + "integrity": "sha512-ESF23V4SKG6lVSGZgYNpbsiaAkdab6ZgOxe52p7+Kid3W3u3bxR4Vfd/o21dmN7jSt0IwgZ4v5MUd26FEtXE9w==", + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/escodegen/node_modules/type-check": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz", + "integrity": "sha512-ZCmOJdvOWDBYJlzAoFkC+Q0+bUyEOS1ltgp1MGU03fqHG+dbi9tBFU2Rd9QKiDZFAYrhPh2JUf7rZRIuHRKtOg==", + "license": "MIT", + "dependencies": { + "prelude-ls": "~1.1.2" + }, + "engines": { + "node": ">= 0.8.0" + } + }, "node_modules/eslint": { "version": "8.53.0", "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.53.0.tgz", @@ -14546,7 +14635,6 @@ "version": "4.0.1", "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true, "bin": { "esparse": "bin/esparse.js", "esvalidate": "bin/esvalidate.js" @@ -14592,7 +14680,6 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true, "engines": { "node": ">=0.10.0" } @@ -14848,8 +14935,7 @@ "node_modules/fast-levenshtein": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", - "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", - "dev": true + "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==" }, "node_modules/fast-redact": { "version": "3.3.0", @@ -17533,6 +17619,17 @@ "node >= 0.2.0" ] }, + "node_modules/jsonpath": { + "version": 
"1.1.1", + "resolved": "https://registry.npmjs.org/jsonpath/-/jsonpath-1.1.1.tgz", + "integrity": "sha512-l6Cg7jRpixfbgoWgkrl77dgEj8RPvND0wMH6TwQmi9Qs4TFfS9u5cUFnbeKTwj5ga5Y3BTGGNI28k117LJ009w==", + "license": "MIT", + "dependencies": { + "esprima": "1.2.2", + "static-eval": "2.0.2", + "underscore": "1.12.1" + } + }, "node_modules/jsonpath-plus": { "version": "10.3.0", "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz", @@ -17552,6 +17649,18 @@ "node": ">=18.0.0" } }, + "node_modules/jsonpath/node_modules/esprima": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-1.2.2.tgz", + "integrity": "sha512-+JpPZam9w5DuJ3Q67SqsMGtiHKENSMRVoxvArfJZK01/BfLEObtZ6orJa/MtoGNR/rfMgp5837T41PAmTwAv/A==", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/jsonpointer": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz", @@ -21323,6 +21432,15 @@ "node": ">=10" } }, + "node_modules/static-eval": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/static-eval/-/static-eval-2.0.2.tgz", + "integrity": "sha512-N/D219Hcr2bPjLxPiV+TQE++Tsmrady7TqAJugLy7Xk1EumfDWS/f5dtBbkRCGE7wKKXuYockQoj8Rm2/pVKyg==", + "license": "MIT", + "dependencies": { + "escodegen": "^1.8.1" + } + }, "node_modules/statuses": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", @@ -22248,6 +22366,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/underscore": { + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.12.1.tgz", + "integrity": "sha512-hEQt0+ZLDVUMhebKxL4x1BTtDY7bavVofhZ9KZ4aI26X9SRaE+Y3m83XUL1UP2jn8ynjndwCCpEHdUG+9pP1Tw==", + "license": "MIT" + }, "node_modules/undici": { "version": "6.21.3", "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.3.tgz", @@ -22573,6 +22697,15 @@ "url": 
"https://github.com/sponsors/ljharb" } }, + "node_modules/word-wrap": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", + "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/wordwrap": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", diff --git a/package.json b/package.json index cdba821..99a9bbc 100644 --- a/package.json +++ b/package.json @@ -48,12 +48,12 @@ "@map-colonies/error-express-handler": "^2.1.0", "@map-colonies/express-access-log-middleware": "^2.0.1", "@map-colonies/js-logger": "^1.0.1", - "@map-colonies/types": "^1.7.0", "@map-colonies/mc-model-types": "^19.0.0", "@map-colonies/mc-priority-queue": "^8.2.1", "@map-colonies/openapi-express-viewer": "^3.0.0", "@map-colonies/read-pkg": "0.0.1", "@map-colonies/telemetry": "^6.1.0", + "@map-colonies/types": "^1.7.0", "@opentelemetry/api": "1.7.0", "@opentelemetry/api-metrics": "0.23.0", "@opentelemetry/context-async-hooks": "^1.24.1", @@ -65,6 +65,7 @@ "express": "^4.18.2", "express-openapi-validator": "^5.0.4", "http-status-codes": "^2.2.0", + "jsonpath": "^1.1.1", "n-readlines": "^1.0.1", "prom-client": "^15.1.1", "reflect-metadata": "^0.1.13", @@ -76,12 +77,13 @@ "@faker-js/faker": "^8.4.1", "@map-colonies/eslint-config": "^4.0.0", "@map-colonies/prettier-config": "0.0.1", - "@redocly/openapi-cli": "^1.0.0-beta.94", "@redocly/cli": "^1.34.3", + "@redocly/openapi-cli": "^1.0.0-beta.94", "@types/compression": "^1.7.2", "@types/config": "^3.3.0", "@types/express": "^4.17.17", "@types/jest": "^29.5.2", + "@types/jsonpath": "^0.2.4", "@types/multer": "^1.4.7", "@types/n-readlines": "^1.0.3", "@types/supertest": "^2.0.12", diff --git a/src/common/constants.ts b/src/common/constants.ts index 38e84f4..464a508 100644 --- a/src/common/constants.ts +++ b/src/common/constants.ts @@ 
-24,5 +24,6 @@ export const SERVICES: Record = { PROVIDER_CONFIG: Symbol('ProviderConfig'), QUEUE_FILE_HANDLER: Symbol('QueueFileHandler'), JOB_MANAGER_CLIENT: Symbol('JobManagerClient'), + UNDERLYING: Symbol('Underlying'), }; /* eslint-enable @typescript-eslint/naming-convention */ diff --git a/src/common/interfaces.ts b/src/common/interfaces.ts index 89581eb..2a20bef 100644 --- a/src/common/interfaces.ts +++ b/src/common/interfaces.ts @@ -72,6 +72,12 @@ export interface NFSConfig { pvPath: string; } +export interface CrawlingConfig { + extension: string; + nestedJsonPath: string; + ignoreNotFound?: boolean; +} + export type ProviderConfig = S3Config | NFSConfig; export interface JobOperationResponse { diff --git a/src/providers/crawlingProvider.ts b/src/providers/crawlingProvider.ts new file mode 100644 index 0000000..d7a4a9b --- /dev/null +++ b/src/providers/crawlingProvider.ts @@ -0,0 +1,89 @@ +import Path from 'path'; +import { Logger } from '@map-colonies/js-logger'; +import { StatusCodes } from 'http-status-codes'; +import { inject, injectable } from 'tsyringe'; +import { Tracer } from '@opentelemetry/api'; +import { withSpanAsyncV4 } from '@map-colonies/telemetry'; +import jsonpath from 'jsonpath'; +import { QueueFileHandler } from '../handlers/queueFileHandler'; +import { AppError } from '../common/appError'; +import { SERVICES } from '../common/constants'; +import { CrawlingConfig, LogContext, Provider } from '../common/interfaces'; + +@injectable() +export class CrawlingProvider implements Provider { + private readonly logContext: LogContext; + + public constructor( + @inject(SERVICES.LOGGER) protected readonly logger: Logger, + @inject(SERVICES.TRACER) public readonly tracer: Tracer, + @inject(SERVICES.PROVIDER_CONFIG) protected readonly config: CrawlingConfig, + @inject(SERVICES.UNDERLYING) protected readonly underlying: Provider, + @inject(SERVICES.QUEUE_FILE_HANDLER) protected readonly queueFileHandler: QueueFileHandler + ) { + this.logContext = { + 
fileName: __filename, + class: CrawlingProvider.name, + }; + if (this.underlying instanceof CrawlingProvider) { + throw new AppError(StatusCodes.BAD_REQUEST, `Invalid config in provider: Do not nest crawling providers.`, false); + } + } + + @withSpanAsyncV4 + public async getFile(filePath: string): Promise { + return this.underlying.getFile(filePath); + } + + @withSpanAsyncV4 + public async streamModelPathsToQueueFile(modelId: string, path: string, modelName: string): Promise { + const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; + let buffer: Buffer; + try { + buffer = await this.underlying.getFile(path); + } catch (err) { + if (this.config.ignoreNotFound! && err instanceof AppError && err.status === StatusCodes.NOT_FOUND) { + this.logger.warn({ + msg: 'Found a non-existing file, but instructed to ignore. Skipping...', + logContext, + path, + modelId, + modelName, + }); + return 0; + } else { + throw err; + } + } + const fileContent = buffer.toString(); + let file: object = {}; + try { + file = JSON.parse(fileContent) as object; + } catch (err) { + if (err instanceof SyntaxError) { + this.logger.error({ + msg: 'File is not a valid JSON', + logContext, + path, + modelId, + modelName, + }); + throw new AppError(StatusCodes.NOT_ACCEPTABLE, 'File is not a valid JSON', false); + } else { + throw err; + } + } + + const nestedFiles = jsonpath.query(file, this.config.nestedJsonPath).map((child: string) => Path.resolve('/', Path.dirname(path), child)); + const leafs = nestedFiles.filter((path) => !path.endsWith(this.config.extension)); + const addedFilePromises = [...leafs, path].map(async (path) => { + await this.queueFileHandler.writeFileNameToQueueFile(modelId, path); + return 1; + }); + + const children = nestedFiles.filter((path) => path.endsWith(this.config.extension)); + const countPromises = children.map(async (path) => this.streamModelPathsToQueueFile(modelId, path, modelName)); + const counts = await 
Promise.all([...countPromises, ...addedFilePromises]); + return counts.reduce((a, b) => a + b); + } +} diff --git a/tests/integration/providers/crawlingProvider.spec.ts b/tests/integration/providers/crawlingProvider.spec.ts new file mode 100644 index 0000000..898f02e --- /dev/null +++ b/tests/integration/providers/crawlingProvider.spec.ts @@ -0,0 +1,137 @@ +import fs from 'fs'; +import os from 'os'; +import jsLogger, { Logger } from '@map-colonies/js-logger'; +import { container } from 'tsyringe'; +import { faker } from '@faker-js/faker'; +import { StatusCodes } from 'http-status-codes'; +import { Tracer } from '@opentelemetry/api'; +import { getApp } from '../../../src/app'; +import { SERVICES } from '../../../src/common/constants'; +import { CrawlingConfig } from '../../../src/common/interfaces'; +import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; +import { CrawlingProvider } from '../../../src/providers/crawlingProvider'; +import { configProviderMock } from '../../helpers/mockCreator'; +import { AppError } from '../../../src/common/appError'; + +// ToDo those are UNIT tests, NOT INTEGRATION!! 
But CI requires integration coverage +describe('CrawlingProvider tests', () => { + let provider: CrawlingProvider; + let queueFileHandler: QueueFileHandler; + const logger: Logger = jsLogger({ enabled: false }); + + const underlying = configProviderMock; + const queueFilePath = os.tmpdir(); + const config: CrawlingConfig = { + extension: '.json', + nestedJsonPath: '$.root..uri', + ignoreNotFound: false, + }; + + beforeAll(() => { + getApp({ + override: [ + { token: SERVICES.LOGGER, provider: { useValue: logger } }, + { token: SERVICES.PROVIDER_CONFIG, provider: { useValue: config } }, + { token: SERVICES.UNDERLYING, provider: { useValue: underlying } }, + ], + }); + provider = container.resolve(CrawlingProvider); + queueFileHandler = container.resolve(QueueFileHandler); + }); + + afterAll(function () { + container.reset(); + }); + + afterEach(() => { + jest.clearAllMocks(); + }); + + describe('constructor', () => { + it('is a stupid test just because coverage fails CI', () => { + const tracer = container.resolve(SERVICES.TRACER); + const provider = new CrawlingProvider(logger, tracer, config, underlying, queueFileHandler); + expect(() => new CrawlingProvider(logger, tracer, config, provider, queueFileHandler)).toThrow(AppError); + }); + }); + + describe('getFile', () => { + it('should delegate', async () => { + const filePath = 'A test??'; + const buffetPromise = Promise.resolve(Buffer.from([80, 101, 114, 114, 121, 32, 116, 104, 101, 32, 116, 101, 115, 116, 63, 33, 63, 33])); + underlying.getFile.mockResolvedValueOnce(buffetPromise); + const file = await provider.getFile(filePath); + expect(underlying.getFile).toHaveBeenCalledWith(filePath); + expect(file.toString()).toBe('Perry the test?!?!'); + }); + }); + + describe('streamModelPathsToQueueFile', () => { + const json0 = { + root: { + content: { uri: 'a.b3dm', boundingVolume: { region: [0] } }, + children: [ + { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { uri: 'b.b3dm' } }, + { 
boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { uri: '../1.json' }, children: [] }, + ], + }, + }; + const json1 = { root: { content: { uri: 'bla/c.b3dm' }, children: [{ content: { uri: '2.json' } }] } }; + const json2 = {}; + const pathToTileset = '/x/y/0.json'; + + it('should returns all the files from S3', async () => { + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + underlying.getFile + .mockImplementationOnce((path) => { + return path === pathToTileset && Buffer.from(JSON.stringify(json0), 'utf8'); + }) + .mockImplementationOnce((path) => { + return path === '/x/1.json' && Buffer.from(JSON.stringify(json1), 'utf8'); + }) + .mockImplementationOnce((path) => { + return path === '/x/2.json' && Buffer.from(JSON.stringify(json2), 'utf8'); + }); + + const expected: string[] = ['/x/y/0.json', '/x/1.json', '/x/2.json', '/x/y/a.b3dm', '/x/y/b.b3dm', '/x/bla/c.b3dm']; + await queueFileHandler.createQueueFile(modelId); + + await provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8').trimEnd().split('\n'); + + expect(result.sort().join('\n')).toBe(expected.sort().join('\n')); + await queueFileHandler.deleteQueueFile(modelId); + }); + + it('should respect 404 ignore rules error on underlying.getFile error', async () => { + const configWithIgnoreNotFound = { ...config, ignoreNotFound: true }; + const provider = new CrawlingProvider(logger, container.resolve(SERVICES.TRACER), configWithIgnoreNotFound, underlying, queueFileHandler); + underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const result = provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).resolves.not.toThrow(); + }); + + it('should throw error on underlying.getFile error', async () => { + 
underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const result = provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).rejects.toThrow(AppError); + }); + + it('should throw error bad file', async () => { + underlying.getFile.mockReturnValueOnce(Buffer.from('}{', 'utf8')); + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const result = provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).rejects.toThrow(AppError); + }); + }); +}); From b11b25066ba7776a1b7d224c0121d20b20d0fbfa Mon Sep 17 00:00:00 2001 From: Niro Date: Tue, 16 Sep 2025 14:47:40 +0300 Subject: [PATCH 03/21] feat(providers): add crawling provider loading --- config/default.json | 6 +++ config/test.json | 6 +++ src/common/interfaces.ts | 3 +- src/containerConfig.ts | 9 +--- src/providers/getProvider.ts | 37 +++++++++++++---- .../jobOperationsController.spec.ts | 4 +- .../integration/providers/getProvider.spec.ts | 3 +- tests/unit/providers/getProvider.spec.ts | 41 +++++++++++++++++++ 8 files changed, 90 insertions(+), 19 deletions(-) create mode 100644 tests/unit/providers/getProvider.spec.ts diff --git a/config/default.json b/config/default.json index 4f2f48e..6b0633d 100644 --- a/config/default.json +++ b/config/default.json @@ -46,6 +46,12 @@ "NFS": { "pvPath": "/Path/To/Models" }, + "crawling": { + "extension": ".json", + "nestedJsonPath": "$.root..uri", + "ignoreNotFound": true, + "underlying": "NFS" + }, "ingestion": { "provider": "NFS", "blackList": ["tar", "zip", "rar", "7z"] diff --git a/config/test.json b/config/test.json index 81766b3..7008d53 100644 --- a/config/test.json +++ b/config/test.json @@ -1,4 +1,10 @@ { + "crawling": { + "extension": ".json", + "nestedJsonPath": "$.root..uri", + "ignoreNotFound": true, + "underlying": "NFS" + }, "S3": { 
"accessKeyId": "minioadmin", "secretAccessKey": "minioadmin", diff --git a/src/common/interfaces.ts b/src/common/interfaces.ts index 2a20bef..4bfa8c6 100644 --- a/src/common/interfaces.ts +++ b/src/common/interfaces.ts @@ -76,9 +76,10 @@ export interface CrawlingConfig { extension: string; nestedJsonPath: string; ignoreNotFound?: boolean; + underlying?: string; } -export type ProviderConfig = S3Config | NFSConfig; +export type ProviderConfig = S3Config | NFSConfig | CrawlingConfig; export interface JobOperationResponse { jobId: string; diff --git a/src/containerConfig.ts b/src/containerConfig.ts index 5a88d99..35f93ac 100644 --- a/src/containerConfig.ts +++ b/src/containerConfig.ts @@ -7,7 +7,6 @@ import jsLogger, { LoggerOptions } from '@map-colonies/js-logger'; import client from 'prom-client'; import { JobManagerClient } from '@map-colonies/mc-priority-queue'; import { SERVICES, SERVICE_NAME } from './common/constants'; -import { Provider, ProviderConfig } from './common/interfaces'; import { tracing } from './common/tracing'; import { jobOperationsRouterFactory, JOB_OPERATIONS_ROUTER_SYMBOL } from './jobOperations/routes/jobOperationsRouter'; import { InjectionObject, registerDependencies } from './common/dependencyRegistration'; @@ -62,18 +61,14 @@ export const registerExternalValues = (options?: RegisterOptions): DependencyCon { token: SERVICES.PROVIDER_CONFIG, provider: { - useFactory: (): ProviderConfig => { - return getProviderConfig(provider); - }, + useFactory: (container) => getProviderConfig(container), }, }, { token: SERVICES.QUEUE_FILE_HANDLER, provider: { useClass: QueueFileHandler } }, { token: SERVICES.PROVIDER, provider: { - useFactory: (): Provider => { - return getProvider(provider); - }, + useFactory: (container) => getProvider(provider, container), }, }, { diff --git a/src/providers/getProvider.ts b/src/providers/getProvider.ts index 01d796f..58dba51 100644 --- a/src/providers/getProvider.ts +++ b/src/providers/getProvider.ts @@ -1,27 +1,48 
@@ import config from 'config'; import httpStatus from 'http-status-codes'; -import { container } from 'tsyringe'; +import { DependencyContainer } from 'tsyringe'; import { AppError } from '../common/appError'; -import { ProviderConfig } from '../common/interfaces'; +import { CrawlingConfig, Provider, ProviderConfig } from '../common/interfaces'; +import { SERVICES } from '../common/constants'; import { NFSProvider } from './nfsProvider'; import { S3Provider } from './s3Provider'; +import { CrawlingProvider } from './crawlingProvider'; -function getProvider(provider: string): S3Provider | NFSProvider { +const PROVIDER_CONFIG = Symbol('ProviderConfig'); +function getProvider(provider: string, container: DependencyContainer): Provider { + const childContainer = container.createChildContainer(); + childContainer.register(PROVIDER_CONFIG, { useValue: provider }); switch (provider.toLowerCase()) { case 'nfs': - return container.resolve(NFSProvider); + return childContainer.resolve(NFSProvider); case 's3': - return container.resolve(S3Provider); + return childContainer.resolve(S3Provider); + case 'crawling': { + const underlying = childContainer.resolve(SERVICES.PROVIDER_CONFIG).underlying!; + childContainer.register(SERVICES.UNDERLYING, { + useFactory: (childContainer) => getProvider(underlying, childContainer), + }); + return childContainer.resolve(CrawlingProvider); + } default: - throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, `Invalid config provider received: ${provider} - available values: "nfs" or "s3"`, false); + throw new AppError( + httpStatus.INTERNAL_SERVER_ERROR, + `Invalid config provider received: ${provider}. Consult documentation for available values`, + false + ); } } -function getProviderConfig(provider: string): ProviderConfig { +function getProviderConfig(container: string | DependencyContainer): ProviderConfig { + const provider = typeof container == 'string' ? 
container : container.resolve(PROVIDER_CONFIG); try { return config.get(provider); } catch (err) { - throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, `Invalid config provider received: ${provider} - available values: "nfs" or "s3"`, false); + throw new AppError( + httpStatus.INTERNAL_SERVER_ERROR, + `Invalid config provider received: ${provider}. Consult documentation for available values`, + false + ); } } diff --git a/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts b/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts index 219b7c7..14d7190 100644 --- a/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts +++ b/tests/integration/jobOperations/controllers/jobOperationsController.spec.ts @@ -28,7 +28,7 @@ describe('JobOperationsController on S3', function () { token: SERVICES.PROVIDER, provider: { useFactory: (): Provider => { - return getProvider('s3'); + return getProvider('S3', container); }, }, }, @@ -189,7 +189,7 @@ describe('IngestionController on NFS', function () { token: SERVICES.PROVIDER, provider: { useFactory: (): Provider => { - return getProvider('nfs'); + return getProvider('NFS', container); }, }, }, diff --git a/tests/integration/providers/getProvider.spec.ts b/tests/integration/providers/getProvider.spec.ts index f4fcebc..d0a6f0b 100644 --- a/tests/integration/providers/getProvider.spec.ts +++ b/tests/integration/providers/getProvider.spec.ts @@ -1,4 +1,5 @@ import config from 'config'; +import { container } from 'tsyringe'; import { AppError } from '../../../src/common/appError'; import { NFSConfig, S3Config } from '../../../src/common/interfaces'; import { getProvider, getProviderConfig } from '../../../src/providers/getProvider'; @@ -35,7 +36,7 @@ describe('getProvider tests', () => { it('should throw an error when the provider is nor S3 or NFS', () => { const provider = 'bla'; - const response = () => getProvider(provider); + const response = () => 
getProvider(provider, container); expect(response).toThrow(AppError); }); diff --git a/tests/unit/providers/getProvider.spec.ts b/tests/unit/providers/getProvider.spec.ts new file mode 100644 index 0000000..35f3600 --- /dev/null +++ b/tests/unit/providers/getProvider.spec.ts @@ -0,0 +1,41 @@ +import config from 'config'; +import jsLogger from '@map-colonies/js-logger'; +import { container } from 'tsyringe'; +import { getProvider } from '../../../src/providers/getProvider'; +import { CrawlingProvider } from '../../../src/providers/crawlingProvider'; +import { NFSProvider } from '../../../src/providers/nfsProvider'; +import { getApp } from '../../../src/app'; +import { SERVICES } from '../../../src/common/constants'; + +describe('getProvider tests', () => { + beforeAll(() => { + getApp({ + override: [ + { token: SERVICES.LOGGER, provider: { useValue: jsLogger({ enabled: false }) } }, + { token: SERVICES.PROVIDER, provider: { useFactory: (container) => getProvider('crawling', container) } }, + ], + }); + }); + + afterAll(function () { + container.reset(); + }); + + afterEach(() => { + jest.clearAllMocks(); + }); + + it('should recursively load provider', () => { + const provider = getProvider('crawling', container); + expect(provider).toBeInstanceOf(CrawlingProvider); + const crawlingProviderInstance = provider as CrawlingProvider; + // @ts-expect-error Accessing protected member + expect(crawlingProviderInstance.config).toEqual(config.get('crawling')); + // @ts-expect-error Accessing protected member + expect(crawlingProviderInstance.underlying).toBeInstanceOf(NFSProvider); + // @ts-expect-error Accessing protected member + const underlying = crawlingProviderInstance.underlying as NFSProvider; + // @ts-expect-error Accessing protected member + expect(underlying.config).toEqual(config.get('NFS')); + }); +}); From f91272d783611159f344286dd9aacd78c865a99c Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Wed, 29 Apr 2026 17:01:55 +0300 Subject: [PATCH 04/21] 
refactor: removed blacklist and updated crawling --- config/custom-environment-variables.json | 3 +- config/default.json | 5 +-- config/test.json | 5 +-- helm/templates/configmap.yaml | 1 - helm/values.yaml | 1 - .../crawlingInstance.ts} | 21 +++++----- .../models/jobOperationsManager.ts | 29 +++----------- src/providers/getProvider.ts | 38 ++++++++++++++----- .../providers/crawlingProvider.spec.ts | 25 ++++++------ tests/unit/providers/getProvider.spec.ts | 17 ++------- 10 files changed, 64 insertions(+), 81 deletions(-) rename src/{providers/crawlingProvider.ts => handlers/crawlingInstance.ts} (79%) diff --git a/config/custom-environment-variables.json b/config/custom-environment-variables.json index 701712e..9f28889 100644 --- a/config/custom-environment-variables.json +++ b/config/custom-environment-variables.json @@ -69,8 +69,7 @@ "pvPath": "PV_SOURCE_PATH" }, "ingestion": { - "provider": "PROVIDER_FROM", - "blackList": "BLACK_LIST" + "provider": "PROVIDER_FROM" }, "jobManager": { "url": "JOB_MANAGER_URL", diff --git a/config/default.json b/config/default.json index 6b0633d..60287ef 100644 --- a/config/default.json +++ b/config/default.json @@ -48,13 +48,12 @@ }, "crawling": { "extension": ".json", - "nestedJsonPath": "$.root..uri", + "nestedJsonPath": "$.root..['uri', 'url']", "ignoreNotFound": true, "underlying": "NFS" }, "ingestion": { - "provider": "NFS", - "blackList": ["tar", "zip", "rar", "7z"] + "provider": "NFS" }, "jobManager": { "url": "http://127.0.0.1:8080", diff --git a/config/test.json b/config/test.json index 7008d53..94ced98 100644 --- a/config/test.json +++ b/config/test.json @@ -1,7 +1,7 @@ { "crawling": { "extension": ".json", - "nestedJsonPath": "$.root..uri", + "nestedJsonPath": "$.root..['uri', 'url']", "ignoreNotFound": true, "underlying": "NFS" }, @@ -19,8 +19,7 @@ "pvPath": "./tests/helpers/3DModels" }, "ingestion": { - "provider": "S3", - "blackList": ["tar", "zip", "rar", "7z"] + "provider": "S3" }, "jobManager": { "url": 
"http://127.0.0.1:8080", diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 9b53e28..caae132 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -26,7 +26,6 @@ data: REQUESTS_CA_BUNDLE: {{ printf "%s/%s" $ca.path $ca.key | quote }} NODE_EXTRA_CA_CERTS: {{ printf "%s/%s" $ca.path $ca.key | quote }} {{- end }} - BLACK_LIST: {{ .Values.env.blackList | quote }} MAX_CONCURRENCY: {{ .Values.env.maxConcurrency | quote }} {{ if eq $provider "S3" }} {{- $S3 := (include "merged.S3" . ) | fromYaml }} diff --git a/helm/values.yaml b/helm/values.yaml index a260449..bd62d7f 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -130,7 +130,6 @@ env: metrics: enabled: false url: '' - blackList: ["tar", "zip", "rar", "7z"] maxConcurrency: 5 resources: diff --git a/src/providers/crawlingProvider.ts b/src/handlers/crawlingInstance.ts similarity index 79% rename from src/providers/crawlingProvider.ts rename to src/handlers/crawlingInstance.ts index d7a4a9b..724020e 100644 --- a/src/providers/crawlingProvider.ts +++ b/src/handlers/crawlingInstance.ts @@ -1,31 +1,28 @@ import Path from 'path'; import { Logger } from '@map-colonies/js-logger'; import { StatusCodes } from 'http-status-codes'; -import { inject, injectable } from 'tsyringe'; import { Tracer } from '@opentelemetry/api'; import { withSpanAsyncV4 } from '@map-colonies/telemetry'; import jsonpath from 'jsonpath'; -import { QueueFileHandler } from '../handlers/queueFileHandler'; import { AppError } from '../common/appError'; -import { SERVICES } from '../common/constants'; import { CrawlingConfig, LogContext, Provider } from '../common/interfaces'; +import { QueueFileHandler } from './queueFileHandler'; -@injectable() -export class CrawlingProvider implements Provider { +export class CrawlingInstance implements Provider { private readonly logContext: LogContext; public constructor( - @inject(SERVICES.LOGGER) protected readonly logger: Logger, - 
@inject(SERVICES.TRACER) public readonly tracer: Tracer, - @inject(SERVICES.PROVIDER_CONFIG) protected readonly config: CrawlingConfig, - @inject(SERVICES.UNDERLYING) protected readonly underlying: Provider, - @inject(SERVICES.QUEUE_FILE_HANDLER) protected readonly queueFileHandler: QueueFileHandler + private readonly logger: Logger, + public readonly tracer: Tracer, + public readonly config: CrawlingConfig, + private readonly underlying: Provider, + private readonly queueFileHandler: QueueFileHandler ) { this.logContext = { fileName: __filename, - class: CrawlingProvider.name, + class: CrawlingInstance.name, }; - if (this.underlying instanceof CrawlingProvider) { + if (this.underlying instanceof CrawlingInstance) { throw new AppError(StatusCodes.BAD_REQUEST, `Invalid config in provider: Do not nest crawling providers.`, false); } } diff --git a/src/jobOperations/models/jobOperationsManager.ts b/src/jobOperations/models/jobOperationsManager.ts index f273a25..24134d2 100644 --- a/src/jobOperations/models/jobOperationsManager.ts +++ b/src/jobOperations/models/jobOperationsManager.ts @@ -268,27 +268,17 @@ export class JobOperationsManager { @withSpanV4 private createTasks(batchSize: number, modelId: string): ICreateTaskBody[] { - const logContext = { ...this.logContext, function: this.createTasks.name }; const tasks: ICreateTaskBody[] = []; let chunk: string[] = []; let data: string | null = this.queueFileHandler.readline(modelId); while (data !== null) { - if (this.isFileInBlackList(data)) { - this.logger.warn({ - msg: 'The file is is the black list! 
Ignored...', - logContext, - file: data, - modelId, - }); - } else { - chunk.push(data); - - if (chunk.length === batchSize) { - const task = this.buildTaskFromChunk(chunk, modelId); - tasks.push(task); - chunk = []; - } + chunk.push(data); + + if (chunk.length === batchSize) { + const task = this.buildTaskFromChunk(chunk, modelId); + tasks.push(task); + chunk = []; } data = this.queueFileHandler.readline(modelId); @@ -314,11 +304,4 @@ export class JobOperationsManager { const parameters: IngestionTaskParameters = { paths: chunk, modelId, lastIndexError: -1 }; return { type: INGESTION_TASK_TYPE, parameters }; } - - private isFileInBlackList(data: string): boolean { - const blackList = this.config.get('ingestion.blackList'); - // eslint-disable-next-line @typescript-eslint/no-magic-numbers - const fileExtension = data.split('.').slice(-1)[0]; - return blackList.includes(fileExtension); - } } diff --git a/src/providers/getProvider.ts b/src/providers/getProvider.ts index 58dba51..d41a053 100644 --- a/src/providers/getProvider.ts +++ b/src/providers/getProvider.ts @@ -1,29 +1,26 @@ import config from 'config'; import httpStatus from 'http-status-codes'; import { DependencyContainer } from 'tsyringe'; +import { CrawlingInstance } from '../handlers/crawlingInstance'; import { AppError } from '../common/appError'; import { CrawlingConfig, Provider, ProviderConfig } from '../common/interfaces'; import { SERVICES } from '../common/constants'; import { NFSProvider } from './nfsProvider'; import { S3Provider } from './s3Provider'; -import { CrawlingProvider } from './crawlingProvider'; const PROVIDER_CONFIG = Symbol('ProviderConfig'); function getProvider(provider: string, container: DependencyContainer): Provider { const childContainer = container.createChildContainer(); childContainer.register(PROVIDER_CONFIG, { useValue: provider }); + + let BaseProvider: Provider; switch (provider.toLowerCase()) { case 'nfs': - return childContainer.resolve(NFSProvider); + BaseProvider = 
childContainer.resolve(NFSProvider); + break; case 's3': - return childContainer.resolve(S3Provider); - case 'crawling': { - const underlying = childContainer.resolve(SERVICES.PROVIDER_CONFIG).underlying!; - childContainer.register(SERVICES.UNDERLYING, { - useFactory: (childContainer) => getProvider(underlying, childContainer), - }); - return childContainer.resolve(CrawlingProvider); - } + BaseProvider = childContainer.resolve(S3Provider); + break; default: throw new AppError( httpStatus.INTERNAL_SERVER_ERROR, @@ -31,6 +28,27 @@ function getProvider(provider: string, container: DependencyContainer): Provider false ); } + + try { + const crawlingConfig = config.get('crawling'); + if (typeof crawlingConfig.underlying === 'string' && crawlingConfig.underlying.toLowerCase() === provider.toLowerCase()) { + return new CrawlingInstance( + childContainer.resolve(SERVICES.LOGGER), + childContainer.resolve(SERVICES.TRACER), + crawlingConfig, + BaseProvider, + childContainer.resolve(SERVICES.QUEUE_FILE_HANDLER) + ); + } + } catch (err) { + throw new AppError( + httpStatus.INTERNAL_SERVER_ERROR, + `Failed while configuring crawling, Consult documentation for available values`, + false + ); + } + + return BaseProvider; } function getProviderConfig(container: string | DependencyContainer): ProviderConfig { diff --git a/tests/integration/providers/crawlingProvider.spec.ts b/tests/integration/providers/crawlingProvider.spec.ts index 898f02e..63c429d 100644 --- a/tests/integration/providers/crawlingProvider.spec.ts +++ b/tests/integration/providers/crawlingProvider.spec.ts @@ -9,13 +9,13 @@ import { getApp } from '../../../src/app'; import { SERVICES } from '../../../src/common/constants'; import { CrawlingConfig } from '../../../src/common/interfaces'; import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; -import { CrawlingProvider } from '../../../src/providers/crawlingProvider'; +import { CrawlingInstance } from '../../../src/handlers/crawlingInstance'; 
import { configProviderMock } from '../../helpers/mockCreator'; import { AppError } from '../../../src/common/appError'; // ToDo those are UNIT tests, NOT INTEGRATION!! But CI requires integration coverage -describe('CrawlingProvider tests', () => { - let provider: CrawlingProvider; +describe('CrawlingInstance tests', () => { + let crawler: CrawlingInstance; let queueFileHandler: QueueFileHandler; const logger: Logger = jsLogger({ enabled: false }); @@ -35,8 +35,9 @@ describe('CrawlingProvider tests', () => { { token: SERVICES.UNDERLYING, provider: { useValue: underlying } }, ], }); - provider = container.resolve(CrawlingProvider); queueFileHandler = container.resolve(QueueFileHandler); + const tracer = container.resolve(SERVICES.TRACER); + crawler = new CrawlingInstance(logger, tracer, config, underlying, queueFileHandler); }); afterAll(function () { @@ -50,8 +51,8 @@ describe('CrawlingProvider tests', () => { describe('constructor', () => { it('is a stupid test just because coverage fails CI', () => { const tracer = container.resolve(SERVICES.TRACER); - const provider = new CrawlingProvider(logger, tracer, config, underlying, queueFileHandler); - expect(() => new CrawlingProvider(logger, tracer, config, provider, queueFileHandler)).toThrow(AppError); + const provider = new CrawlingInstance(logger, tracer, config, underlying, queueFileHandler); + expect(() => new CrawlingInstance(logger, tracer, config, provider, queueFileHandler)).toThrow(AppError); }); }); @@ -60,7 +61,7 @@ describe('CrawlingProvider tests', () => { const filePath = 'A test??'; const buffetPromise = Promise.resolve(Buffer.from([80, 101, 114, 114, 121, 32, 116, 104, 101, 32, 116, 101, 115, 116, 63, 33, 63, 33])); underlying.getFile.mockResolvedValueOnce(buffetPromise); - const file = await provider.getFile(filePath); + const file = await crawler.getFile(filePath); expect(underlying.getFile).toHaveBeenCalledWith(filePath); expect(file.toString()).toBe('Perry the test?!?!'); }); @@ -98,7 +99,7 @@ 
describe('CrawlingProvider tests', () => { const expected: string[] = ['/x/y/0.json', '/x/1.json', '/x/2.json', '/x/y/a.b3dm', '/x/y/b.b3dm', '/x/bla/c.b3dm']; await queueFileHandler.createQueueFile(modelId); - await provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8').trimEnd().split('\n'); expect(result.sort().join('\n')).toBe(expected.sort().join('\n')); @@ -107,12 +108,12 @@ describe('CrawlingProvider tests', () => { it('should respect 404 ignore rules error on underlying.getFile error', async () => { const configWithIgnoreNotFound = { ...config, ignoreNotFound: true }; - const provider = new CrawlingProvider(logger, container.resolve(SERVICES.TRACER), configWithIgnoreNotFound, underlying, queueFileHandler); + const crawler = new CrawlingInstance(logger, container.resolve(SERVICES.TRACER), configWithIgnoreNotFound, underlying, queueFileHandler); underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); const modelName = faker.word.sample(); const modelId = faker.string.uuid(); - const result = provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); await expect(result).resolves.not.toThrow(); }); @@ -121,7 +122,7 @@ describe('CrawlingProvider tests', () => { const modelName = faker.word.sample(); const modelId = faker.string.uuid(); - const result = provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); await expect(result).rejects.toThrow(AppError); }); @@ -130,7 +131,7 @@ describe('CrawlingProvider tests', () => { const modelName = faker.word.sample(); const modelId = faker.string.uuid(); - const result = 
provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); await expect(result).rejects.toThrow(AppError); }); }); diff --git a/tests/unit/providers/getProvider.spec.ts b/tests/unit/providers/getProvider.spec.ts index 35f3600..4159109 100644 --- a/tests/unit/providers/getProvider.spec.ts +++ b/tests/unit/providers/getProvider.spec.ts @@ -1,9 +1,7 @@ -import config from 'config'; import jsLogger from '@map-colonies/js-logger'; import { container } from 'tsyringe'; import { getProvider } from '../../../src/providers/getProvider'; -import { CrawlingProvider } from '../../../src/providers/crawlingProvider'; -import { NFSProvider } from '../../../src/providers/nfsProvider'; +import { CrawlingInstance } from '../../../src/handlers/crawlingInstance'; import { getApp } from '../../../src/app'; import { SERVICES } from '../../../src/common/constants'; @@ -26,16 +24,7 @@ describe('getProvider tests', () => { }); it('should recursively load provider', () => { - const provider = getProvider('crawling', container); - expect(provider).toBeInstanceOf(CrawlingProvider); - const crawlingProviderInstance = provider as CrawlingProvider; - // @ts-expect-error Accessing protected member - expect(crawlingProviderInstance.config).toEqual(config.get('crawling')); - // @ts-expect-error Accessing protected member - expect(crawlingProviderInstance.underlying).toBeInstanceOf(NFSProvider); - // @ts-expect-error Accessing protected member - const underlying = crawlingProviderInstance.underlying as NFSProvider; - // @ts-expect-error Accessing protected member - expect(underlying.config).toEqual(config.get('NFS')); + const provider = getProvider('nfs', container); + expect(provider).toBeInstanceOf(CrawlingInstance); }); }); From 8d8540e9b93774c40b7149c9f94490bd748c8f94 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Tue, 5 May 2026 16:56:25 +0300 Subject: [PATCH 05/21] refactor: removed 
underlying and made crawling abstracted --- config/default.json | 3 +- config/test.json | 3 +- src/common/constants.ts | 1 - src/common/interfaces.ts | 5 +- src/handlers/crawlingInstance.ts | 86 ------------ src/providers/Crawling.ts | 112 +++++++++++++++ src/providers/getProvider.ts | 39 +----- src/providers/nfsProvider.ts | 103 ++++---------- src/providers/s3Provider.ts | 131 ++---------------- tests/helpers/nfsHelper.ts | 12 +- .../providers/crawlingProvider.spec.ts | 26 ++-- .../integration/providers/nfsProvider.spec.ts | 34 +++-- tests/unit/providers/getProvider.spec.ts | 6 +- 13 files changed, 198 insertions(+), 363 deletions(-) delete mode 100644 src/handlers/crawlingInstance.ts create mode 100644 src/providers/Crawling.ts diff --git a/config/default.json b/config/default.json index 60287ef..b7198e8 100644 --- a/config/default.json +++ b/config/default.json @@ -49,8 +49,7 @@ "crawling": { "extension": ".json", "nestedJsonPath": "$.root..['uri', 'url']", - "ignoreNotFound": true, - "underlying": "NFS" + "ignoreNotFound": true }, "ingestion": { "provider": "NFS" diff --git a/config/test.json b/config/test.json index 94ced98..a383e8b 100644 --- a/config/test.json +++ b/config/test.json @@ -2,8 +2,7 @@ "crawling": { "extension": ".json", "nestedJsonPath": "$.root..['uri', 'url']", - "ignoreNotFound": true, - "underlying": "NFS" + "ignoreNotFound": true }, "S3": { "accessKeyId": "minioadmin", diff --git a/src/common/constants.ts b/src/common/constants.ts index 464a508..38e84f4 100644 --- a/src/common/constants.ts +++ b/src/common/constants.ts @@ -24,6 +24,5 @@ export const SERVICES: Record = { PROVIDER_CONFIG: Symbol('ProviderConfig'), QUEUE_FILE_HANDLER: Symbol('QueueFileHandler'), JOB_MANAGER_CLIENT: Symbol('JobManagerClient'), - UNDERLYING: Symbol('Underlying'), }; /* eslint-enable @typescript-eslint/naming-convention */ diff --git a/src/common/interfaces.ts b/src/common/interfaces.ts index 4bfa8c6..34d08b3 100644 --- a/src/common/interfaces.ts +++ 
b/src/common/interfaces.ts @@ -58,7 +58,7 @@ export interface DeleteTaskParameters { blockDuplication?: boolean; } -export interface S3Config { +export interface S3Config extends CrawlingConfig { accessKeyId: string; secretAccessKey: string; endpointUrl: string; @@ -68,7 +68,7 @@ export interface S3Config { forcePathStyle: boolean; } -export interface NFSConfig { +export interface NFSConfig extends CrawlingConfig { pvPath: string; } @@ -76,7 +76,6 @@ export interface CrawlingConfig { extension: string; nestedJsonPath: string; ignoreNotFound?: boolean; - underlying?: string; } export type ProviderConfig = S3Config | NFSConfig | CrawlingConfig; diff --git a/src/handlers/crawlingInstance.ts b/src/handlers/crawlingInstance.ts deleted file mode 100644 index 724020e..0000000 --- a/src/handlers/crawlingInstance.ts +++ /dev/null @@ -1,86 +0,0 @@ -import Path from 'path'; -import { Logger } from '@map-colonies/js-logger'; -import { StatusCodes } from 'http-status-codes'; -import { Tracer } from '@opentelemetry/api'; -import { withSpanAsyncV4 } from '@map-colonies/telemetry'; -import jsonpath from 'jsonpath'; -import { AppError } from '../common/appError'; -import { CrawlingConfig, LogContext, Provider } from '../common/interfaces'; -import { QueueFileHandler } from './queueFileHandler'; - -export class CrawlingInstance implements Provider { - private readonly logContext: LogContext; - - public constructor( - private readonly logger: Logger, - public readonly tracer: Tracer, - public readonly config: CrawlingConfig, - private readonly underlying: Provider, - private readonly queueFileHandler: QueueFileHandler - ) { - this.logContext = { - fileName: __filename, - class: CrawlingInstance.name, - }; - if (this.underlying instanceof CrawlingInstance) { - throw new AppError(StatusCodes.BAD_REQUEST, `Invalid config in provider: Do not nest crawling providers.`, false); - } - } - - @withSpanAsyncV4 - public async getFile(filePath: string): Promise { - return 
this.underlying.getFile(filePath); - } - - @withSpanAsyncV4 - public async streamModelPathsToQueueFile(modelId: string, path: string, modelName: string): Promise { - const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; - let buffer: Buffer; - try { - buffer = await this.underlying.getFile(path); - } catch (err) { - if (this.config.ignoreNotFound! && err instanceof AppError && err.status === StatusCodes.NOT_FOUND) { - this.logger.warn({ - msg: 'Found a non-existing file, but instructed to ignore. Skipping...', - logContext, - path, - modelId, - modelName, - }); - return 0; - } else { - throw err; - } - } - const fileContent = buffer.toString(); - let file: object = {}; - try { - file = JSON.parse(fileContent) as object; - } catch (err) { - if (err instanceof SyntaxError) { - this.logger.error({ - msg: 'File is not a valid JSON', - logContext, - path, - modelId, - modelName, - }); - throw new AppError(StatusCodes.NOT_ACCEPTABLE, 'File is not a valid JSON', false); - } else { - throw err; - } - } - - const nestedFiles = jsonpath.query(file, this.config.nestedJsonPath).map((child: string) => Path.resolve('/', Path.dirname(path), child)); - const leafs = nestedFiles.filter((path) => !path.endsWith(this.config.extension)); - const addedFilePromises = [...leafs, path].map(async (path) => { - await this.queueFileHandler.writeFileNameToQueueFile(modelId, path); - return 1; - }); - - const children = nestedFiles.filter((path) => path.endsWith(this.config.extension)); - const countPromises = children.map(async (path) => this.streamModelPathsToQueueFile(modelId, path, modelName)); - const counts = await Promise.all([...countPromises, ...addedFilePromises]); - return counts.reduce((a, b) => a + b); - } -} diff --git a/src/providers/Crawling.ts b/src/providers/Crawling.ts new file mode 100644 index 0000000..7f7cf49 --- /dev/null +++ b/src/providers/Crawling.ts @@ -0,0 +1,112 @@ +import Path from 'path'; +import { Logger } from 
'@map-colonies/js-logger'; +import { StatusCodes } from 'http-status-codes'; +import { Tracer } from '@opentelemetry/api'; +import { withSpanAsyncV4 } from '@map-colonies/telemetry'; +import jsonpath from 'jsonpath'; +import { AppError } from '../common/appError'; +import { CrawlingConfig, LogContext, Provider } from '../common/interfaces'; +import { QueueFileHandler } from '../handlers/queueFileHandler'; + +export abstract class Crawling implements Provider { + protected readonly logContext: LogContext; + + public constructor( + protected readonly logger: Logger, + public readonly tracer: Tracer, + protected readonly config: T, + protected readonly queueFileHandler: QueueFileHandler + ) { + this.logContext = { + fileName: __filename, + class: Crawling.name, + }; + } + + @withSpanAsyncV4 + public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { + const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; + + this.logger.info({ + msg: 'Started streaming model paths to queue file', + logContext, + modelName, + modelId, + pathToTileset, + }); + + const visitedFiles = new Set(); + const processingQueue: string[] = [pathToTileset]; + let totalFilesAdded = 0; + + while (processingQueue.length > 0) { + const currentPath = processingQueue.shift()!; + + if (visitedFiles.has(currentPath)) { + continue; + } + visitedFiles.add(currentPath); + + try { + const buffer = await this.getFile(currentPath); + + await this.queueFileHandler.writeFileNameToQueueFile(modelId, currentPath); + totalFilesAdded++; + + if (currentPath.endsWith(this.config.extension)) { + const nestedPaths = this.extractPathsFromJson(buffer, currentPath); + + for (const nestedPath of nestedPaths) { + if (nestedPath.endsWith(this.config.extension)) { + processingQueue.push(nestedPath); + } else if (!visitedFiles.has(nestedPath)) { + await this.queueFileHandler.writeFileNameToQueueFile(modelId, nestedPath); + 
visitedFiles.add(nestedPath); + totalFilesAdded++; + } + } + } + } catch (err) { + if (this.config.ignoreNotFound! && err instanceof AppError && err.status === StatusCodes.NOT_FOUND) { + this.logger.warn({ msg: 'File not found, skipping...', logContext, path: currentPath, modelName }); + continue; + } + + this.logger.error({ + msg: 'Failed to stream model paths to queue file', + logContext, + modelName, + modelId, + path: currentPath, + err, + }); + throw err; + } + } + + this.logger.info({ + msg: 'Finished streaming model paths to queue file', + logContext, + modelName, + modelId, + totalFilesAdded, + }); + + return totalFilesAdded; + } + + private extractPathsFromJson(buffer: Buffer, currentPath: string): string[] { + try { + const fileContent = buffer.toString(); + const json = JSON.parse(fileContent) as object; + const results = jsonpath.query(json, this.config.nestedJsonPath) as string[]; + + return results.map((child) => Path.resolve('/', Path.dirname(currentPath), child)); + } catch (err) { + this.logger.error({ msg: 'Failed to parse JSON', path: currentPath, err }); + return []; + } + } + + public abstract getFile(filePath: string): Promise; +} diff --git a/src/providers/getProvider.ts b/src/providers/getProvider.ts index d41a053..4e16b74 100644 --- a/src/providers/getProvider.ts +++ b/src/providers/getProvider.ts @@ -1,10 +1,8 @@ import config from 'config'; import httpStatus from 'http-status-codes'; import { DependencyContainer } from 'tsyringe'; -import { CrawlingInstance } from '../handlers/crawlingInstance'; import { AppError } from '../common/appError'; -import { CrawlingConfig, Provider, ProviderConfig } from '../common/interfaces'; -import { SERVICES } from '../common/constants'; +import { Provider, ProviderConfig } from '../common/interfaces'; import { NFSProvider } from './nfsProvider'; import { S3Provider } from './s3Provider'; @@ -12,43 +10,14 @@ const PROVIDER_CONFIG = Symbol('ProviderConfig'); function getProvider(provider: string, container: 
DependencyContainer): Provider { const childContainer = container.createChildContainer(); childContainer.register(PROVIDER_CONFIG, { useValue: provider }); - - let BaseProvider: Provider; switch (provider.toLowerCase()) { case 'nfs': - BaseProvider = childContainer.resolve(NFSProvider); - break; + return childContainer.resolve(NFSProvider); case 's3': - BaseProvider = childContainer.resolve(S3Provider); - break; + return childContainer.resolve(S3Provider); default: - throw new AppError( - httpStatus.INTERNAL_SERVER_ERROR, - `Invalid config provider received: ${provider}. Consult documentation for available values`, - false - ); + throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, `Invalid config provider received: ${provider} - available values: "nfs" or "s3"`, false); } - - try { - const crawlingConfig = config.get('crawling'); - if (typeof crawlingConfig.underlying === 'string' && crawlingConfig.underlying.toLowerCase() === provider.toLowerCase()) { - return new CrawlingInstance( - childContainer.resolve(SERVICES.LOGGER), - childContainer.resolve(SERVICES.TRACER), - crawlingConfig, - BaseProvider, - childContainer.resolve(SERVICES.QUEUE_FILE_HANDLER) - ); - } - } catch (err) { - throw new AppError( - httpStatus.INTERNAL_SERVER_ERROR, - `Failed while configuring crawling, Consult documentation for available values`, - false - ); - } - - return BaseProvider; } function getProviderConfig(container: string | DependencyContainer): ProviderConfig { diff --git a/src/providers/nfsProvider.ts b/src/providers/nfsProvider.ts index bf8044c..bfdbf91 100644 --- a/src/providers/nfsProvider.ts +++ b/src/providers/nfsProvider.ts @@ -1,18 +1,20 @@ import fs from 'fs/promises'; -import { Logger } from '@map-colonies/js-logger'; +import Path from 'path'; import httpStatus from 'http-status-codes'; import { inject, injectable } from 'tsyringe'; +import { Logger } from '@map-colonies/js-logger'; import { Tracer } from '@opentelemetry/api'; import { withSpanAsyncV4 } from 
'@map-colonies/telemetry'; import { QueueFileHandler } from '../handlers/queueFileHandler'; -import { AppError } from '../common/appError'; import { SERVICES } from '../common/constants'; -import { Provider, NFSConfig, LogContext } from '../common/interfaces'; +import { NFSConfig, LogContext } from '../common/interfaces'; +import { AppError } from '../common/appError'; +import { Crawling } from './Crawling'; -// ToDo: merge this class with the identical class in file-syncer @injectable() -export class NFSProvider implements Provider { - private readonly logContext: LogContext; +export class NFSProvider extends Crawling { + protected override readonly logContext: LogContext; + private readonly pvPath: string; public constructor( @inject(SERVICES.LOGGER) protected readonly logger: Logger, @@ -20,91 +22,40 @@ export class NFSProvider implements Provider { @inject(SERVICES.PROVIDER_CONFIG) protected readonly config: NFSConfig, @inject(SERVICES.QUEUE_FILE_HANDLER) protected readonly queueFileHandler: QueueFileHandler ) { + super(logger, tracer, config, queueFileHandler); + this.logContext = { fileName: __filename, class: NFSProvider.name, }; + this.pvPath = this.config.pvPath; } @withSpanAsyncV4 - public async getFile(filePath: string): Promise { + public override async getFile(filePath: string): Promise { const logContext = { ...this.logContext, function: this.getFile.name }; - const pvPath = this.config.pvPath; - const fullPath = `${pvPath}/${filePath}`; this.logger.debug({ - msg: 'Starting getFile', + msg: 'Starting to get file', logContext, - fullPath, + filePath, }); - const data = await fs.readFile(fullPath); - this.logger.debug({ - msg: 'Done getFile', - logContext, - }); - return data; - } - @withSpanAsyncV4 - public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { - const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; - let filesCount = 0; - const modelPath = 
`${this.config.pvPath}/${pathToTileset}`; + const fullPath = Path.join(this.pvPath, filePath); + try { - await fs.access(modelPath); + const data = await fs.readFile(fullPath); + return data; } catch (err) { - this.logger.error({ - msg: 'failed to access the folder', - logContext, - modelId, - modelName, - err, - }); - throw new AppError(httpStatus.NOT_FOUND, `Model ${modelName} doesn't exists in the agreed folder. Path: ${modelPath}`, true); - } - - const folders: string[] = [pathToTileset]; - - while (folders.length > 0) { - const files = await fs.readdir(`${this.config.pvPath}/${folders[0]}`); - this.logger.debug({ - msg: 'Listing folder', - logContext, - folder: folders[0], - filesCount, - modelId, - modelName, - }); - for (const file of files) { - const fileStats = await fs.stat(`${this.config.pvPath}/${folders[0]}/${file}`); - if (fileStats.isDirectory()) { - folders.push(`${folders[0]}/${file}`); - } else { - try { - await this.queueFileHandler.writeFileNameToQueueFile(modelId, `${folders[0]}/${file}`); - filesCount++; - } catch (err) { - this.logger.error({ - msg: `Didn't write the file: '${folders[0]}/${file}' in FS.`, - logContext, - modelId, - modelName, - err, - }); - throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, 'problem with queueFileHandler', false); - } - } + const error = err as NodeJS.ErrnoException; + + if (error.code === 'ENOENT') { + throw new AppError(httpStatus.NOT_FOUND, `File ${filePath} not found`, true); } - - folders.shift(); + if (error.code === 'EISDIR') { + throw new AppError(httpStatus.BAD_REQUEST, `${filePath} is a directory, expected a file`, true); + } + + throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, `Error reading file: ${error.message}`, true); } - - this.logger.info({ - msg: 'Finished listing the files', - logContext, - filesCount: filesCount, - modelName, - modelId, - }); - return filesCount; } } diff --git a/src/providers/s3Provider.ts b/src/providers/s3Provider.ts index ea24e58..a407728 100644 --- 
a/src/providers/s3Provider.ts +++ b/src/providers/s3Provider.ts @@ -1,29 +1,21 @@ import { - CommonPrefix, - ListObjectsCommand, GetObjectCommand, - ListObjectsRequest, S3Client, S3ClientConfig, - S3ServiceException, - _Object, } from '@aws-sdk/client-s3'; import { Logger } from '@map-colonies/js-logger'; -import httpStatus from 'http-status-codes'; import { inject, injectable } from 'tsyringe'; import { Tracer } from '@opentelemetry/api'; -import { withSpanAsyncV4, withSpanV4 } from '@map-colonies/telemetry'; +import { withSpanAsyncV4 } from '@map-colonies/telemetry'; import { QueueFileHandler } from '../handlers/queueFileHandler'; -import { AppError } from '../common/appError'; import { SERVICES } from '../common/constants'; -import { LogContext, Provider, S3Config } from '../common/interfaces'; +import { LogContext, S3Config } from '../common/interfaces'; +import { Crawling } from './Crawling'; -// ToDo: merge this class with the identical class in file-syncer @injectable() -export class S3Provider implements Provider { +export class S3Provider extends Crawling { + protected override readonly logContext: LogContext; private readonly s3: S3Client; - private filesCount: number; - private readonly logContext: LogContext; public constructor( @inject(SERVICES.LOGGER) protected readonly logger: Logger, @@ -31,6 +23,8 @@ export class S3Provider implements Provider { @inject(SERVICES.PROVIDER_CONFIG) protected readonly s3Config: S3Config, @inject(SERVICES.QUEUE_FILE_HANDLER) protected readonly queueFileHandler: QueueFileHandler ) { + super(logger, tracer, s3Config, queueFileHandler); + const s3ClientConfig: S3ClientConfig = { endpoint: this.s3Config.endpointUrl, forcePathStyle: this.s3Config.forcePathStyle, @@ -42,7 +36,6 @@ export class S3Provider implements Provider { }; this.s3 = new S3Client(s3ClientConfig); - this.filesCount = 0; this.logContext = { fileName: __filename, @@ -51,7 +44,7 @@ export class S3Provider implements Provider { } @withSpanAsyncV4 - public 
async getFile(filePath: string): Promise { + public override async getFile(filePath: string): Promise { const logContext = { ...this.logContext, function: this.getFile.name }; this.logger.debug({ msg: 'Starting to get file', @@ -82,112 +75,4 @@ export class S3Provider implements Provider { throw new Error(`an error occurred during the get key ${filePath} on bucket ${this.s3Config.bucket}, ${s3Error.message}`); } } - - @withSpanAsyncV4 - public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { - const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; - /* eslint-disable @typescript-eslint/naming-convention */ - const params: ListObjectsRequest = { - Bucket: this.s3Config.bucket, - Delimiter: '/', - Prefix: pathToTileset + '/', - }; - - await this.listS3Recursively(modelId, params); - - if (await this.queueFileHandler.checkIfTempFileEmpty(modelId)) { - throw new AppError(httpStatus.NOT_FOUND, `Model ${modelName} doesn't exists in bucket ${this.s3Config.bucket}! 
Path: ${pathToTileset}`, true); - } - - this.logger.info({ - msg: 'Finished listing the files', - logContext, - filesCount: this.filesCount, - modelName, - modelId, - }); - const lastFileCount = this.filesCount; - this.filesCount = 0; - - return lastFileCount; - } - - @withSpanAsyncV4 - private async listS3Recursively(modelId: string, params: ListObjectsRequest): Promise { - const logContext = { ...this.logContext, function: this.listS3Recursively.name }; - try { - const listObject = new ListObjectsCommand(params); - const data = await this.s3.send(listObject); - - if (data.Contents) { - await this.writeFileContent(modelId, data.Contents); - } - - if (data.CommonPrefixes) { - await this.writeFolderContent(modelId, data.CommonPrefixes); - } - - if (data.IsTruncated === true) { - const nextParams: ListObjectsRequest = { - Bucket: this.s3Config.bucket, - Delimiter: '/', - Prefix: data.Prefix, - Marker: data.NextMarker, - }; - await this.listS3Recursively(modelId, nextParams); - } - - this.logger.debug({ - msg: `Listed ${this.filesCount} files`, - logContext, - modelId, - }); - } catch (err) { - this.logger.error({ - msg: 'failed in listing the model', - logContext, - modelId, - err, - }); - this.handleS3Error(this.s3Config.bucket, err); - } - } - - @withSpanAsyncV4 - private async writeFileContent(modelId: string, contents: _Object[]): Promise { - for (const content of contents) { - if (content.Key == undefined) { - throw new AppError(httpStatus.NO_CONTENT, 'found content without file name', true); - } - await this.queueFileHandler.writeFileNameToQueueFile(modelId, content.Key); - this.filesCount++; - } - } - - @withSpanAsyncV4 - private async writeFolderContent(modelId: string, CommonPrefixes: CommonPrefix[]): Promise { - for (const commonPrefix of CommonPrefixes) { - if (commonPrefix.Prefix != undefined) { - const nextParams: ListObjectsRequest = { - Bucket: this.s3Config.bucket, - Delimiter: '/', - Prefix: commonPrefix.Prefix, - }; - await 
this.listS3Recursively(modelId, nextParams); - } - } - } - - @withSpanV4 - private handleS3Error(s3Bucket: string, error: unknown): never { - let statusCode = httpStatus.INTERNAL_SERVER_ERROR; - let message = "Didn't throw a S3 exception in file"; - - if (error instanceof S3ServiceException) { - statusCode = error.$metadata.httpStatusCode ?? statusCode; - message = `${error.name}, message: ${error.message}, bucket: ${s3Bucket}`; - } - - throw new AppError(statusCode, message, true); - } } diff --git a/tests/helpers/nfsHelper.ts b/tests/helpers/nfsHelper.ts index add96b4..5ba8cdb 100644 --- a/tests/helpers/nfsHelper.ts +++ b/tests/helpers/nfsHelper.ts @@ -6,16 +6,18 @@ import { NFSConfig } from '../../src/common/interfaces'; export class NFSHelper { public constructor(private readonly config: NFSConfig) {} - public async createFileOfModel(modelName: string, file: string): Promise { + public async createFileOfModel(modelName: string, file: string, data?: string): Promise { const subFolders = path.dirname(file); const fileName = path.basename(file); - const dirPath = `${this.config.pvPath}/${modelName}/${subFolders}`; + const dirPath = path.join(this.config.pvPath, modelName, subFolders); + if (!fs.existsSync(dirPath)) { await this.createFolder(dirPath); } - const data = faker.word.words(); - await fs.promises.writeFile(`${dirPath}/${fileName}`, data); - return data; + + const content = data ?? 
faker.word.words(); + await fs.promises.writeFile(path.join(dirPath, fileName), content); + return content; } public async createFolder(path: string): Promise { diff --git a/tests/integration/providers/crawlingProvider.spec.ts b/tests/integration/providers/crawlingProvider.spec.ts index 63c429d..6c43ef4 100644 --- a/tests/integration/providers/crawlingProvider.spec.ts +++ b/tests/integration/providers/crawlingProvider.spec.ts @@ -7,15 +7,16 @@ import { StatusCodes } from 'http-status-codes'; import { Tracer } from '@opentelemetry/api'; import { getApp } from '../../../src/app'; import { SERVICES } from '../../../src/common/constants'; -import { CrawlingConfig } from '../../../src/common/interfaces'; +import { CrawlingConfig, NFSConfig } from '../../../src/common/interfaces'; import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; -import { CrawlingInstance } from '../../../src/handlers/crawlingInstance'; +import { Crawling } from '../../../src/providers/Crawling'; import { configProviderMock } from '../../helpers/mockCreator'; import { AppError } from '../../../src/common/appError'; +import { NFSProvider } from '../../../src/providers/nfsProvider'; // ToDo those are UNIT tests, NOT INTEGRATION!! 
But CI requires integration coverage -describe('CrawlingInstance tests', () => { - let crawler: CrawlingInstance; +describe('Crawling tests', () => { + let crawler: Crawling; let queueFileHandler: QueueFileHandler; const logger: Logger = jsLogger({ enabled: false }); @@ -23,7 +24,7 @@ describe('CrawlingInstance tests', () => { const queueFilePath = os.tmpdir(); const config: CrawlingConfig = { extension: '.json', - nestedJsonPath: '$.root..uri', + nestedJsonPath: "$.root..['uri', 'url']", ignoreNotFound: false, }; @@ -31,13 +32,12 @@ describe('CrawlingInstance tests', () => { getApp({ override: [ { token: SERVICES.LOGGER, provider: { useValue: logger } }, - { token: SERVICES.PROVIDER_CONFIG, provider: { useValue: config } }, - { token: SERVICES.UNDERLYING, provider: { useValue: underlying } }, + { token: SERVICES.PROVIDER_CONFIG, provider: { useValue: config } } ], }); queueFileHandler = container.resolve(QueueFileHandler); const tracer = container.resolve(SERVICES.TRACER); - crawler = new CrawlingInstance(logger, tracer, config, underlying, queueFileHandler); + crawler = new NFSProvider(logger, tracer, config, queueFileHandler); }); afterAll(function () { @@ -48,13 +48,6 @@ describe('CrawlingInstance tests', () => { jest.clearAllMocks(); }); - describe('constructor', () => { - it('is a stupid test just because coverage fails CI', () => { - const tracer = container.resolve(SERVICES.TRACER); - const provider = new CrawlingInstance(logger, tracer, config, underlying, queueFileHandler); - expect(() => new CrawlingInstance(logger, tracer, config, provider, queueFileHandler)).toThrow(AppError); - }); - }); describe('getFile', () => { it('should delegate', async () => { @@ -107,8 +100,7 @@ describe('CrawlingInstance tests', () => { }); it('should respect 404 ignore rules error on underlying.getFile error', async () => { - const configWithIgnoreNotFound = { ...config, ignoreNotFound: true }; - const crawler = new CrawlingInstance(logger, 
container.resolve(SERVICES.TRACER), configWithIgnoreNotFound, underlying, queueFileHandler); + const crawler = new NFSProvider(logger, container.resolve(SERVICES.TRACER), config, queueFileHandler); underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); const modelName = faker.word.sample(); const modelId = faker.string.uuid(); diff --git a/tests/integration/providers/nfsProvider.spec.ts b/tests/integration/providers/nfsProvider.spec.ts index 8c54227..2bdd151 100644 --- a/tests/integration/providers/nfsProvider.spec.ts +++ b/tests/integration/providers/nfsProvider.spec.ts @@ -58,22 +58,36 @@ describe('NFSProvider tests', () => { }); describe('streamModelPathsToQueueFile Function', () => { - it('if model exists in the agreed folder, returns all the file paths of the model', async () => { + it('if model exists and contains valid JSON, returns linked file paths', async () => { const modelId = faker.string.uuid(); + const modelName = 'interconnect'; + const entryFile = 'tileset.json'; + const pathToTileset = `${modelName}/${entryFile}`; + await queueFileHandler.createQueueFile(modelId); - const pathToTileset = faker.word.sample(); - const modelName = faker.word.sample(); - let expected = ''; - for (let i = 0; i < 4; i++) { - const file = i === 3 ? 
`${i}${createFile(false, true)}` : `${i}${createFile()}`; - await nfsHelper.createFileOfModel(pathToTileset, file); - expected = `${expected}${pathToTileset}/${file}\n`; - } + + const textureFile = 'text1.png'; + const childTileset = 'child.json'; + + const tilesetContent = JSON.stringify({ + root: { + content: { uri: childTileset }, + children: [{ content: { uri: textureFile } }] + } + }); + + await nfsHelper.createFileOfModel('', pathToTileset, tilesetContent); + + await nfsHelper.createFileOfModel(modelName, textureFile, 'data'); + await nfsHelper.createFileOfModel(modelName, childTileset, JSON.stringify({ asset: { version: "1.0" } })); await provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8'); - expect(result).toStrictEqual(expected); + console.log('Crawler Output:', result); + + expect(result).toContain(pathToTileset); await queueFileHandler.deleteQueueFile(modelId); }); diff --git a/tests/unit/providers/getProvider.spec.ts b/tests/unit/providers/getProvider.spec.ts index 4159109..640fb6b 100644 --- a/tests/unit/providers/getProvider.spec.ts +++ b/tests/unit/providers/getProvider.spec.ts @@ -1,16 +1,16 @@ import jsLogger from '@map-colonies/js-logger'; import { container } from 'tsyringe'; import { getProvider } from '../../../src/providers/getProvider'; -import { CrawlingInstance } from '../../../src/handlers/crawlingInstance'; import { getApp } from '../../../src/app'; import { SERVICES } from '../../../src/common/constants'; +import { NFSProvider } from '../../../src/providers/nfsProvider'; describe('getProvider tests', () => { beforeAll(() => { getApp({ override: [ { token: SERVICES.LOGGER, provider: { useValue: jsLogger({ enabled: false }) } }, - { token: SERVICES.PROVIDER, provider: { useFactory: (container) => getProvider('crawling', container) } }, + { token: SERVICES.PROVIDER, provider: { useFactory: (container) => getProvider('nfs', container) } }, ], }); 
}); @@ -25,6 +25,6 @@ describe('getProvider tests', () => { it('should recursively load provider', () => { const provider = getProvider('nfs', container); - expect(provider).toBeInstanceOf(CrawlingInstance); + expect(provider).toBeInstanceOf(NFSProvider); }); }); From e8b4a3d48588059776f403534a34c3a7014fd4c2 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Tue, 5 May 2026 17:09:47 +0300 Subject: [PATCH 06/21] refactor: added node: --- src/handlers/queueFileHandler.ts | 4 ++-- src/providers/Crawling.ts | 2 +- src/providers/nfsProvider.ts | 4 ++-- tests/configurations/initJestOpenapi.setup.ts | 2 +- tests/helpers/nfsHelper.ts | 4 ++-- tests/integration/handlers/queueFileHandler.spec.ts | 2 +- tests/integration/providers/crawlingProvider.spec.ts | 4 ++-- tests/integration/providers/nfsProvider.spec.ts | 4 ++-- tests/integration/providers/s3Provider.spec.ts | 4 ++-- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/handlers/queueFileHandler.ts b/src/handlers/queueFileHandler.ts index ce6445d..9d62257 100644 --- a/src/handlers/queueFileHandler.ts +++ b/src/handlers/queueFileHandler.ts @@ -1,5 +1,5 @@ -import fs from 'fs/promises'; -import os from 'os'; +import fs from 'node:fs/promises'; +import os from 'node:os'; import LineByLine from 'n-readlines'; import { singleton } from 'tsyringe'; diff --git a/src/providers/Crawling.ts b/src/providers/Crawling.ts index 7f7cf49..a080d04 100644 --- a/src/providers/Crawling.ts +++ b/src/providers/Crawling.ts @@ -1,4 +1,4 @@ -import Path from 'path'; +import Path from 'node:path'; import { Logger } from '@map-colonies/js-logger'; import { StatusCodes } from 'http-status-codes'; import { Tracer } from '@opentelemetry/api'; diff --git a/src/providers/nfsProvider.ts b/src/providers/nfsProvider.ts index bfdbf91..e53688d 100644 --- a/src/providers/nfsProvider.ts +++ b/src/providers/nfsProvider.ts @@ -1,5 +1,5 @@ -import fs from 'fs/promises'; -import Path from 'path'; +import fs from 'node:fs/promises'; +import 
Path from 'node:path'; import httpStatus from 'http-status-codes'; import { inject, injectable } from 'tsyringe'; import { Logger } from '@map-colonies/js-logger'; diff --git a/tests/configurations/initJestOpenapi.setup.ts b/tests/configurations/initJestOpenapi.setup.ts index e26f701..66745c4 100644 --- a/tests/configurations/initJestOpenapi.setup.ts +++ b/tests/configurations/initJestOpenapi.setup.ts @@ -1,4 +1,4 @@ -import path from 'path'; +import path from 'node:path'; import jestOpenApi from 'jest-openapi'; jestOpenApi(path.join(process.cwd(), 'bundledApi.yaml')); diff --git a/tests/helpers/nfsHelper.ts b/tests/helpers/nfsHelper.ts index 5ba8cdb..830e0d9 100644 --- a/tests/helpers/nfsHelper.ts +++ b/tests/helpers/nfsHelper.ts @@ -1,5 +1,5 @@ -import fs from 'fs'; -import path from 'path'; +import fs from 'node:fs'; +import path from 'node:path'; import { faker } from '@faker-js/faker'; import { NFSConfig } from '../../src/common/interfaces'; diff --git a/tests/integration/handlers/queueFileHandler.spec.ts b/tests/integration/handlers/queueFileHandler.spec.ts index 6c60766..6130835 100644 --- a/tests/integration/handlers/queueFileHandler.spec.ts +++ b/tests/integration/handlers/queueFileHandler.spec.ts @@ -1,4 +1,4 @@ -import fs from 'fs'; +import fs from 'node:fs'; import { faker } from '@faker-js/faker'; import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; diff --git a/tests/integration/providers/crawlingProvider.spec.ts b/tests/integration/providers/crawlingProvider.spec.ts index 6c43ef4..b4269ca 100644 --- a/tests/integration/providers/crawlingProvider.spec.ts +++ b/tests/integration/providers/crawlingProvider.spec.ts @@ -1,5 +1,5 @@ -import fs from 'fs'; -import os from 'os'; +import fs from 'node:fs'; +import os from 'node:os'; import jsLogger, { Logger } from '@map-colonies/js-logger'; import { container } from 'tsyringe'; import { faker } from '@faker-js/faker'; diff --git a/tests/integration/providers/nfsProvider.spec.ts 
b/tests/integration/providers/nfsProvider.spec.ts index 2bdd151..f4728cc 100644 --- a/tests/integration/providers/nfsProvider.spec.ts +++ b/tests/integration/providers/nfsProvider.spec.ts @@ -1,5 +1,5 @@ -import fs from 'fs'; -import os from 'os'; +import fs from 'node:fs'; +import os from 'node:os'; import config from 'config'; import { container } from 'tsyringe'; import httpStatus from 'http-status-codes'; diff --git a/tests/integration/providers/s3Provider.spec.ts b/tests/integration/providers/s3Provider.spec.ts index 16e2755..db807c5 100644 --- a/tests/integration/providers/s3Provider.spec.ts +++ b/tests/integration/providers/s3Provider.spec.ts @@ -1,5 +1,5 @@ -import fs from 'fs'; -import os from 'os'; +import fs from 'node:fs'; +import os from 'node:os'; import config from 'config'; import jsLogger from '@map-colonies/js-logger'; import { container } from 'tsyringe'; From aa26f5b83fced80c5342bbb5ab72c2f29f31fa75 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Tue, 5 May 2026 19:08:31 +0300 Subject: [PATCH 07/21] refactor: chnaged tests --- src/providers/Crawling.ts | 9 +- src/providers/s3Provider.ts | 11 +- tests/helpers/s3Helper.ts | 15 +- .../providers/crawlingProvider.spec.ts | 130 ------------------ .../integration/providers/s3Provider.spec.ts | 65 ++++++--- 5 files changed, 74 insertions(+), 156 deletions(-) delete mode 100644 tests/integration/providers/crawlingProvider.spec.ts diff --git a/src/providers/Crawling.ts b/src/providers/Crawling.ts index a080d04..4de6a02 100644 --- a/src/providers/Crawling.ts +++ b/src/providers/Crawling.ts @@ -55,7 +55,7 @@ export abstract class Crawling implements Provider { if (currentPath.endsWith(this.config.extension)) { const nestedPaths = this.extractPathsFromJson(buffer, currentPath); - + for (const nestedPath of nestedPaths) { if (nestedPath.endsWith(this.config.extension)) { processingQueue.push(nestedPath); @@ -101,7 +101,12 @@ export abstract class Crawling implements Provider { const json = 
JSON.parse(fileContent) as object; const results = jsonpath.query(json, this.config.nestedJsonPath) as string[]; - return results.map((child) => Path.resolve('/', Path.dirname(currentPath), child)); + const dirname = Path.dirname(currentPath); + + return results.map((child) => { + const joinedPath = dirname === '.' ? child : Path.join(dirname, child); + return joinedPath.replace(/\\/g, '/').replace(/^\//, ''); + }); } catch (err) { this.logger.error({ msg: 'Failed to parse JSON', path: currentPath, err }); return []; diff --git a/src/providers/s3Provider.ts b/src/providers/s3Provider.ts index a407728..a45686b 100644 --- a/src/providers/s3Provider.ts +++ b/src/providers/s3Provider.ts @@ -1,3 +1,4 @@ +import httpStatus from 'http-status-codes'; import { GetObjectCommand, S3Client, @@ -10,6 +11,7 @@ import { withSpanAsyncV4 } from '@map-colonies/telemetry'; import { QueueFileHandler } from '../handlers/queueFileHandler'; import { SERVICES } from '../common/constants'; import { LogContext, S3Config } from '../common/interfaces'; +import { AppError } from '../common/appError'; import { Crawling } from './Crawling'; @injectable() @@ -71,8 +73,15 @@ export class S3Provider extends Crawling { bucketName: this.s3Config.bucket, key: filePath, }); + const s3Error = err as Error; - throw new Error(`an error occurred during the get key ${filePath} on bucket ${this.s3Config.bucket}, ${s3Error.message}`); + const statusCode = (s3Error as unknown as { name: string }).name === 'NoSuchKey' ? 
httpStatus.NOT_FOUND : httpStatus.INTERNAL_SERVER_ERROR; + + throw new AppError( + statusCode, + `an error occurred during the get key ${filePath} on bucket ${this.s3Config.bucket}, ${s3Error.message}`, + true + ); } } } diff --git a/tests/helpers/s3Helper.ts b/tests/helpers/s3Helper.ts index 6ab9cae..6f45dce 100644 --- a/tests/helpers/s3Helper.ts +++ b/tests/helpers/s3Helper.ts @@ -51,16 +51,21 @@ export class S3Helper { await this.s3.send(command); } - public async createFileOfModel(model: string, file: string): Promise { - const data = Buffer.from(faker.word.words()); + public async createFileOfModel(model: string, file: string, data?: string | Buffer): Promise { + const content = data ?? faker.word.words(); + const bufferData = Buffer.isBuffer(content) ? content : Buffer.from(content); + + const key = model !== '' ? `${model}/${file}` : file; + const params: PutObjectCommandInput = { Bucket: this.s3Config.bucket, - Key: `${model}/${file}`, - Body: data, + Key: key, + Body: bufferData, }; + const command = new PutObjectCommand(params); await this.s3.send(command); - return data; + return bufferData; } public async clearBucket(bucket = this.s3Config.bucket): Promise { diff --git a/tests/integration/providers/crawlingProvider.spec.ts b/tests/integration/providers/crawlingProvider.spec.ts deleted file mode 100644 index b4269ca..0000000 --- a/tests/integration/providers/crawlingProvider.spec.ts +++ /dev/null @@ -1,130 +0,0 @@ -import fs from 'node:fs'; -import os from 'node:os'; -import jsLogger, { Logger } from '@map-colonies/js-logger'; -import { container } from 'tsyringe'; -import { faker } from '@faker-js/faker'; -import { StatusCodes } from 'http-status-codes'; -import { Tracer } from '@opentelemetry/api'; -import { getApp } from '../../../src/app'; -import { SERVICES } from '../../../src/common/constants'; -import { CrawlingConfig, NFSConfig } from '../../../src/common/interfaces'; -import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; 
-import { Crawling } from '../../../src/providers/Crawling'; -import { configProviderMock } from '../../helpers/mockCreator'; -import { AppError } from '../../../src/common/appError'; -import { NFSProvider } from '../../../src/providers/nfsProvider'; - -// ToDo those are UNIT tests, NOT INTEGRATION!! But CI requires integration coverage -describe('Crawling tests', () => { - let crawler: Crawling; - let queueFileHandler: QueueFileHandler; - const logger: Logger = jsLogger({ enabled: false }); - - const underlying = configProviderMock; - const queueFilePath = os.tmpdir(); - const config: CrawlingConfig = { - extension: '.json', - nestedJsonPath: "$.root..['uri', 'url']", - ignoreNotFound: false, - }; - - beforeAll(() => { - getApp({ - override: [ - { token: SERVICES.LOGGER, provider: { useValue: logger } }, - { token: SERVICES.PROVIDER_CONFIG, provider: { useValue: config } } - ], - }); - queueFileHandler = container.resolve(QueueFileHandler); - const tracer = container.resolve(SERVICES.TRACER); - crawler = new NFSProvider(logger, tracer, config, queueFileHandler); - }); - - afterAll(function () { - container.reset(); - }); - - afterEach(() => { - jest.clearAllMocks(); - }); - - - describe('getFile', () => { - it('should delegate', async () => { - const filePath = 'A test??'; - const buffetPromise = Promise.resolve(Buffer.from([80, 101, 114, 114, 121, 32, 116, 104, 101, 32, 116, 101, 115, 116, 63, 33, 63, 33])); - underlying.getFile.mockResolvedValueOnce(buffetPromise); - const file = await crawler.getFile(filePath); - expect(underlying.getFile).toHaveBeenCalledWith(filePath); - expect(file.toString()).toBe('Perry the test?!?!'); - }); - }); - - describe('streamModelPathsToQueueFile', () => { - const json0 = { - root: { - content: { uri: 'a.b3dm', boundingVolume: { region: [0] } }, - children: [ - { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { uri: 'b.b3dm' } }, - { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, 
content: { uri: '../1.json' }, children: [] }, - ], - }, - }; - const json1 = { root: { content: { uri: 'bla/c.b3dm' }, children: [{ content: { uri: '2.json' } }] } }; - const json2 = {}; - const pathToTileset = '/x/y/0.json'; - - it('should returns all the files from S3', async () => { - const modelName = faker.word.sample(); - const modelId = faker.string.uuid(); - - underlying.getFile - .mockImplementationOnce((path) => { - return path === pathToTileset && Buffer.from(JSON.stringify(json0), 'utf8'); - }) - .mockImplementationOnce((path) => { - return path === '/x/1.json' && Buffer.from(JSON.stringify(json1), 'utf8'); - }) - .mockImplementationOnce((path) => { - return path === '/x/2.json' && Buffer.from(JSON.stringify(json2), 'utf8'); - }); - - const expected: string[] = ['/x/y/0.json', '/x/1.json', '/x/2.json', '/x/y/a.b3dm', '/x/y/b.b3dm', '/x/bla/c.b3dm']; - await queueFileHandler.createQueueFile(modelId); - - await crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); - const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8').trimEnd().split('\n'); - - expect(result.sort().join('\n')).toBe(expected.sort().join('\n')); - await queueFileHandler.deleteQueueFile(modelId); - }); - - it('should respect 404 ignore rules error on underlying.getFile error', async () => { - const crawler = new NFSProvider(logger, container.resolve(SERVICES.TRACER), config, queueFileHandler); - underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); - const modelName = faker.word.sample(); - const modelId = faker.string.uuid(); - - const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); - await expect(result).resolves.not.toThrow(); - }); - - it('should throw error on underlying.getFile error', async () => { - underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); - const modelName = faker.word.sample(); - const modelId = faker.string.uuid(); - 
- const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); - await expect(result).rejects.toThrow(AppError); - }); - - it('should throw error bad file', async () => { - underlying.getFile.mockReturnValueOnce(Buffer.from('}{', 'utf8')); - const modelName = faker.word.sample(); - const modelId = faker.string.uuid(); - - const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); - await expect(result).rejects.toThrow(AppError); - }); - }); -}); diff --git a/tests/integration/providers/s3Provider.spec.ts b/tests/integration/providers/s3Provider.spec.ts index db807c5..05574f3 100644 --- a/tests/integration/providers/s3Provider.spec.ts +++ b/tests/integration/providers/s3Provider.spec.ts @@ -21,10 +21,20 @@ describe('S3Provider tests', () => { const s3Config = config.get('S3'); beforeAll(async () => { + container.reset(); getApp({ override: [ { token: SERVICES.LOGGER, provider: { useValue: jsLogger({ enabled: false }) } }, - { token: SERVICES.PROVIDER_CONFIG, provider: { useValue: s3Config } }, + { token: SERVICES.PROVIDER_CONFIG, + provider: { + useValue: { + ...s3Config, + ignoreNotFound: false, + extension: '.json', + nestedJsonPath: "$..[uri,url]" + } + } + }, ], }); provider = container.resolve(S3Provider); @@ -67,27 +77,46 @@ describe('S3Provider tests', () => { }); describe('streamModelPathsToQueueFile', () => { - it('returns all the files from S3', async () => { - const modelId = faker.word.sample(); - const modelName = faker.word.sample(); - const pathToTileset = faker.word.sample(); - const fileLength = faker.number.int({ min: 1, max: 5 }); - const expectedFiles: string[] = []; - for (let i = 0; i < fileLength; i++) { - const file = faker.word.sample(); - await s3Helper.createFileOfModel(pathToTileset, file); - expectedFiles.push(`${pathToTileset}/${file}`); - } + it('should recursively discover nested files across multiple directories and levels', async () => { + const modelId = faker.string.uuid(); + 
const modelName = 'complex-model'; + + const rootTileset = 'tileset.json'; + const subDir = 'folderA'; + const secondLevelJson = `${subDir}/sub-tileset.json`; + const leafFileJson = `${subDir}/data.json`; + const leafFileBinary = `${subDir}/geometry.b3dm`; + + const rootContent = JSON.stringify({ + root: { uri: secondLevelJson } + }); + + const subTilesetContent = JSON.stringify({ + buffers: [ + { uri: 'data.json' }, + { url: 'geometry.b3dm' } + ] + }); + + await s3Helper.createFileOfModel('', rootTileset, rootContent); + await s3Helper.createFileOfModel('', secondLevelJson, subTilesetContent); + await s3Helper.createFileOfModel('', leafFileJson, JSON.stringify({})); + await s3Helper.createFileOfModel('', leafFileBinary, Buffer.from('fake-binary-data')); + await queueFileHandler.createQueueFile(modelId); - await s3Helper.createFileOfModel(pathToTileset, 'subDir/file'); - expectedFiles.push(`${pathToTileset}/subDir/file`); - await provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const totalAdded = await provider.streamModelPathsToQueueFile(modelId, rootTileset, modelName); + const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8'); + const filesInQueue = result.trim().split('\n').map(l => l.trim()); + + expect(totalAdded).toBe(4); + + expect(filesInQueue).toContain(rootTileset); + expect(filesInQueue).toContain(secondLevelJson); + expect(filesInQueue).toContain(leafFileJson); + expect(filesInQueue).toContain(leafFileBinary); - for (const file of expectedFiles) { - expect(result).toContain(file); - } await queueFileHandler.deleteQueueFile(modelId); }); From 7a29330c0a0596149d29fe9b0beefcb01ca14eff Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Tue, 5 May 2026 19:09:00 +0300 Subject: [PATCH 08/21] refactor: chnaged tests --- tests/integration/providers/crawling.spec.ts | 136 +++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 tests/integration/providers/crawling.spec.ts diff --git 
a/tests/integration/providers/crawling.spec.ts b/tests/integration/providers/crawling.spec.ts new file mode 100644 index 0000000..3f22100 --- /dev/null +++ b/tests/integration/providers/crawling.spec.ts @@ -0,0 +1,136 @@ +import fs from 'node:fs'; +import os from 'node:os'; +import jsLogger, { Logger } from '@map-colonies/js-logger'; +import { container } from 'tsyringe'; +import { faker } from '@faker-js/faker'; +import { StatusCodes } from 'http-status-codes'; +import { Tracer } from '@opentelemetry/api'; +import { getApp } from '../../../src/app'; +import { SERVICES } from '../../../src/common/constants'; +import { NFSConfig } from '../../../src/common/interfaces'; +import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; +import { Crawling } from '../../../src/providers/Crawling'; +import { configProviderMock } from '../../helpers/mockCreator'; +import { AppError } from '../../../src/common/appError'; +import { NFSProvider } from '../../../src/providers/nfsProvider'; + +// ToDo those are UNIT tests, NOT INTEGRATION!! 
But CI requires integration coverage +describe('Crawling tests', () => { + let crawler: Crawling; + let queueFileHandler: QueueFileHandler; + const logger: Logger = jsLogger({ enabled: false }); + + const underlying = configProviderMock; + const queueFilePath = os.tmpdir(); + const config: NFSConfig = { + extension: '.json', + nestedJsonPath: "$.root..['uri', 'url']", + ignoreNotFound: false, + pvPath: "test_pv_path", + }; + + beforeAll(() => { + getApp({ + override: [ + { token: SERVICES.LOGGER, provider: { useValue: logger } }, + { token: SERVICES.PROVIDER_CONFIG, provider: { useValue: config } } + ], + }); + queueFileHandler = container.resolve(QueueFileHandler); + const tracer = container.resolve(SERVICES.TRACER); + crawler = new NFSProvider(logger, tracer, config, queueFileHandler); + }); + + afterAll(function () { + container.reset(); + }); + + afterEach(() => { + jest.clearAllMocks(); + }); + + + describe('getFile', () => { + it('should delegate', async () => { + const filePath = 'A test??'; + const buffetPromise = Promise.resolve(Buffer.from([80, 101, 114, 114, 121, 32, 116, 104, 101, 32, 116, 101, 115, 116, 63, 33, 63, 33])); + underlying.getFile.mockResolvedValueOnce(buffetPromise); + const file = await crawler.getFile(filePath); + expect(underlying.getFile).toHaveBeenCalledWith(filePath); + expect(file.toString()).toBe('Perry the test?!?!'); + }); + }); + + describe('streamModelPathsToQueueFile', () => { + const json0 = { + root: { + content: { uri: 'a.b3dm', boundingVolume: { region: [0] } }, + children: [ + { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { uri: 'b.b3dm' } }, + { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { uri: '../1.json' }, children: [] }, + ], + }, + }; + const json1 = { root: { content: { uri: 'bla/c.b3dm' }, children: [{ content: { uri: '2.json' } }] } }; + const json2 = {}; + const pathToTileset = '/x/y/0.json'; + + it('should returns all the files', async () => { + 
const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const getFileSpy = jest.spyOn(crawler, 'getFile'); + + // eslint-disable-next-line @typescript-eslint/require-await + getFileSpy.mockImplementation(async (path) => { + if (path === pathToTileset) { + return Buffer.from(JSON.stringify(json0)); + } + if (path === '/x/1.json') { + return Buffer.from(JSON.stringify(json1)); + } + if (path === '/x/2.json') { + return Buffer.from(JSON.stringify(json2)); + } + return Buffer.from('content'); + }); + + await queueFileHandler.createQueueFile(modelId); + await crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + + const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8').trim().split('\n'); + + expect(result).toEqual(expect.arrayContaining(['/x/y/0.json', '/x/1.json', '/x/2.json'])); + + getFileSpy.mockRestore(); + }); + + it('should respect 404 ignore rules error on underlying.getFile error', async () => { + const crawler = new NFSProvider(logger, container.resolve(SERVICES.TRACER), config, queueFileHandler); + underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).resolves.not.toThrow(); + }); + + it('should throw error on underlying.getFile error', async () => { + underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); + + const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).rejects.toThrow(AppError); + }); + + it('should throw error bad file', async () => { + underlying.getFile.mockReturnValueOnce(Buffer.from('}{', 'utf8')); + const modelName = faker.word.sample(); + const modelId = faker.string.uuid(); 
+ + const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).rejects.toThrow(AppError); + }); + }); +}); From 2acca55770514d735841a78ea5497534cc858b89 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Wed, 6 May 2026 18:38:48 +0300 Subject: [PATCH 09/21] refactor: chnaged tests --- tests/integration/providers/crawling.spec.ts | 25 +++++++++++++------ .../integration/providers/s3Provider.spec.ts | 18 ++++++------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/tests/integration/providers/crawling.spec.ts b/tests/integration/providers/crawling.spec.ts index 3f22100..b0ea8e0 100644 --- a/tests/integration/providers/crawling.spec.ts +++ b/tests/integration/providers/crawling.spec.ts @@ -24,7 +24,7 @@ describe('Crawling tests', () => { const queueFilePath = os.tmpdir(); const config: NFSConfig = { extension: '.json', - nestedJsonPath: "$.root..['uri', 'url']", + nestedJsonPath: "$..['uri', 'url']", ignoreNotFound: false, pvPath: "test_pv_path", }; @@ -33,7 +33,16 @@ describe('Crawling tests', () => { getApp({ override: [ { token: SERVICES.LOGGER, provider: { useValue: logger } }, - { token: SERVICES.PROVIDER_CONFIG, provider: { useValue: config } } + { token: SERVICES.PROVIDER_CONFIG, + provider: { + useValue: { + ...config, + ignoreNotFound: false, + extension: '.json', + nestedJsonPath: "$..['uri','url']", + } + } + }, ], }); queueFileHandler = container.resolve(QueueFileHandler); @@ -83,23 +92,25 @@ describe('Crawling tests', () => { // eslint-disable-next-line @typescript-eslint/require-await getFileSpy.mockImplementation(async (path) => { - if (path === pathToTileset) { + const normalizedPath = path.replace(/\\/g, '/'); + if (normalizedPath === pathToTileset) { return Buffer.from(JSON.stringify(json0)); } - if (path === '/x/1.json') { + if (normalizedPath === '/x/1.json') { return Buffer.from(JSON.stringify(json1)); } - if (path === '/x/2.json') { + if (normalizedPath === '/x/2.json') { 
return Buffer.from(JSON.stringify(json2)); } return Buffer.from('content'); }); await queueFileHandler.createQueueFile(modelId); - await crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); - + const total = await crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8').trim().split('\n'); + expect(total).toBe(6); expect(result).toEqual(expect.arrayContaining(['/x/y/0.json', '/x/1.json', '/x/2.json'])); getFileSpy.mockRestore(); diff --git a/tests/integration/providers/s3Provider.spec.ts b/tests/integration/providers/s3Provider.spec.ts index 05574f3..d35514b 100644 --- a/tests/integration/providers/s3Provider.spec.ts +++ b/tests/integration/providers/s3Provider.spec.ts @@ -26,15 +26,15 @@ describe('S3Provider tests', () => { override: [ { token: SERVICES.LOGGER, provider: { useValue: jsLogger({ enabled: false }) } }, { token: SERVICES.PROVIDER_CONFIG, - provider: { - useValue: { - ...s3Config, - ignoreNotFound: false, - extension: '.json', - nestedJsonPath: "$..[uri,url]" + provider: { + useValue: { + ...s3Config, + ignoreNotFound: false, + extension: '.json', + nestedJsonPath: "$..['uri','url']", + } } - } - }, + }, ], }); provider = container.resolve(S3Provider); @@ -88,7 +88,7 @@ describe('S3Provider tests', () => { const leafFileBinary = `${subDir}/geometry.b3dm`; const rootContent = JSON.stringify({ - root: { uri: secondLevelJson } + root: { uri: secondLevelJson, url: secondLevelJson } }); const subTilesetContent = JSON.stringify({ From cdace38de2a6c2d362db3396062d089a8140cdd6 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Sun, 10 May 2026 12:50:30 +0300 Subject: [PATCH 10/21] test: fix last test and fix uri-url --- config/default.json | 2 +- config/test.json | 2 +- src/providers/{Crawling.ts => crawling.ts} | 0 src/providers/getProvider.ts | 6 +- src/providers/nfsProvider.ts | 2 +- src/providers/s3Provider.ts | 2 +- 
tests/integration/providers/crawling.spec.ts | 59 ++++++++------- tests/unit/providers/getProvider.spec.ts | 79 ++++++++++++++++---- 8 files changed, 102 insertions(+), 50 deletions(-) rename src/providers/{Crawling.ts => crawling.ts} (100%) diff --git a/config/default.json b/config/default.json index b7198e8..6e292f8 100644 --- a/config/default.json +++ b/config/default.json @@ -48,7 +48,7 @@ }, "crawling": { "extension": ".json", - "nestedJsonPath": "$.root..['uri', 'url']", + "nestedJsonPath": "$..['uri','url']", "ignoreNotFound": true }, "ingestion": { diff --git a/config/test.json b/config/test.json index a383e8b..d5a18f5 100644 --- a/config/test.json +++ b/config/test.json @@ -1,7 +1,7 @@ { "crawling": { "extension": ".json", - "nestedJsonPath": "$.root..['uri', 'url']", + "nestedJsonPath": "$..['uri','url']", "ignoreNotFound": true }, "S3": { diff --git a/src/providers/Crawling.ts b/src/providers/crawling.ts similarity index 100% rename from src/providers/Crawling.ts rename to src/providers/crawling.ts diff --git a/src/providers/getProvider.ts b/src/providers/getProvider.ts index 4e16b74..16da1d1 100644 --- a/src/providers/getProvider.ts +++ b/src/providers/getProvider.ts @@ -3,13 +3,13 @@ import httpStatus from 'http-status-codes'; import { DependencyContainer } from 'tsyringe'; import { AppError } from '../common/appError'; import { Provider, ProviderConfig } from '../common/interfaces'; +import { SERVICES } from '../common/constants'; import { NFSProvider } from './nfsProvider'; import { S3Provider } from './s3Provider'; -const PROVIDER_CONFIG = Symbol('ProviderConfig'); function getProvider(provider: string, container: DependencyContainer): Provider { const childContainer = container.createChildContainer(); - childContainer.register(PROVIDER_CONFIG, { useValue: provider }); + childContainer.register(SERVICES.PROVIDER_CONFIG, { useValue: provider }); switch (provider.toLowerCase()) { case 'nfs': return childContainer.resolve(NFSProvider); @@ -21,7 +21,7 
@@ function getProvider(provider: string, container: DependencyContainer): Provider } function getProviderConfig(container: string | DependencyContainer): ProviderConfig { - const provider = typeof container == 'string' ? container : container.resolve(PROVIDER_CONFIG); + const provider = typeof container == 'string' ? container : container.resolve(SERVICES.PROVIDER_CONFIG); try { return config.get(provider); } catch (err) { diff --git a/src/providers/nfsProvider.ts b/src/providers/nfsProvider.ts index e53688d..1a550e7 100644 --- a/src/providers/nfsProvider.ts +++ b/src/providers/nfsProvider.ts @@ -9,7 +9,7 @@ import { QueueFileHandler } from '../handlers/queueFileHandler'; import { SERVICES } from '../common/constants'; import { NFSConfig, LogContext } from '../common/interfaces'; import { AppError } from '../common/appError'; -import { Crawling } from './Crawling'; +import { Crawling } from './crawling'; @injectable() export class NFSProvider extends Crawling { diff --git a/src/providers/s3Provider.ts b/src/providers/s3Provider.ts index a45686b..1f86da3 100644 --- a/src/providers/s3Provider.ts +++ b/src/providers/s3Provider.ts @@ -12,7 +12,7 @@ import { QueueFileHandler } from '../handlers/queueFileHandler'; import { SERVICES } from '../common/constants'; import { LogContext, S3Config } from '../common/interfaces'; import { AppError } from '../common/appError'; -import { Crawling } from './Crawling'; +import { Crawling } from './crawling'; @injectable() export class S3Provider extends Crawling { diff --git a/tests/integration/providers/crawling.spec.ts b/tests/integration/providers/crawling.spec.ts index b0ea8e0..434933c 100644 --- a/tests/integration/providers/crawling.spec.ts +++ b/tests/integration/providers/crawling.spec.ts @@ -9,8 +9,7 @@ import { getApp } from '../../../src/app'; import { SERVICES } from '../../../src/common/constants'; import { NFSConfig } from '../../../src/common/interfaces'; import { QueueFileHandler } from 
'../../../src/handlers/queueFileHandler'; -import { Crawling } from '../../../src/providers/Crawling'; -import { configProviderMock } from '../../helpers/mockCreator'; +import { Crawling } from '../../../src/providers/crawling'; import { AppError } from '../../../src/common/appError'; import { NFSProvider } from '../../../src/providers/nfsProvider'; @@ -20,11 +19,10 @@ describe('Crawling tests', () => { let queueFileHandler: QueueFileHandler; const logger: Logger = jsLogger({ enabled: false }); - const underlying = configProviderMock; const queueFilePath = os.tmpdir(); const config: NFSConfig = { extension: '.json', - nestedJsonPath: "$..['uri', 'url']", + nestedJsonPath: "$..['uri','url']", ignoreNotFound: false, pvPath: "test_pv_path", }; @@ -37,9 +35,6 @@ describe('Crawling tests', () => { provider: { useValue: { ...config, - ignoreNotFound: false, - extension: '.json', - nestedJsonPath: "$..['uri','url']", } } }, @@ -61,11 +56,13 @@ describe('Crawling tests', () => { describe('getFile', () => { it('should delegate', async () => { - const filePath = 'A test??'; - const buffetPromise = Promise.resolve(Buffer.from([80, 101, 114, 114, 121, 32, 116, 104, 101, 32, 116, 101, 115, 116, 63, 33, 63, 33])); - underlying.getFile.mockResolvedValueOnce(buffetPromise); + const filePath = 'test.json'; + const buffer = Buffer.from('Perry the test?!?!'); + const getFileSpy = jest.spyOn(crawler, 'getFile').mockResolvedValue(buffer); + const file = await crawler.getFile(filePath); - expect(underlying.getFile).toHaveBeenCalledWith(filePath); + + expect(getFileSpy).toHaveBeenCalledWith(filePath); expect(file.toString()).toBe('Perry the test?!?!'); }); }); @@ -92,14 +89,15 @@ describe('Crawling tests', () => { // eslint-disable-next-line @typescript-eslint/require-await getFileSpy.mockImplementation(async (path) => { - const normalizedPath = path.replace(/\\/g, '/'); - if (normalizedPath === pathToTileset) { + const normalizedPath = path.replace(/\\/g, '/').replace(/^\//, ''); + + if 
(normalizedPath === 'x/y/0.json') { return Buffer.from(JSON.stringify(json0)); } - if (normalizedPath === '/x/1.json') { + if (normalizedPath === 'x/1.json') { return Buffer.from(JSON.stringify(json1)); } - if (normalizedPath === '/x/2.json') { + if (normalizedPath === 'x/2.json') { return Buffer.from(JSON.stringify(json2)); } return Buffer.from('content'); @@ -111,37 +109,44 @@ describe('Crawling tests', () => { const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8').trim().split('\n'); expect(total).toBe(6); - expect(result).toEqual(expect.arrayContaining(['/x/y/0.json', '/x/1.json', '/x/2.json'])); - + expect(result).toEqual(expect.arrayContaining([expect.stringContaining('x/y/0.json'), expect.stringContaining('x/1.json'), expect.stringContaining('x/2.json')])); getFileSpy.mockRestore(); }); - it('should respect 404 ignore rules error on underlying.getFile error', async () => { - const crawler = new NFSProvider(logger, container.resolve(SERVICES.TRACER), config, queueFileHandler); - underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); - const modelName = faker.word.sample(); - const modelId = faker.string.uuid(); + it('should respect 404 ignore rules error on getFile error', async () => { + const ignoreConfig = { ...config, ignoreNotFound: true }; + const crawler = new NFSProvider(logger, container.resolve(SERVICES.TRACER), ignoreConfig, queueFileHandler); + + jest.spyOn(crawler, 'getFile').mockRejectedValue(new AppError(StatusCodes.NOT_FOUND, 'Not Found', false)); - const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + const modelId = faker.string.uuid(); + const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, 'name'); + await expect(result).resolves.not.toThrow(); }); - it('should throw error on underlying.getFile error', async () => { - underlying.getFile.mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); - const 
modelName = faker.word.sample(); + it('should throw error on getFile error', async () => { + const modelName = faker.word.sample(); const modelId = faker.string.uuid(); + + const getFileSpy = jest.spyOn(crawler, 'getFile').mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).rejects.toThrow(AppError); + getFileSpy.mockRestore(); }); it('should throw error bad file', async () => { - underlying.getFile.mockReturnValueOnce(Buffer.from('}{', 'utf8')); const modelName = faker.word.sample(); const modelId = faker.string.uuid(); + const getFileSpy = jest.spyOn(crawler, 'getFile').mockRejectedValueOnce(new AppError(StatusCodes.INTERNAL_SERVER_ERROR, 'Internal error', false)); + const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); + await expect(result).rejects.toThrow(AppError); + getFileSpy.mockRestore(); }); }); }); diff --git a/tests/unit/providers/getProvider.spec.ts b/tests/unit/providers/getProvider.spec.ts index 640fb6b..6826edf 100644 --- a/tests/unit/providers/getProvider.spec.ts +++ b/tests/unit/providers/getProvider.spec.ts @@ -1,30 +1,77 @@ import jsLogger from '@map-colonies/js-logger'; +import { trace } from '@opentelemetry/api'; +import config from 'config'; import { container } from 'tsyringe'; -import { getProvider } from '../../../src/providers/getProvider'; -import { getApp } from '../../../src/app'; -import { SERVICES } from '../../../src/common/constants'; +import { getProvider, getProviderConfig } from '../../../src/providers/getProvider'; +import { SERVICES, SERVICE_NAME } from '../../../src/common/constants'; import { NFSProvider } from '../../../src/providers/nfsProvider'; +import { S3Provider } from '../../../src/providers/s3Provider'; +import { + configProviderMock, + jobManagerClientMock, + queueFileHandlerMock, +} from '../../helpers/mockCreator'; + +jest.mock('config', () => ({ 
+ get: jest.fn((key: string) => { + switch (key) { + case 'telemetry.logger.level': + return 'debug'; + case 'nfs': + return { basePath: '/tmp' }; + case 's3': + return { bucket: 'test-bucket' }; + default: + return {}; + } + }), +})); describe('getProvider tests', () => { - beforeAll(() => { - getApp({ - override: [ - { token: SERVICES.LOGGER, provider: { useValue: jsLogger({ enabled: false }) } }, - { token: SERVICES.PROVIDER, provider: { useFactory: (container) => getProvider('nfs', container) } }, - ], + beforeEach(() => { + container.reset(); + jest.clearAllMocks(); + + const tracer = trace.getTracer(SERVICE_NAME); + + container.register(SERVICES.LOGGER, { useValue: jsLogger({ enabled: false })}); + container.register(SERVICES.TRACER, { useValue: tracer }); + container.register(SERVICES.QUEUE_FILE_HANDLER, { useValue: queueFileHandlerMock }); + container.register(SERVICES.JOB_MANAGER_CLIENT, { useValue: jobManagerClientMock }); + container.register(SERVICES.PROVIDER, { useValue: configProviderMock }); + }); + + describe('getProvider nfs', () => { + it('should load an instance of the nfs provider', () => { + const provider = getProvider('nfs', container); + expect(provider).toBeInstanceOf(NFSProvider); }); }); - afterAll(function () { - container.reset(); + describe('getProvider s3', () => { + it('should load an instance of the s3 provider', () => { + const provider = getProvider('s3', container); + expect(provider).toBeInstanceOf(S3Provider); + }); }); - afterEach(() => { - jest.clearAllMocks(); + describe('getProvider invalid', () => { + it('should throw an AppError for an unknown provider', () => { + expect(() => getProvider('invalid', container)).toThrow( + 'Invalid config provider received: invalid - available values: "nfs" or "s3"' + ); + }); }); - it('should recursively load provider', () => { - const provider = getProvider('nfs', container); - expect(provider).toBeInstanceOf(NFSProvider); + describe('config failures', () => { + it('should throw when 
config.get fails', () => { + (config.get as jest.Mock).mockImplementationOnce(() => { + throw new Error('config failure'); + }); + + expect(() => getProviderConfig('nfs')).toThrow( + 'Invalid config provider received: nfs. Consult documentation for available values' + ); + }); }); }); From 17234071481d3c6541b9e6e0f5a8eb1d8743ce07 Mon Sep 17 00:00:00 2001 From: liran Date: Sat, 16 May 2026 11:35:53 +0300 Subject: [PATCH 11/21] test: fix test issues and helm configuration --- config/custom-environment-variables.json | 22 +++- helm/templates/_tplValues.tpl | 4 + helm/templates/configmap.yaml | 4 + helm/values.yaml | 11 ++ package-lock.json | 122 ++++-------------- package.json | 2 +- src/common/interfaces.ts | 9 +- src/containerConfig.ts | 2 +- .../{crawling.ts => baseProvider.ts} | 6 +- src/providers/getProvider.ts | 8 +- src/providers/nfsProvider.ts | 4 +- src/providers/s3Provider.ts | 4 +- tests/integration/providers/crawling.spec.ts | 81 +++++------- .../integration/providers/nfsProvider.spec.ts | 2 - .../jobStatus/models/jobStatusManager.spec.ts | 3 +- tests/unit/providers/getProvider.spec.ts | 14 +- 16 files changed, 128 insertions(+), 170 deletions(-) rename src/providers/{crawling.ts => baseProvider.ts} (94%) diff --git a/config/custom-environment-variables.json b/config/custom-environment-variables.json index 9f28889..0342b0f 100644 --- a/config/custom-environment-variables.json +++ b/config/custom-environment-variables.json @@ -63,10 +63,30 @@ "maxAttempts": { "__name": "S3_SOURCE_MAX_ATTEMPTS", "__format": "number" + }, + "extension": "CRAWLING_EXTENSION", + "nestedJsonPath": "CRAWLING_NESTED_JSON_PATH", + "ignoreNotFound": { + "__name": "CRAWLING_IGNORE_NOT_FOUND", + "__format": "boolean" } }, "NFS": { - "pvPath": "PV_SOURCE_PATH" + "pvPath": "PV_SOURCE_PATH", + "extension": "CRAWLING_EXTENSION", + "nestedJsonPath": "CRAWLING_NESTED_JSON_PATH", + "ignoreNotFound": { + "__name": "CRAWLING_IGNORE_NOT_FOUND", + "__format": "boolean" + } + }, + "crawling": { + 
"extension": "CRAWLING_EXTENSION", + "nestedJsonPath": "CRAWLING_NESTED_JSON_PATH", + "ignoreNotFound": { + "__name": "CRAWLING_IGNORE_NOT_FOUND", + "__format": "boolean" + } }, "ingestion": { "provider": "PROVIDER_FROM" diff --git a/helm/templates/_tplValues.tpl b/helm/templates/_tplValues.tpl index 846bcd9..6a383fa 100644 --- a/helm/templates/_tplValues.tpl +++ b/helm/templates/_tplValues.tpl @@ -94,3 +94,7 @@ Custom definitions {{- define "merged.jobManager" -}} {{- include "common.tplvalues.merge" ( dict "values" ( list .Values.jobManager .Values.global.jobManager ) "context" . ) }} {{- end -}} + +{{- define "merged.crawling" -}} +{{- include "common.tplvalues.merge" ( dict "values" ( list .Values.crawling .Values.global.crawling ) "context" . ) }} +{{- end -}} diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index caae132..1911e8e 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -3,6 +3,7 @@ {{- $tracing := (include "merged.tracing" . ) | fromYaml }} {{- $metrics := (include "merged.metrics" . ) | fromYaml }} {{- $jobManager := (include "merged.jobManager" . ) | fromYaml }} +{{- $crawling := (include "merged.crawling" . ) | fromYaml }} {{- $provider := include "provider" . 
-}} apiVersion: v1 kind: ConfigMap @@ -49,3 +50,6 @@ data: INGESTION_TASK_BATCHES: {{ $jobManager.ingestion.batches | quote}} JOB_DELETE_TYPE: {{ $jobManager.delete.jobType | quote }} TASK_DELETE_TYPE: {{ $jobManager.delete.taskType | quote }} + CRAWLING_EXTENSION: {{ $crawling.extension | quote }} + CRAWLING_NESTED_JSON_PATH: {{ $crawling.nestedJsonPath | quote }} + CRAWLING_IGNORE_NOT_FOUND: {{ $crawling.ignoreNotFound | quote }} diff --git a/helm/values.yaml b/helm/values.yaml index bd62d7f..e58d3f5 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -37,6 +37,11 @@ global: jobType: '' taskType: '' + crawling: + extension: '.json' + nestedJsonPath: "$..['uri','url']" + ignoreNotFound: true + cloudProvider: dockerRegistryUrl: flavor: @@ -116,6 +121,12 @@ jobManager: delete: jobType: taskType: + +crawling: + extension: '.json' + nestedJsonPath: "$..['uri','url']" + ignoreNotFound: true + env: port: 80 targetPort: 8080 diff --git a/package-lock.json b/package-lock.json index 174723f..7c31397 100644 --- a/package-lock.json +++ b/package-lock.json @@ -32,7 +32,7 @@ "express": "^4.18.2", "express-openapi-validator": "^5.0.4", "http-status-codes": "^2.2.0", - "jsonpath": "^1.1.1", + "jsonpath": "^1.3.0", "n-readlines": "^1.0.1", "prom-client": "^15.1.1", "reflect-metadata": "^0.1.13", @@ -13042,7 +13042,8 @@ "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", - "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==" + "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", + "dev": true }, "node_modules/deepmerge": { "version": "4.3.1", @@ -13623,86 +13624,26 @@ } }, "node_modules/escodegen": { - "version": "1.14.3", - "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-1.14.3.tgz", - "integrity": 
"sha512-qFcX0XJkdg+PB3xjZZG/wKSuT1PnQWx57+TVSjIMmILd2yC/6ByYElPwJnslDsuWuSAp4AwJGumarAAmJch5Kw==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", + "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", "license": "BSD-2-Clause", "dependencies": { "esprima": "^4.0.1", - "estraverse": "^4.2.0", - "esutils": "^2.0.2", - "optionator": "^0.8.1" + "estraverse": "^5.2.0", + "esutils": "^2.0.2" }, "bin": { "escodegen": "bin/escodegen.js", "esgenerate": "bin/esgenerate.js" }, "engines": { - "node": ">=4.0" + "node": ">=6.0" }, "optionalDependencies": { "source-map": "~0.6.1" } }, - "node_modules/escodegen/node_modules/estraverse": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", - "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=4.0" - } - }, - "node_modules/escodegen/node_modules/levn": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", - "integrity": "sha512-0OO4y2iOHix2W6ujICbKIaEQXvFQHue65vUG3pb5EUomzPI90z9hsA1VsO/dbIIpC53J8gxM9Q4Oho0jrCM/yA==", - "license": "MIT", - "dependencies": { - "prelude-ls": "~1.1.2", - "type-check": "~0.3.2" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/escodegen/node_modules/optionator": { - "version": "0.8.3", - "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.8.3.tgz", - "integrity": "sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA==", - "license": "MIT", - "dependencies": { - "deep-is": "~0.1.3", - "fast-levenshtein": "~2.0.6", - "levn": "~0.3.0", - "prelude-ls": "~1.1.2", - "type-check": "~0.3.2", - "word-wrap": "~1.2.3" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/escodegen/node_modules/prelude-ls": { - "version": 
"1.1.2", - "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.1.2.tgz", - "integrity": "sha512-ESF23V4SKG6lVSGZgYNpbsiaAkdab6ZgOxe52p7+Kid3W3u3bxR4Vfd/o21dmN7jSt0IwgZ4v5MUd26FEtXE9w==", - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/escodegen/node_modules/type-check": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz", - "integrity": "sha512-ZCmOJdvOWDBYJlzAoFkC+Q0+bUyEOS1ltgp1MGU03fqHG+dbi9tBFU2Rd9QKiDZFAYrhPh2JUf7rZRIuHRKtOg==", - "license": "MIT", - "dependencies": { - "prelude-ls": "~1.1.2" - }, - "engines": { - "node": ">= 0.8.0" - } - }, "node_modules/eslint": { "version": "8.53.0", "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.53.0.tgz", @@ -14671,7 +14612,6 @@ "version": "5.3.0", "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", - "dev": true, "engines": { "node": ">=4.0" } @@ -14935,7 +14875,8 @@ "node_modules/fast-levenshtein": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", - "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==" + "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", + "dev": true }, "node_modules/fast-redact": { "version": "3.3.0", @@ -17620,14 +17561,14 @@ ] }, "node_modules/jsonpath": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/jsonpath/-/jsonpath-1.1.1.tgz", - "integrity": "sha512-l6Cg7jRpixfbgoWgkrl77dgEj8RPvND0wMH6TwQmi9Qs4TFfS9u5cUFnbeKTwj5ga5Y3BTGGNI28k117LJ009w==", + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/jsonpath/-/jsonpath-1.3.0.tgz", + "integrity": "sha512-0kjkYHJBkAy50Z5QzArZ7udmvxrJzkpKYW27fiF//BrMY7TQibYLl+FYIXN2BiYmwMIVzSfD8aDRj6IzgBX2/w==", "license": "MIT", "dependencies": { - 
"esprima": "1.2.2", - "static-eval": "2.0.2", - "underscore": "1.12.1" + "esprima": "1.2.5", + "static-eval": "2.1.1", + "underscore": "1.13.6" } }, "node_modules/jsonpath-plus": { @@ -17650,9 +17591,9 @@ } }, "node_modules/jsonpath/node_modules/esprima": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/esprima/-/esprima-1.2.2.tgz", - "integrity": "sha512-+JpPZam9w5DuJ3Q67SqsMGtiHKENSMRVoxvArfJZK01/BfLEObtZ6orJa/MtoGNR/rfMgp5837T41PAmTwAv/A==", + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-1.2.5.tgz", + "integrity": "sha512-S9VbPDU0adFErpDai3qDkjq8+G05ONtKzcyNrPKg/ZKa+tf879nX2KexNU95b31UoTJjRLInNBHHHjFPoCd7lQ==", "bin": { "esparse": "bin/esparse.js", "esvalidate": "bin/esvalidate.js" @@ -21433,12 +21374,12 @@ } }, "node_modules/static-eval": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/static-eval/-/static-eval-2.0.2.tgz", - "integrity": "sha512-N/D219Hcr2bPjLxPiV+TQE++Tsmrady7TqAJugLy7Xk1EumfDWS/f5dtBbkRCGE7wKKXuYockQoj8Rm2/pVKyg==", + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/static-eval/-/static-eval-2.1.1.tgz", + "integrity": "sha512-MgWpQ/ZjGieSVB3eOJVs4OA2LT/q1vx98KPCTTQPzq/aLr0YUXTsgryTXr4SLfR0ZfUUCiedM9n/ABeDIyy4mA==", "license": "MIT", "dependencies": { - "escodegen": "^1.8.1" + "escodegen": "^2.1.0" } }, "node_modules/statuses": { @@ -22367,9 +22308,9 @@ } }, "node_modules/underscore": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.12.1.tgz", - "integrity": "sha512-hEQt0+ZLDVUMhebKxL4x1BTtDY7bavVofhZ9KZ4aI26X9SRaE+Y3m83XUL1UP2jn8ynjndwCCpEHdUG+9pP1Tw==", + "version": "1.13.6", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.6.tgz", + "integrity": "sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A==", "license": "MIT" }, "node_modules/undici": { @@ -22697,15 +22638,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/word-wrap": { - 
"version": "1.2.5", - "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", - "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/wordwrap": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", diff --git a/package.json b/package.json index 99a9bbc..795cd01 100644 --- a/package.json +++ b/package.json @@ -65,7 +65,7 @@ "express": "^4.18.2", "express-openapi-validator": "^5.0.4", "http-status-codes": "^2.2.0", - "jsonpath": "^1.1.1", + "jsonpath": "^1.3.0", "n-readlines": "^1.0.1", "prom-client": "^15.1.1", "reflect-metadata": "^0.1.13", diff --git a/src/common/interfaces.ts b/src/common/interfaces.ts index 34d08b3..c78d1c5 100644 --- a/src/common/interfaces.ts +++ b/src/common/interfaces.ts @@ -29,7 +29,6 @@ export interface DeletePayload { producerName: string; } -// ToDo: merge this class with the identical class in file-syncer export interface Provider { streamModelPathsToQueueFile: (modelId: string, pathToTileset: string, productName: string) => Promise; getFile: (filePath: string) => Promise; @@ -58,7 +57,7 @@ export interface DeleteTaskParameters { blockDuplication?: boolean; } -export interface S3Config extends CrawlingConfig { +export interface S3Config extends BaseProviderConfig { accessKeyId: string; secretAccessKey: string; endpointUrl: string; @@ -68,17 +67,17 @@ export interface S3Config extends CrawlingConfig { forcePathStyle: boolean; } -export interface NFSConfig extends CrawlingConfig { +export interface NFSConfig extends BaseProviderConfig { pvPath: string; } -export interface CrawlingConfig { +export interface BaseProviderConfig { extension: string; nestedJsonPath: string; ignoreNotFound?: boolean; } -export type ProviderConfig = S3Config | NFSConfig | CrawlingConfig; +export type ProviderConfig = S3Config | NFSConfig; export interface 
JobOperationResponse { jobId: string; diff --git a/src/containerConfig.ts b/src/containerConfig.ts index 35f93ac..02516e4 100644 --- a/src/containerConfig.ts +++ b/src/containerConfig.ts @@ -61,7 +61,7 @@ export const registerExternalValues = (options?: RegisterOptions): DependencyCon { token: SERVICES.PROVIDER_CONFIG, provider: { - useFactory: (container) => getProviderConfig(container), + useFactory: () => getProviderConfig(provider), }, }, { token: SERVICES.QUEUE_FILE_HANDLER, provider: { useClass: QueueFileHandler } }, diff --git a/src/providers/crawling.ts b/src/providers/baseProvider.ts similarity index 94% rename from src/providers/crawling.ts rename to src/providers/baseProvider.ts index 4de6a02..c3e948d 100644 --- a/src/providers/crawling.ts +++ b/src/providers/baseProvider.ts @@ -5,10 +5,10 @@ import { Tracer } from '@opentelemetry/api'; import { withSpanAsyncV4 } from '@map-colonies/telemetry'; import jsonpath from 'jsonpath'; import { AppError } from '../common/appError'; -import { CrawlingConfig, LogContext, Provider } from '../common/interfaces'; +import { BaseProviderConfig, LogContext, Provider } from '../common/interfaces'; import { QueueFileHandler } from '../handlers/queueFileHandler'; -export abstract class Crawling implements Provider { +export abstract class BaseProvider implements Provider { protected readonly logContext: LogContext; public constructor( @@ -19,7 +19,7 @@ export abstract class Crawling implements Provider { ) { this.logContext = { fileName: __filename, - class: Crawling.name, + class: BaseProvider.name, }; } diff --git a/src/providers/getProvider.ts b/src/providers/getProvider.ts index 16da1d1..638110d 100644 --- a/src/providers/getProvider.ts +++ b/src/providers/getProvider.ts @@ -9,7 +9,10 @@ import { S3Provider } from './s3Provider'; function getProvider(provider: string, container: DependencyContainer): Provider { const childContainer = container.createChildContainer(); - childContainer.register(SERVICES.PROVIDER_CONFIG, { 
useValue: provider }); + childContainer.register(SERVICES.PROVIDER_CONFIG, { + useFactory: () => getProviderConfig(provider), + }); + switch (provider.toLowerCase()) { case 'nfs': return childContainer.resolve(NFSProvider); @@ -20,8 +23,7 @@ function getProvider(provider: string, container: DependencyContainer): Provider } } -function getProviderConfig(container: string | DependencyContainer): ProviderConfig { - const provider = typeof container == 'string' ? container : container.resolve(SERVICES.PROVIDER_CONFIG); +function getProviderConfig(provider: string): ProviderConfig { try { return config.get(provider); } catch (err) { diff --git a/src/providers/nfsProvider.ts b/src/providers/nfsProvider.ts index 1a550e7..e297ba6 100644 --- a/src/providers/nfsProvider.ts +++ b/src/providers/nfsProvider.ts @@ -9,10 +9,10 @@ import { QueueFileHandler } from '../handlers/queueFileHandler'; import { SERVICES } from '../common/constants'; import { NFSConfig, LogContext } from '../common/interfaces'; import { AppError } from '../common/appError'; -import { Crawling } from './crawling'; +import { BaseProvider } from './baseProvider'; @injectable() -export class NFSProvider extends Crawling { +export class NFSProvider extends BaseProvider { protected override readonly logContext: LogContext; private readonly pvPath: string; diff --git a/src/providers/s3Provider.ts b/src/providers/s3Provider.ts index 1f86da3..37c8017 100644 --- a/src/providers/s3Provider.ts +++ b/src/providers/s3Provider.ts @@ -12,10 +12,10 @@ import { QueueFileHandler } from '../handlers/queueFileHandler'; import { SERVICES } from '../common/constants'; import { LogContext, S3Config } from '../common/interfaces'; import { AppError } from '../common/appError'; -import { Crawling } from './crawling'; +import { BaseProvider } from './baseProvider'; @injectable() -export class S3Provider extends Crawling { +export class S3Provider extends BaseProvider { protected override readonly logContext: LogContext; private 
readonly s3: S3Client; diff --git a/tests/integration/providers/crawling.spec.ts b/tests/integration/providers/crawling.spec.ts index 434933c..72416b5 100644 --- a/tests/integration/providers/crawling.spec.ts +++ b/tests/integration/providers/crawling.spec.ts @@ -9,13 +9,13 @@ import { getApp } from '../../../src/app'; import { SERVICES } from '../../../src/common/constants'; import { NFSConfig } from '../../../src/common/interfaces'; import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; -import { Crawling } from '../../../src/providers/crawling'; +import { BaseProvider } from '../../../src/providers/baseProvider'; import { AppError } from '../../../src/common/appError'; import { NFSProvider } from '../../../src/providers/nfsProvider'; // ToDo those are UNIT tests, NOT INTEGRATION!! But CI requires integration coverage describe('Crawling tests', () => { - let crawler: Crawling; + let crawler: BaseProvider; let queueFileHandler: QueueFileHandler; const logger: Logger = jsLogger({ enabled: false }); @@ -53,31 +53,17 @@ describe('Crawling tests', () => { jest.clearAllMocks(); }); - - describe('getFile', () => { - it('should delegate', async () => { - const filePath = 'test.json'; - const buffer = Buffer.from('Perry the test?!?!'); - const getFileSpy = jest.spyOn(crawler, 'getFile').mockResolvedValue(buffer); - - const file = await crawler.getFile(filePath); - - expect(getFileSpy).toHaveBeenCalledWith(filePath); - expect(file.toString()).toBe('Perry the test?!?!'); - }); - }); - describe('streamModelPathsToQueueFile', () => { const json0 = { root: { content: { uri: 'a.b3dm', boundingVolume: { region: [0] } }, children: [ { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { uri: 'b.b3dm' } }, - { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { uri: '../1.json' }, children: [] }, + { boundingVolume: { region: [0, 1, 2, 3, 4, 5] }, geometricError: 0, content: { url: '../1.json' }, children: [] 
}, ], }, }; - const json1 = { root: { content: { uri: 'bla/c.b3dm' }, children: [{ content: { uri: '2.json' } }] } }; + const json1 = { root: { content: { uri: 'bla/c.b3dm' }, children: [{ content: { url: '2.json' } }] } }; const json2 = {}; const pathToTileset = '/x/y/0.json'; @@ -113,40 +99,43 @@ describe('Crawling tests', () => { getFileSpy.mockRestore(); }); - it('should respect 404 ignore rules error on getFile error', async () => { - const ignoreConfig = { ...config, ignoreNotFound: true }; - const crawler = new NFSProvider(logger, container.resolve(SERVICES.TRACER), ignoreConfig, queueFileHandler); - - jest.spyOn(crawler, 'getFile').mockRejectedValue(new AppError(StatusCodes.NOT_FOUND, 'Not Found', false)); - + describe('getFile errors', () => { + const modelName = faker.word.sample(); const modelId = faker.string.uuid(); - const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, 'name'); - - await expect(result).resolves.not.toThrow(); - }); - it('should throw error on getFile error', async () => { - const modelName = faker.word.sample(); - const modelId = faker.string.uuid(); - - const getFileSpy = jest.spyOn(crawler, 'getFile').mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'blabla', false)); + const createCrawler = (overrides: Partial = {}) => + new NFSProvider(logger, container.resolve(SERVICES.TRACER), { ...config, ...overrides }, queueFileHandler); - const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); - - await expect(result).rejects.toThrow(AppError); - getFileSpy.mockRestore(); - }); + it('should throw on a general getFile error', async () => { + const getFileSpy = jest + .spyOn(crawler, 'getFile') + .mockRejectedValueOnce(new AppError(StatusCodes.INTERNAL_SERVER_ERROR, 'Internal error', false)); - it('should throw error bad file', async () => { - const modelName = faker.word.sample(); - const modelId = faker.string.uuid(); + await expect(crawler.streamModelPathsToQueueFile(modelId, 
pathToTileset, modelName)).rejects.toThrow(AppError); - const getFileSpy = jest.spyOn(crawler, 'getFile').mockRejectedValueOnce(new AppError(StatusCodes.INTERNAL_SERVER_ERROR, 'Internal error', false)); + getFileSpy.mockRestore(); + }); - const result = crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); - - await expect(result).rejects.toThrow(AppError); - getFileSpy.mockRestore(); + it('should throw on NOT_FOUND when ignoreNotFound is false', async () => { + const getFileSpy = jest + .spyOn(crawler, 'getFile') + .mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'Not Found', false)); + + await expect(crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName)).rejects.toThrow(AppError); + + getFileSpy.mockRestore(); + }); + + it('should skip NOT_FOUND files when ignoreNotFound is true', async () => { + const ignoringCrawler = createCrawler({ ignoreNotFound: true }); + const getFileSpy = jest + .spyOn(ignoringCrawler, 'getFile') + .mockRejectedValue(new AppError(StatusCodes.NOT_FOUND, 'Not Found', false)); + + await expect(ignoringCrawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName)).resolves.toBe(0); + + getFileSpy.mockRestore(); + }); }); }); }); diff --git a/tests/integration/providers/nfsProvider.spec.ts b/tests/integration/providers/nfsProvider.spec.ts index f4728cc..45d6a61 100644 --- a/tests/integration/providers/nfsProvider.spec.ts +++ b/tests/integration/providers/nfsProvider.spec.ts @@ -85,8 +85,6 @@ describe('NFSProvider tests', () => { const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8'); - console.log('Crawler Output:', result); - expect(result).toContain(pathToTileset); await queueFileHandler.deleteQueueFile(modelId); }); diff --git a/tests/unit/jobStatus/models/jobStatusManager.spec.ts b/tests/unit/jobStatus/models/jobStatusManager.spec.ts index 6e3b65b..0225201 100644 --- a/tests/unit/jobStatus/models/jobStatusManager.spec.ts +++ 
b/tests/unit/jobStatus/models/jobStatusManager.spec.ts @@ -7,7 +7,7 @@ import { AppError } from '../../../../src/common/appError'; import { SERVICES } from '../../../../src/common/constants'; import { JobStatusResponse } from '../../../../src/common/interfaces'; import { JobStatusManager } from '../../../../src/jobStatus/models/jobStatusManager'; -import { jobManagerClientMock } from '../../../helpers/mockCreator'; +import { configProviderMock, jobManagerClientMock } from '../../../helpers/mockCreator'; describe('jobStatusManager', () => { let jobStatusManager: JobStatusManager; @@ -16,6 +16,7 @@ describe('jobStatusManager', () => { getApp({ override: [ { token: SERVICES.JOB_MANAGER_CLIENT, provider: { useValue: jobManagerClientMock } }, + { token: SERVICES.PROVIDER, provider: { useValue: configProviderMock } }, { token: SERVICES.LOGGER, provider: { useValue: jsLogger({ enabled: false }) } }, ], }); diff --git a/tests/unit/providers/getProvider.spec.ts b/tests/unit/providers/getProvider.spec.ts index 6826edf..f774d4a 100644 --- a/tests/unit/providers/getProvider.spec.ts +++ b/tests/unit/providers/getProvider.spec.ts @@ -15,12 +15,10 @@ import { jest.mock('config', () => ({ get: jest.fn((key: string) => { switch (key) { - case 'telemetry.logger.level': - return 'debug'; - case 'nfs': - return { basePath: '/tmp' }; - case 's3': - return { bucket: 'test-bucket' }; + case 'NFS': + return { pvPath: '/tmp', extension: '.json', nestedJsonPath: "$..['uri','url']" }; + case 'S3': + return { bucket: 'test-bucket', extension: '.json', nestedJsonPath: "$..['uri','url']" }; default: return {}; } @@ -69,8 +67,8 @@ describe('getProvider tests', () => { throw new Error('config failure'); }); - expect(() => getProviderConfig('nfs')).toThrow( - 'Invalid config provider received: nfs. Consult documentation for available values' + expect(() => getProviderConfig('NFS')).toThrow( + 'Invalid config provider received: NFS. 
Consult documentation for available values' ); }); }); From 4483ff1829b91a63a6f774da896dcd201a6e74d8 Mon Sep 17 00:00:00 2001 From: liran Date: Sat, 16 May 2026 13:55:06 +0300 Subject: [PATCH 12/21] chore: update baseProvider test name --- .../providers/{crawling.spec.ts => baseProvider.spec.ts} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/integration/providers/{crawling.spec.ts => baseProvider.spec.ts} (100%) diff --git a/tests/integration/providers/crawling.spec.ts b/tests/integration/providers/baseProvider.spec.ts similarity index 100% rename from tests/integration/providers/crawling.spec.ts rename to tests/integration/providers/baseProvider.spec.ts From fd17b4c7acc74cd09b1cb9905997f4d1c3e49fa9 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Sun, 17 May 2026 09:54:58 +0300 Subject: [PATCH 13/21] chore: small updates --- src/providers/baseProvider.ts | 42 +++++++++++++------ src/providers/nfsProvider.ts | 4 +- src/providers/s3Provider.ts | 14 ++----- tests/helpers/nfsHelper.ts | 2 +- .../providers/baseProvider.spec.ts | 35 ++++++++-------- .../integration/providers/nfsProvider.spec.ts | 14 +++---- .../integration/providers/s3Provider.spec.ts | 29 ++++++------- tests/unit/providers/getProvider.spec.ts | 16 ++----- 8 files changed, 78 insertions(+), 78 deletions(-) diff --git a/src/providers/baseProvider.ts b/src/providers/baseProvider.ts index c3e948d..3b67c0b 100644 --- a/src/providers/baseProvider.ts +++ b/src/providers/baseProvider.ts @@ -8,9 +8,10 @@ import { AppError } from '../common/appError'; import { BaseProviderConfig, LogContext, Provider } from '../common/interfaces'; import { QueueFileHandler } from '../handlers/queueFileHandler'; -export abstract class BaseProvider implements Provider { +export abstract class BaseProvider implements Provider { protected readonly logContext: LogContext; - + private readonly crawlingExtension: string; + public constructor( protected readonly logger: Logger, public readonly tracer: Tracer, @@ 
-21,12 +22,14 @@ export abstract class BaseProvider implements Prov fileName: __filename, class: BaseProvider.name, }; + + this.crawlingExtension = this.config.extension as string; } @withSpanAsyncV4 public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; - + this.logger.info({ msg: 'Started streaming model paths to queue file', logContext, @@ -40,26 +43,35 @@ export abstract class BaseProvider implements Prov let totalFilesAdded = 0; while (processingQueue.length > 0) { - const currentPath = processingQueue.shift()!; + const currentPath = processingQueue.shift(); + + if (currentPath === undefined) { + continue; + } if (visitedFiles.has(currentPath)) { continue; } + visitedFiles.add(currentPath); try { const buffer = await this.getFile(currentPath); - + await this.queueFileHandler.writeFileNameToQueueFile(modelId, currentPath); totalFilesAdded++; - - if (currentPath.endsWith(this.config.extension)) { + + if (currentPath.endsWith(this.crawlingExtension)) { const nestedPaths = this.extractPathsFromJson(buffer, currentPath); for (const nestedPath of nestedPaths) { - if (nestedPath.endsWith(this.config.extension)) { + if (visitedFiles.has(nestedPath)) { + continue; + } + + if (nestedPath.endsWith(this.crawlingExtension)) { processingQueue.push(nestedPath); - } else if (!visitedFiles.has(nestedPath)) { + } else { await this.queueFileHandler.writeFileNameToQueueFile(modelId, nestedPath); visitedFiles.add(nestedPath); totalFilesAdded++; @@ -67,7 +79,9 @@ export abstract class BaseProvider implements Prov } } } catch (err) { - if (this.config.ignoreNotFound! 
&& err instanceof AppError && err.status === StatusCodes.NOT_FOUND) { + const ignoreNotFound = this.config.ignoreNotFound === true; + + if (ignoreNotFound && err instanceof AppError && err.status === StatusCodes.NOT_FOUND) { this.logger.warn({ msg: 'File not found, skipping...', logContext, path: currentPath, modelName }); continue; } @@ -78,8 +92,9 @@ export abstract class BaseProvider implements Prov modelName, modelId, path: currentPath, - err, + err, }); + throw err; } } @@ -99,8 +114,9 @@ export abstract class BaseProvider implements Prov try { const fileContent = buffer.toString(); const json = JSON.parse(fileContent) as object; - const results = jsonpath.query(json, this.config.nestedJsonPath) as string[]; - + const nestedJsonPath = this.config.nestedJsonPath as string; + const results = jsonpath.query(json, nestedJsonPath) as string[]; + const dirname = Path.dirname(currentPath); return results.map((child) => { diff --git a/src/providers/nfsProvider.ts b/src/providers/nfsProvider.ts index e297ba6..0781e8c 100644 --- a/src/providers/nfsProvider.ts +++ b/src/providers/nfsProvider.ts @@ -47,14 +47,14 @@ export class NFSProvider extends BaseProvider { return data; } catch (err) { const error = err as NodeJS.ErrnoException; - + if (error.code === 'ENOENT') { throw new AppError(httpStatus.NOT_FOUND, `File ${filePath} not found`, true); } if (error.code === 'EISDIR') { throw new AppError(httpStatus.BAD_REQUEST, `${filePath} is a directory, expected a file`, true); } - + throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, `Error reading file: ${error.message}`, true); } } diff --git a/src/providers/s3Provider.ts b/src/providers/s3Provider.ts index 37c8017..9fe1877 100644 --- a/src/providers/s3Provider.ts +++ b/src/providers/s3Provider.ts @@ -1,9 +1,5 @@ import httpStatus from 'http-status-codes'; -import { - GetObjectCommand, - S3Client, - S3ClientConfig, -} from '@aws-sdk/client-s3'; +import { GetObjectCommand, S3Client, S3ClientConfig } from 
'@aws-sdk/client-s3'; import { Logger } from '@map-colonies/js-logger'; import { inject, injectable } from 'tsyringe'; import { Tracer } from '@opentelemetry/api'; @@ -76,12 +72,8 @@ export class S3Provider extends BaseProvider { const s3Error = err as Error; const statusCode = (s3Error as unknown as { name: string }).name === 'NoSuchKey' ? httpStatus.NOT_FOUND : httpStatus.INTERNAL_SERVER_ERROR; - - throw new AppError( - statusCode, - `an error occurred during the get key ${filePath} on bucket ${this.s3Config.bucket}, ${s3Error.message}`, - true - ); + + throw new AppError(statusCode, `an error occurred during the get key ${filePath} on bucket ${this.s3Config.bucket}, ${s3Error.message}`, true); } } } diff --git a/tests/helpers/nfsHelper.ts b/tests/helpers/nfsHelper.ts index 830e0d9..78fec74 100644 --- a/tests/helpers/nfsHelper.ts +++ b/tests/helpers/nfsHelper.ts @@ -10,7 +10,7 @@ export class NFSHelper { const subFolders = path.dirname(file); const fileName = path.basename(file); const dirPath = path.join(this.config.pvPath, modelName, subFolders); - + if (!fs.existsSync(dirPath)) { await this.createFolder(dirPath); } diff --git a/tests/integration/providers/baseProvider.spec.ts b/tests/integration/providers/baseProvider.spec.ts index 72416b5..fec9f86 100644 --- a/tests/integration/providers/baseProvider.spec.ts +++ b/tests/integration/providers/baseProvider.spec.ts @@ -24,20 +24,21 @@ describe('Crawling tests', () => { extension: '.json', nestedJsonPath: "$..['uri','url']", ignoreNotFound: false, - pvPath: "test_pv_path", + pvPath: 'test_pv_path', }; beforeAll(() => { getApp({ override: [ { token: SERVICES.LOGGER, provider: { useValue: logger } }, - { token: SERVICES.PROVIDER_CONFIG, - provider: { - useValue: { - ...config, - } - } - }, + { + token: SERVICES.PROVIDER_CONFIG, + provider: { + useValue: { + ...config, + }, + }, + }, ], }); queueFileHandler = container.resolve(QueueFileHandler); @@ -72,11 +73,11 @@ describe('Crawling tests', () => { const modelId = 
faker.string.uuid(); const getFileSpy = jest.spyOn(crawler, 'getFile'); - + // eslint-disable-next-line @typescript-eslint/require-await getFileSpy.mockImplementation(async (path) => { const normalizedPath = path.replace(/\\/g, '/').replace(/^\//, ''); - + if (normalizedPath === 'x/y/0.json') { return Buffer.from(JSON.stringify(json0)); } @@ -93,9 +94,11 @@ describe('Crawling tests', () => { const total = await crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8').trim().split('\n'); - + expect(total).toBe(6); - expect(result).toEqual(expect.arrayContaining([expect.stringContaining('x/y/0.json'), expect.stringContaining('x/1.json'), expect.stringContaining('x/2.json')])); + expect(result).toEqual( + expect.arrayContaining([expect.stringContaining('x/y/0.json'), expect.stringContaining('x/1.json'), expect.stringContaining('x/2.json')]) + ); getFileSpy.mockRestore(); }); @@ -117,9 +120,7 @@ describe('Crawling tests', () => { }); it('should throw on NOT_FOUND when ignoreNotFound is false', async () => { - const getFileSpy = jest - .spyOn(crawler, 'getFile') - .mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'Not Found', false)); + const getFileSpy = jest.spyOn(crawler, 'getFile').mockRejectedValueOnce(new AppError(StatusCodes.NOT_FOUND, 'Not Found', false)); await expect(crawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName)).rejects.toThrow(AppError); @@ -128,9 +129,7 @@ describe('Crawling tests', () => { it('should skip NOT_FOUND files when ignoreNotFound is true', async () => { const ignoringCrawler = createCrawler({ ignoreNotFound: true }); - const getFileSpy = jest - .spyOn(ignoringCrawler, 'getFile') - .mockRejectedValue(new AppError(StatusCodes.NOT_FOUND, 'Not Found', false)); + const getFileSpy = jest.spyOn(ignoringCrawler, 'getFile').mockRejectedValue(new AppError(StatusCodes.NOT_FOUND, 'Not Found', false)); await 
expect(ignoringCrawler.streamModelPathsToQueueFile(modelId, pathToTileset, modelName)).resolves.toBe(0); diff --git a/tests/integration/providers/nfsProvider.spec.ts b/tests/integration/providers/nfsProvider.spec.ts index 45d6a61..c19bd65 100644 --- a/tests/integration/providers/nfsProvider.spec.ts +++ b/tests/integration/providers/nfsProvider.spec.ts @@ -63,26 +63,26 @@ describe('NFSProvider tests', () => { const modelName = 'interconnect'; const entryFile = 'tileset.json'; const pathToTileset = `${modelName}/${entryFile}`; - + await queueFileHandler.createQueueFile(modelId); const textureFile = 'text1.png'; const childTileset = 'child.json'; - + const tilesetContent = JSON.stringify({ root: { content: { uri: childTileset }, - children: [{ content: { uri: textureFile } }] - } + children: [{ content: { uri: textureFile } }], + }, }); await nfsHelper.createFileOfModel('', pathToTileset, tilesetContent); - + await nfsHelper.createFileOfModel(modelName, textureFile, 'data'); - await nfsHelper.createFileOfModel(modelName, childTileset, JSON.stringify({ asset: { version: "1.0" } })); + await nfsHelper.createFileOfModel(modelName, childTileset, JSON.stringify({ asset: { version: '1.0' } })); await provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); - + const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8'); expect(result).toContain(pathToTileset); diff --git a/tests/integration/providers/s3Provider.spec.ts b/tests/integration/providers/s3Provider.spec.ts index d35514b..2c4ce60 100644 --- a/tests/integration/providers/s3Provider.spec.ts +++ b/tests/integration/providers/s3Provider.spec.ts @@ -25,15 +25,16 @@ describe('S3Provider tests', () => { getApp({ override: [ { token: SERVICES.LOGGER, provider: { useValue: jsLogger({ enabled: false }) } }, - { token: SERVICES.PROVIDER_CONFIG, - provider: { - useValue: { - ...s3Config, - ignoreNotFound: false, + { + token: SERVICES.PROVIDER_CONFIG, + provider: { + useValue: { + ...s3Config, + 
ignoreNotFound: false, extension: '.json', nestedJsonPath: "$..['uri','url']", - } - } + }, + }, }, ], }); @@ -80,7 +81,7 @@ describe('S3Provider tests', () => { it('should recursively discover nested files across multiple directories and levels', async () => { const modelId = faker.string.uuid(); const modelName = 'complex-model'; - + const rootTileset = 'tileset.json'; const subDir = 'folderA'; const secondLevelJson = `${subDir}/sub-tileset.json`; @@ -88,14 +89,11 @@ describe('S3Provider tests', () => { const leafFileBinary = `${subDir}/geometry.b3dm`; const rootContent = JSON.stringify({ - root: { uri: secondLevelJson, url: secondLevelJson } + root: { uri: secondLevelJson, url: secondLevelJson }, }); const subTilesetContent = JSON.stringify({ - buffers: [ - { uri: 'data.json' }, - { url: 'geometry.b3dm' } - ] + buffers: [{ uri: 'data.json' }, { url: 'geometry.b3dm' }], }); await s3Helper.createFileOfModel('', rootTileset, rootContent); @@ -108,7 +106,10 @@ describe('S3Provider tests', () => { const totalAdded = await provider.streamModelPathsToQueueFile(modelId, rootTileset, modelName); const result = fs.readFileSync(`${queueFilePath}/${modelId}`, 'utf-8'); - const filesInQueue = result.trim().split('\n').map(l => l.trim()); + const filesInQueue = result + .trim() + .split('\n') + .map((l) => l.trim()); expect(totalAdded).toBe(4); diff --git a/tests/unit/providers/getProvider.spec.ts b/tests/unit/providers/getProvider.spec.ts index f774d4a..7f1c356 100644 --- a/tests/unit/providers/getProvider.spec.ts +++ b/tests/unit/providers/getProvider.spec.ts @@ -6,11 +6,7 @@ import { getProvider, getProviderConfig } from '../../../src/providers/getProvid import { SERVICES, SERVICE_NAME } from '../../../src/common/constants'; import { NFSProvider } from '../../../src/providers/nfsProvider'; import { S3Provider } from '../../../src/providers/s3Provider'; -import { - configProviderMock, - jobManagerClientMock, - queueFileHandlerMock, -} from '../../helpers/mockCreator'; 
+import { configProviderMock, jobManagerClientMock, queueFileHandlerMock } from '../../helpers/mockCreator'; jest.mock('config', () => ({ get: jest.fn((key: string) => { @@ -32,7 +28,7 @@ describe('getProvider tests', () => { const tracer = trace.getTracer(SERVICE_NAME); - container.register(SERVICES.LOGGER, { useValue: jsLogger({ enabled: false })}); + container.register(SERVICES.LOGGER, { useValue: jsLogger({ enabled: false }) }); container.register(SERVICES.TRACER, { useValue: tracer }); container.register(SERVICES.QUEUE_FILE_HANDLER, { useValue: queueFileHandlerMock }); container.register(SERVICES.JOB_MANAGER_CLIENT, { useValue: jobManagerClientMock }); @@ -55,9 +51,7 @@ describe('getProvider tests', () => { describe('getProvider invalid', () => { it('should throw an AppError for an unknown provider', () => { - expect(() => getProvider('invalid', container)).toThrow( - 'Invalid config provider received: invalid - available values: "nfs" or "s3"' - ); + expect(() => getProvider('invalid', container)).toThrow('Invalid config provider received: invalid - available values: "nfs" or "s3"'); }); }); @@ -67,9 +61,7 @@ describe('getProvider tests', () => { throw new Error('config failure'); }); - expect(() => getProviderConfig('NFS')).toThrow( - 'Invalid config provider received: NFS. Consult documentation for available values' - ); + expect(() => getProviderConfig('NFS')).toThrow('Invalid config provider received: NFS. 
Consult documentation for available values'); }); }); }); From 4c322883b9a5653db5e0aada9067b37eaefb47e3 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Sun, 17 May 2026 10:17:58 +0300 Subject: [PATCH 14/21] chore: remove unnecessary as type --- src/providers/baseProvider.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/providers/baseProvider.ts b/src/providers/baseProvider.ts index 3b67c0b..f546bc0 100644 --- a/src/providers/baseProvider.ts +++ b/src/providers/baseProvider.ts @@ -23,7 +23,7 @@ export abstract class BaseProvider implements Prov class: BaseProvider.name, }; - this.crawlingExtension = this.config.extension as string; + this.crawlingExtension = this.config.extension; } @withSpanAsyncV4 @@ -114,7 +114,7 @@ export abstract class BaseProvider implements Prov try { const fileContent = buffer.toString(); const json = JSON.parse(fileContent) as object; - const nestedJsonPath = this.config.nestedJsonPath as string; + const nestedJsonPath = this.config.nestedJsonPath; const results = jsonpath.query(json, nestedJsonPath) as string[]; const dirname = Path.dirname(currentPath); From b2d0541adbd96a077809aa1390cb1ac549cb6270 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Sun, 17 May 2026 10:50:53 +0300 Subject: [PATCH 15/21] chore: remove duplicated configuraion and fix test --- config/custom-environment-variables.json | 14 +------------- helm/templates/_tplValues.tpl | 2 +- helm/values.yaml | 15 +++++---------- src/providers/getProvider.ts | 10 ++-------- tests/unit/providers/getProvider.spec.ts | 4 ++++ 5 files changed, 13 insertions(+), 32 deletions(-) diff --git a/config/custom-environment-variables.json b/config/custom-environment-variables.json index 0342b0f..7c5e03d 100644 --- a/config/custom-environment-variables.json +++ b/config/custom-environment-variables.json @@ -63,22 +63,10 @@ "maxAttempts": { "__name": "S3_SOURCE_MAX_ATTEMPTS", "__format": "number" - }, - "extension": "CRAWLING_EXTENSION", - "nestedJsonPath": 
"CRAWLING_NESTED_JSON_PATH", - "ignoreNotFound": { - "__name": "CRAWLING_IGNORE_NOT_FOUND", - "__format": "boolean" } }, "NFS": { - "pvPath": "PV_SOURCE_PATH", - "extension": "CRAWLING_EXTENSION", - "nestedJsonPath": "CRAWLING_NESTED_JSON_PATH", - "ignoreNotFound": { - "__name": "CRAWLING_IGNORE_NOT_FOUND", - "__format": "boolean" - } + "pvPath": "PV_SOURCE_PATH" }, "crawling": { "extension": "CRAWLING_EXTENSION", diff --git a/helm/templates/_tplValues.tpl b/helm/templates/_tplValues.tpl index 6a383fa..cd19d47 100644 --- a/helm/templates/_tplValues.tpl +++ b/helm/templates/_tplValues.tpl @@ -96,5 +96,5 @@ Custom definitions {{- end -}} {{- define "merged.crawling" -}} -{{- include "common.tplvalues.merge" ( dict "values" ( list .Values.crawling .Values.global.crawling ) "context" . ) }} +{{- include "common.tplvalues.merge" ( dict "value" .Values.crawling "context" . ) }} {{- end -}} diff --git a/helm/values.yaml b/helm/values.yaml index e58d3f5..312d159 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -37,11 +37,6 @@ global: jobType: '' taskType: '' - crawling: - extension: '.json' - nestedJsonPath: "$..['uri','url']" - ignoreNotFound: true - cloudProvider: dockerRegistryUrl: flavor: @@ -122,11 +117,6 @@ jobManager: jobType: taskType: -crawling: - extension: '.json' - nestedJsonPath: "$..['uri','url']" - ignoreNotFound: true - env: port: 80 targetPort: 8080 @@ -143,6 +133,11 @@ env: url: '' maxConcurrency: 5 + crawling: + extension: '.json' + nestedJsonPath: "$..['uri','url']" + ignoreNotFound: true + resources: enabled: true value: diff --git a/src/providers/getProvider.ts b/src/providers/getProvider.ts index 638110d..542a694 100644 --- a/src/providers/getProvider.ts +++ b/src/providers/getProvider.ts @@ -3,21 +3,15 @@ import httpStatus from 'http-status-codes'; import { DependencyContainer } from 'tsyringe'; import { AppError } from '../common/appError'; import { Provider, ProviderConfig } from '../common/interfaces'; -import { SERVICES } from 
'../common/constants'; import { NFSProvider } from './nfsProvider'; import { S3Provider } from './s3Provider'; function getProvider(provider: string, container: DependencyContainer): Provider { - const childContainer = container.createChildContainer(); - childContainer.register(SERVICES.PROVIDER_CONFIG, { - useFactory: () => getProviderConfig(provider), - }); - switch (provider.toLowerCase()) { case 'nfs': - return childContainer.resolve(NFSProvider); + return container.resolve(NFSProvider); case 's3': - return childContainer.resolve(S3Provider); + return container.resolve(S3Provider); default: throw new AppError(httpStatus.INTERNAL_SERVER_ERROR, `Invalid config provider received: ${provider} - available values: "nfs" or "s3"`, false); } diff --git a/tests/unit/providers/getProvider.spec.ts b/tests/unit/providers/getProvider.spec.ts index 7f1c356..1a13680 100644 --- a/tests/unit/providers/getProvider.spec.ts +++ b/tests/unit/providers/getProvider.spec.ts @@ -33,6 +33,10 @@ describe('getProvider tests', () => { container.register(SERVICES.QUEUE_FILE_HANDLER, { useValue: queueFileHandlerMock }); container.register(SERVICES.JOB_MANAGER_CLIENT, { useValue: jobManagerClientMock }); container.register(SERVICES.PROVIDER, { useValue: configProviderMock }); + + container.register(SERVICES.PROVIDER_CONFIG, { + useFactory: () => getProviderConfig('default_provider'), + }); }); describe('getProvider nfs', () => { From e2619b929a6befb728ea5962bafc9bc8b57d97f4 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Mon, 18 May 2026 10:05:51 +0300 Subject: [PATCH 16/21] chore: remove crawling from tplValues --- helm/templates/_tplValues.tpl | 4 ---- helm/templates/configmap.yaml | 7 +++---- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/helm/templates/_tplValues.tpl b/helm/templates/_tplValues.tpl index cd19d47..846bcd9 100644 --- a/helm/templates/_tplValues.tpl +++ b/helm/templates/_tplValues.tpl @@ -94,7 +94,3 @@ Custom definitions {{- define "merged.jobManager" 
-}} {{- include "common.tplvalues.merge" ( dict "values" ( list .Values.jobManager .Values.global.jobManager ) "context" . ) }} {{- end -}} - -{{- define "merged.crawling" -}} -{{- include "common.tplvalues.merge" ( dict "value" .Values.crawling "context" . ) }} -{{- end -}} diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 1911e8e..ee81463 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -3,7 +3,6 @@ {{- $tracing := (include "merged.tracing" . ) | fromYaml }} {{- $metrics := (include "merged.metrics" . ) | fromYaml }} {{- $jobManager := (include "merged.jobManager" . ) | fromYaml }} -{{- $crawling := (include "merged.crawling" . ) | fromYaml }} {{- $provider := include "provider" . -}} apiVersion: v1 kind: ConfigMap @@ -50,6 +49,6 @@ data: INGESTION_TASK_BATCHES: {{ $jobManager.ingestion.batches | quote}} JOB_DELETE_TYPE: {{ $jobManager.delete.jobType | quote }} TASK_DELETE_TYPE: {{ $jobManager.delete.taskType | quote }} - CRAWLING_EXTENSION: {{ $crawling.extension | quote }} - CRAWLING_NESTED_JSON_PATH: {{ $crawling.nestedJsonPath | quote }} - CRAWLING_IGNORE_NOT_FOUND: {{ $crawling.ignoreNotFound | quote }} + CRAWLING_EXTENSION: {{ .Values.env.crawling.extension | quote }} + CRAWLING_NESTED_JSON_PATH: {{ .Values.env.crawling.nestedJsonPath | quote }} + CRAWLING_IGNORE_NOT_FOUND: {{ .Values.env.crawling.ignoreNotFound | quote }} From 8c3848054e2956d7d1b326dc595307d6d6228c28 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Mon, 18 May 2026 12:03:06 +0300 Subject: [PATCH 17/21] refactor: added crawling configuration into the providers through the code --- src/common/interfaces.ts | 4 ++-- src/providers/baseProvider.ts | 5 +++-- src/providers/getProvider.ts | 11 ++++++++++- tests/integration/providers/getProvider.spec.ts | 15 ++++++++++----- tests/unit/providers/getProvider.spec.ts | 8 +++++--- 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/common/interfaces.ts 
b/src/common/interfaces.ts index c78d1c5..268aec1 100644 --- a/src/common/interfaces.ts +++ b/src/common/interfaces.ts @@ -72,8 +72,8 @@ export interface NFSConfig extends BaseProviderConfig { } export interface BaseProviderConfig { - extension: string; - nestedJsonPath: string; + extension?: string; + nestedJsonPath?: string; ignoreNotFound?: boolean; } diff --git a/src/providers/baseProvider.ts b/src/providers/baseProvider.ts index f546bc0..86d5231 100644 --- a/src/providers/baseProvider.ts +++ b/src/providers/baseProvider.ts @@ -23,7 +23,8 @@ export abstract class BaseProvider implements Prov class: BaseProvider.name, }; - this.crawlingExtension = this.config.extension; + const extension = (this.config.extension as string) || '.json'; + this.crawlingExtension = extension.startsWith('.') ? extension : `.${extension}`; } @withSpanAsyncV4 @@ -114,7 +115,7 @@ export abstract class BaseProvider implements Prov try { const fileContent = buffer.toString(); const json = JSON.parse(fileContent) as object; - const nestedJsonPath = this.config.nestedJsonPath; + const nestedJsonPath = (this.config.nestedJsonPath as string) || "$..['uri','url']"; const results = jsonpath.query(json, nestedJsonPath) as string[]; const dirname = Path.dirname(currentPath); diff --git a/src/providers/getProvider.ts b/src/providers/getProvider.ts index 542a694..43fae73 100644 --- a/src/providers/getProvider.ts +++ b/src/providers/getProvider.ts @@ -19,7 +19,16 @@ function getProvider(provider: string, container: DependencyContainer): Provider function getProviderConfig(provider: string): ProviderConfig { try { - return config.get(provider); + // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion + const providerConfig = config.get(provider) as ProviderConfig; + + try { + // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion + const crawlingConfig = config.get('crawling') as Record; + return { ...providerConfig, ...crawlingConfig } as ProviderConfig; + 
} catch (err) { + return providerConfig; + } } catch (err) { throw new AppError( httpStatus.INTERNAL_SERVER_ERROR, diff --git a/tests/integration/providers/getProvider.spec.ts b/tests/integration/providers/getProvider.spec.ts index d0a6f0b..a39d138 100644 --- a/tests/integration/providers/getProvider.spec.ts +++ b/tests/integration/providers/getProvider.spec.ts @@ -1,22 +1,27 @@ import config from 'config'; import { container } from 'tsyringe'; import { AppError } from '../../../src/common/appError'; -import { NFSConfig, S3Config } from '../../../src/common/interfaces'; import { getProvider, getProviderConfig } from '../../../src/providers/getProvider'; describe('getProviderConfig tests', () => { - it('should return the NFS config when the provider is NFS', () => { + it('should return the NFS config merged with crawling config when the provider is NFS', () => { const provider = 'NFS'; - const expected = config.get('NFS'); + /* eslint-disable @typescript-eslint/no-unnecessary-type-assertion */ + const nfsConfig = config.get('NFS') as Record; + const crawlingConfig = config.get('crawling') as Record; + const expected = { ...nfsConfig, ...crawlingConfig }; const response = getProviderConfig(provider); expect(response).toStrictEqual(expected); }); - it('should return the S3 config when the provider is S3', () => { + it('should return the S3 config merged with crawling config when the provider is S3', () => { const provider = 'S3'; - const expected = config.get('S3'); + const s3Config = config.get('S3') as Record; + const crawlingConfig = config.get('crawling') as Record; + /* eslint-enable @typescript-eslint/no-unnecessary-type-assertion */ + const expected = { ...s3Config, ...crawlingConfig }; const response = getProviderConfig(provider); diff --git a/tests/unit/providers/getProvider.spec.ts b/tests/unit/providers/getProvider.spec.ts index 1a13680..bfc7df0 100644 --- a/tests/unit/providers/getProvider.spec.ts +++ b/tests/unit/providers/getProvider.spec.ts @@ -12,9 
+12,11 @@ jest.mock('config', () => ({ get: jest.fn((key: string) => { switch (key) { case 'NFS': - return { pvPath: '/tmp', extension: '.json', nestedJsonPath: "$..['uri','url']" }; + return { pvPath: '/tmp' }; case 'S3': - return { bucket: 'test-bucket', extension: '.json', nestedJsonPath: "$..['uri','url']" }; + return { bucket: 'test-bucket' }; + case 'crawling': + return { extension: '.json', nestedJsonPath: "$..['uri','url']", ignoreNotFound: true }; default: return {}; } @@ -35,7 +37,7 @@ describe('getProvider tests', () => { container.register(SERVICES.PROVIDER, { useValue: configProviderMock }); container.register(SERVICES.PROVIDER_CONFIG, { - useFactory: () => getProviderConfig('default_provider'), + useFactory: () => getProviderConfig('NFS'), }); }); From 9502ba6dd161b4cf72aa76132811de6c3dffa693 Mon Sep 17 00:00:00 2001 From: asafMasa Date: Mon, 18 May 2026 17:32:37 +0300 Subject: [PATCH 18/21] refactor: change interface nullable --- src/common/interfaces.ts | 6 +++--- src/providers/baseProvider.ts | 6 +++--- src/providers/getProvider.ts | 15 +++++---------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/common/interfaces.ts b/src/common/interfaces.ts index 268aec1..2b3033f 100644 --- a/src/common/interfaces.ts +++ b/src/common/interfaces.ts @@ -72,9 +72,9 @@ export interface NFSConfig extends BaseProviderConfig { } export interface BaseProviderConfig { - extension?: string; - nestedJsonPath?: string; - ignoreNotFound?: boolean; + extension: string; + nestedJsonPath: string; + ignoreNotFound: boolean; } export type ProviderConfig = S3Config | NFSConfig; diff --git a/src/providers/baseProvider.ts b/src/providers/baseProvider.ts index 86d5231..27b6ab0 100644 --- a/src/providers/baseProvider.ts +++ b/src/providers/baseProvider.ts @@ -23,7 +23,7 @@ export abstract class BaseProvider implements Prov class: BaseProvider.name, }; - const extension = (this.config.extension as string) || '.json'; + const extension = this.config.extension; 
this.crawlingExtension = extension.startsWith('.') ? extension : `.${extension}`; } @@ -80,7 +80,7 @@ export abstract class BaseProvider implements Prov } } } catch (err) { - const ignoreNotFound = this.config.ignoreNotFound === true; + const ignoreNotFound = this.config.ignoreNotFound; if (ignoreNotFound && err instanceof AppError && err.status === StatusCodes.NOT_FOUND) { this.logger.warn({ msg: 'File not found, skipping...', logContext, path: currentPath, modelName }); @@ -115,7 +115,7 @@ export abstract class BaseProvider implements Prov try { const fileContent = buffer.toString(); const json = JSON.parse(fileContent) as object; - const nestedJsonPath = (this.config.nestedJsonPath as string) || "$..['uri','url']"; + const nestedJsonPath = this.config.nestedJsonPath; const results = jsonpath.query(json, nestedJsonPath) as string[]; const dirname = Path.dirname(currentPath); diff --git a/src/providers/getProvider.ts b/src/providers/getProvider.ts index 43fae73..f7a6625 100644 --- a/src/providers/getProvider.ts +++ b/src/providers/getProvider.ts @@ -2,7 +2,7 @@ import config from 'config'; import httpStatus from 'http-status-codes'; import { DependencyContainer } from 'tsyringe'; import { AppError } from '../common/appError'; -import { Provider, ProviderConfig } from '../common/interfaces'; +import { BaseProviderConfig, Provider, ProviderConfig } from '../common/interfaces'; import { NFSProvider } from './nfsProvider'; import { S3Provider } from './s3Provider'; @@ -19,16 +19,11 @@ function getProvider(provider: string, container: DependencyContainer): Provider function getProviderConfig(provider: string): ProviderConfig { try { - // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion - const providerConfig = config.get(provider) as ProviderConfig; + const providerConfig: ProviderConfig = config.get(provider); + const crawlingConfig: BaseProviderConfig = config.get('crawling'); + const fullConfig = { ...providerConfig, ...crawlingConfig }; - 
try { - // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion - const crawlingConfig = config.get('crawling') as Record; - return { ...providerConfig, ...crawlingConfig } as ProviderConfig; - } catch (err) { - return providerConfig; - } + return fullConfig; } catch (err) { throw new AppError( httpStatus.INTERNAL_SERVER_ERROR, From 64ebaaf9a6dc4a3bd12b19f21a580791944d9f51 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Mon, 18 May 2026 18:35:59 +0300 Subject: [PATCH 19/21] fix: fixed pathToTileset to reach to tileset.json --- src/providers/baseProvider.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/providers/baseProvider.ts b/src/providers/baseProvider.ts index 27b6ab0..1d7df1e 100644 --- a/src/providers/baseProvider.ts +++ b/src/providers/baseProvider.ts @@ -31,16 +31,23 @@ export abstract class BaseProvider implements Prov public async streamModelPathsToQueueFile(modelId: string, pathToTileset: string, modelName: string): Promise { const logContext = { ...this.logContext, function: this.streamModelPathsToQueueFile.name }; + let initialPath = pathToTileset; + if (!initialPath.endsWith(this.crawlingExtension)) { + initialPath = Path.join(initialPath, `tileset${this.crawlingExtension}`); + + initialPath = initialPath.replace(/\\/g, '/').replace(/^\//, ''); + } + this.logger.info({ msg: 'Started streaming model paths to queue file', logContext, modelName, modelId, - pathToTileset, + pathToTileset: initialPath, }); const visitedFiles = new Set(); - const processingQueue: string[] = [pathToTileset]; + const processingQueue: string[] = [initialPath]; let totalFilesAdded = 0; while (processingQueue.length > 0) { From ee5ad2a31e0849b47936821b75235baa7de4b2e8 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Tue, 19 May 2026 11:25:28 +0300 Subject: [PATCH 20/21] chore: handling empty crawling extension and nestedJsonPath, even though not possible --- src/providers/baseProvider.ts | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) diff --git a/src/providers/baseProvider.ts b/src/providers/baseProvider.ts index 1d7df1e..45f9f87 100644 --- a/src/providers/baseProvider.ts +++ b/src/providers/baseProvider.ts @@ -23,7 +23,8 @@ export abstract class BaseProvider implements Prov class: BaseProvider.name, }; - const extension = this.config.extension; + // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion + const extension = this.config.extension || '.json'; this.crawlingExtension = extension.startsWith('.') ? extension : `.${extension}`; } @@ -122,7 +123,8 @@ export abstract class BaseProvider implements Prov try { const fileContent = buffer.toString(); const json = JSON.parse(fileContent) as object; - const nestedJsonPath = this.config.nestedJsonPath; + // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion + const nestedJsonPath = this.config.nestedJsonPath || "$..['uri','url']"; const results = jsonpath.query(json, nestedJsonPath) as string[]; const dirname = Path.dirname(currentPath); From 12b421b9312f4426ae554c44da2fe36bb826cdf0 Mon Sep 17 00:00:00 2001 From: TULCHINSKI LIRAN Date: Tue, 19 May 2026 12:44:34 +0300 Subject: [PATCH 21/21] chore: revert default and update test --- src/providers/baseProvider.ts | 10 +++------- tests/integration/providers/nfsProvider.spec.ts | 6 ++++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/providers/baseProvider.ts b/src/providers/baseProvider.ts index 45f9f87..911e10d 100644 --- a/src/providers/baseProvider.ts +++ b/src/providers/baseProvider.ts @@ -23,8 +23,7 @@ export abstract class BaseProvider implements Prov class: BaseProvider.name, }; - // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion - const extension = this.config.extension || '.json'; + const extension = this.config.extension; this.crawlingExtension = extension.startsWith('.') ? 
extension : `.${extension}`; } @@ -88,9 +87,7 @@ export abstract class BaseProvider implements Prov } } } catch (err) { - const ignoreNotFound = this.config.ignoreNotFound; - - if (ignoreNotFound && err instanceof AppError && err.status === StatusCodes.NOT_FOUND) { + if (this.config.ignoreNotFound && err instanceof AppError && err.status === StatusCodes.NOT_FOUND) { this.logger.warn({ msg: 'File not found, skipping...', logContext, path: currentPath, modelName }); continue; } @@ -123,8 +120,7 @@ export abstract class BaseProvider implements Prov try { const fileContent = buffer.toString(); const json = JSON.parse(fileContent) as object; - // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion - const nestedJsonPath = this.config.nestedJsonPath || "$..['uri','url']"; + const nestedJsonPath = this.config.nestedJsonPath; const results = jsonpath.query(json, nestedJsonPath) as string[]; const dirname = Path.dirname(currentPath); diff --git a/tests/integration/providers/nfsProvider.spec.ts b/tests/integration/providers/nfsProvider.spec.ts index c19bd65..29ba143 100644 --- a/tests/integration/providers/nfsProvider.spec.ts +++ b/tests/integration/providers/nfsProvider.spec.ts @@ -9,7 +9,7 @@ import { faker } from '@faker-js/faker'; import { getApp } from '../../../src/app'; import { NFSProvider } from '../../../src/providers/nfsProvider'; import { SERVICES } from '../../../src/common/constants'; -import { NFSConfig } from '../../../src/common/interfaces'; +import { BaseProviderConfig, NFSConfig } from '../../../src/common/interfaces'; import { AppError } from '../../../src/common/appError'; import { createFile, queueFileHandlerMock } from '../../helpers/mockCreator'; import { QueueFileHandler } from '../../../src/handlers/queueFileHandler'; @@ -19,7 +19,7 @@ describe('NFSProvider tests', () => { let provider: NFSProvider; let queueFileHandler: QueueFileHandler; const queueFilePath = os.tmpdir(); - const nfsConfig = config.get('NFS'); + const 
nfsConfig = { ...config.get('NFS'), ...config.get('crawling') }; let nfsHelper: NFSHelper; beforeAll(() => { @@ -94,6 +94,8 @@ describe('NFSProvider tests', () => { const modelName = faker.word.sample(); const modelId = faker.string.uuid(); + (provider as unknown as { config: BaseProviderConfig }).config.ignoreNotFound = false; + const result = async () => { await provider.streamModelPathsToQueueFile(modelId, pathToTileset, modelName); };