Skip to content

Commit 3712e70

Browse files
fix: prevent heap overflow in large monorepo scans
Add streaming-based filtering to globWithGitIgnore to prevent heap overflow when scanning large monorepos with 100k+ files. Instead of accumulating all file paths and filtering afterwards, files are now filtered during streaming, which dramatically reduces memory usage.

Changes:
- Add `filter` option to globWithGitIgnore for early filtering during streaming
- Add createSupportedFilesFilter helper to create a filter from supported files
- Update getPackageFilesForScan to use the streaming filter

Fixes SMO-522

Ported from v1.x commit 9bbb8e8 ([SMO-522] Fix heap overflow in large monorepo scans #1026)

Co-authored-by: Mikola Lysenko <mikolalysenko@gmail.com>
1 parent d69e195 commit 3712e70

File tree

3 files changed

+46
-14
lines changed

3 files changed

+46
-14
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
99
### Changed
1010
- Updated Coana CLI to v14.12.148.
1111

12+
### Fixed
13+
- Prevent heap overflow in large monorepo scans by using streaming-based filtering to avoid accumulating all file paths in memory before filtering.
14+
1215
## [2.1.0](https://github.com/SocketDev/socket-cli/releases/tag/v2.1.0) - 2025-11-02
1316

1417
### Added

packages/cli/src/utils/fs/glob.mts

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,14 @@ export function filterBySupportedScanFiles(
165165
return filepaths.filter(p => micromatch.some(p, patterns, { dot: true }))
166166
}
167167

168+
/**
 * Build a reusable predicate that reports whether a single file path matches
 * any of the supported scan-file patterns.
 *
 * The glob patterns are derived once, up front, via getSupportedFilePatterns;
 * the returned closure then tests one path at a time. This lets a caller
 * filter paths as they are produced instead of collecting every path first.
 *
 * @param supportedFiles - The `data` payload of a successful
 *   `getReportSupportedFiles` SDK result.
 * @returns A function that returns true when `filepath` matches at least one
 *   supported pattern. Matching uses micromatch with `dot: true`.
 */
export function createSupportedFilesFilter(
169+
supportedFiles: SocketSdkSuccessResult<'getReportSupportedFiles'>['data'],
170+
): (filepath: string) => boolean {
171+
// Compute the pattern list once so the per-path check does no repeated setup.
const patterns = getSupportedFilePatterns(supportedFiles)
172+
return (filepath: string) =>
173+
micromatch.some(filepath, patterns, { dot: true })
174+
}
175+
168176
export function getSupportedFilePatterns(
169177
supportedFiles: SocketSdkSuccessResult<'getReportSupportedFiles'>['data'],
170178
): string[] {
@@ -179,6 +187,10 @@ export function getSupportedFilePatterns(
179187
}
180188

181189
// Options accepted by globWithGitIgnore: everything in GlobOptions plus the
// two optional fields below.
type GlobWithGitIgnoreOptions = GlobOptions & {
190+
// Optional filter function to apply during streaming.
191+
// When provided, only files passing this filter are accumulated.
192+
// This is critical for memory efficiency when scanning large monorepos.
// The predicate receives each path exactly as the glob stream yields it
// (it is not relativized to cwd first). Supplying a filter also routes the
// call through the streaming loop rather than the direct glob fast path.
193+
filter?: ((filepath: string) => boolean) | undefined
182194
// Parsed Socket YAML configuration.
// NOTE(review): how it is consumed is not visible in this hunk — presumably
// it contributes ignore patterns; confirm against the full function body.
socketConfig?: SocketYml | undefined
183195
}
184196

@@ -188,6 +200,7 @@ export async function globWithGitIgnore(
188200
): Promise<string[]> {
189201
const {
190202
cwd = process.cwd(),
203+
filter,
191204
socketConfig,
192205
...additionalOptions
193206
} = { __proto__: null, ...options } as GlobWithGitIgnoreOptions
@@ -244,27 +257,39 @@ export async function globWithGitIgnore(
244257
...additionalOptions,
245258
} as GlobOptions
246259

247-
if (!hasNegatedPattern) {
260+
// When no filter is provided and no negated patterns exist, use the fast path.
261+
if (!hasNegatedPattern && !filter) {
248262
return await fastGlob.glob(patterns as string[], globOptions)
249263
}
250-
251264
// Add support for negated "ignore" patterns which many globbing libraries,
252265
// including 'fast-glob', 'globby', and 'tinyglobby', lack support for.
253-
const filtered: string[] = []
254-
const ig = ignore().add([...ignores])
266+
// Use streaming to avoid unbounded memory accumulation.
267+
// This is critical for large monorepos with 100k+ files.
268+
const results: string[] = []
269+
const ig = hasNegatedPattern ? ignore().add([...ignores]) : null
255270
const stream = fastGlob.globStream(
256271
patterns as string[],
257272
globOptions,
258273
) as AsyncIterable<string>
259274
for await (const p of stream) {
260-
// Note: the input files must be INSIDE the cwd. If you get strange looking
261-
// relative path errors here, most likely your path is outside the given cwd.
262-
const relPath = globOptions.absolute ? path.relative(cwd, p) : p
263-
if (!ig.ignores(relPath)) {
264-
filtered.push(p)
275+
// Check gitignore patterns with negation support.
276+
if (ig) {
277+
// Note: the input files must be INSIDE the cwd. If you get strange looking
278+
// relative path errors here, most likely your path is outside the given cwd.
279+
const relPath = globOptions.absolute ? path.relative(cwd, p) : p
280+
if (ig.ignores(relPath)) {
281+
continue
282+
}
283+
}
284+
// Apply the optional filter to reduce memory usage.
285+
// When scanning large monorepos, this filters early (e.g., to manifest files only)
286+
// instead of accumulating all 100k+ files and filtering later.
287+
if (filter && !filter(p)) {
288+
continue
265289
}
290+
results.push(p)
266291
}
267-
return filtered
292+
return results
268293
}
269294

270295
export async function globWorkspace(

packages/cli/src/utils/fs/path-resolve.mts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import { WIN32 } from '@socketsecurity/lib/constants/platform'
77
import { isDirSync } from '@socketsecurity/lib/fs'
88

99
import {
10-
filterBySupportedScanFiles,
10+
createSupportedFilesFilter,
1111
globWithGitIgnore,
1212
pathsToGlobPatterns,
1313
} from './glob.mts'
@@ -127,13 +127,17 @@ export async function getPackageFilesForScan(
127127
...options,
128128
} as PackageFilesForScanOptions
129129

130-
const filepaths = await globWithGitIgnore(
130+
// Apply the supported files filter during streaming to avoid accumulating
131+
// all files in memory. This is critical for large monorepos with 100k+ files
132+
// where accumulating all paths before filtering causes OOM errors.
133+
const filter = createSupportedFilesFilter(supportedFiles)
134+
135+
return await globWithGitIgnore(
131136
pathsToGlobPatterns(inputPaths, options?.cwd),
132137
{
133138
cwd,
139+
filter,
134140
socketConfig,
135141
},
136142
)
137-
138-
return filterBySupportedScanFiles(filepaths!, supportedFiles)
139143
}

0 commit comments

Comments
 (0)