Skip to content

Commit 57aafed

Browse files
committed
feat: use tree-sitter to parse requirements.txt more correctly
1 parent aba4457 commit 57aafed

12 files changed

Lines changed: 554 additions & 300 deletions

File tree

package-lock.json

Lines changed: 300 additions & 114 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,8 @@
5353
"https-proxy-agent": "^7.0.6",
5454
"node-fetch": "^3.3.2",
5555
"packageurl-js": "~1.0.2",
56+
"tree-sitter-requirements": "github:Strum355/tree-sitter-requirements",
57+
"web-tree-sitter": "^0.26.6",
5658
"yargs": "^18.0.0"
5759
},
5860
"devDependencies": {
@@ -64,6 +66,7 @@
6466
"c8": "^11.0.0",
6567
"chai": "^4.3.7",
6668
"eslint": "^8.42.0",
69+
"eslint-import-resolver-typescript": "^4.4.4",
6770
"eslint-plugin-editorconfig": "^4.0.3",
6871
"eslint-plugin-import": "^2.29.1",
6972
"esmock": "^2.6.2",

src/analysis.js

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ function addProxyAgent(options, opts) {
4141
async function requestStack(provider, manifest, url, html = false, opts = {}) {
4242
opts["source-manifest"] = Buffer.from(fs.readFileSync(manifest).toString()).toString('base64')
4343
opts["manifest-type"] = path.parse(manifest).base
44-
let provided = provider.provideStack(manifest, opts) // throws error if content providing failed
44+
let provided = await provider.provideStack(manifest, opts) // throws error if content providing failed
4545
opts["source-manifest"] = ""
4646
opts[rhdaOperationTypeHeader.toUpperCase().replaceAll("-", "_")] = "stack-analysis"
4747
let startTime = new Date()
@@ -105,7 +105,7 @@ async function requestStack(provider, manifest, url, html = false, opts = {}) {
105105
async function requestComponent(provider, manifest, url, opts = {}) {
106106
opts["source-manifest"] = Buffer.from(fs.readFileSync(manifest).toString()).toString('base64')
107107

108-
let provided = provider.provideComponent(manifest, opts) // throws error if content providing failed
108+
let provided = await provider.provideComponent(manifest, opts) // throws error if content providing failed
109109
opts["source-manifest"] = ""
110110
opts[rhdaOperationTypeHeader.toUpperCase().replaceAll("-", "_")] = "component-analysis"
111111
if (process.env["TRUSTIFY_DA_DEBUG"] === "true") {

src/provider.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ import Javascript_yarn from './providers/javascript_yarn.js';
1010
import pythonPipProvider from './providers/python_pip.js'
1111

1212
/** @typedef {{ecosystem: string, contentType: string, content: string}} Provided */
13-
/** @typedef {{isSupported: function(string): boolean, validateLockFile: function(string): void, provideComponent: function(string, {}): Provided, provideStack: function(string, {}): Provided}} Provider */
13+
/** @typedef {{isSupported: function(string): boolean, validateLockFile: function(string): void, provideComponent: function(string, {}): Provided | Promise<Provided>, provideStack: function(string, {}): Provided | Promise<Provided>}} Provider */
1414

1515
/**
1616
* MUST include all providers here.

src/providers/python_controller.js

Lines changed: 66 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
import fs from "node:fs";
22
import path from 'node:path';
3-
import os, {EOL} from "os";
3+
import os, { EOL } from "os";
44

5-
import {environmentVariableIsPopulated,getCustom, invokeCommand} from "../tools.js";
5+
import { environmentVariableIsPopulated, getCustom, invokeCommand } from "../tools.js";
6+
7+
import { getParser, getRequirementQuery, getPinnedVersionQuery } from './requirements_parser.js';
68

79
function getPipFreezeOutput() {
810
try {
@@ -23,13 +25,15 @@ function getPipShowOutput(depNames) {
2325
/** @typedef {{name: string, version: string, dependencies: DependencyEntry[]}} DependencyEntry */
2426

2527
export default class Python_controller {
26-
2728
pythonEnvDir
2829
pathToPipBin
2930
pathToPythonBin
3031
realEnvironment
3132
pathToRequirements
3233
options
34+
parser
35+
requirementsQuery
36+
pinnedVersionQuery
3337

3438
/**
3539
* Constructor to create new python controller instance to interact with pip package manager
@@ -39,14 +43,18 @@ export default class Python_controller {
3943
* @param {string} pathToRequirements
4044
* @
4145
*/
42-
constructor(realEnvironment,pathToPip,pathToPython,pathToRequirements,options={}) {
46+
constructor(realEnvironment, pathToPip, pathToPython, pathToRequirements, options={}) {
4347
this.pathToPythonBin = pathToPython
4448
this.pathToPipBin = pathToPip
4549
this.realEnvironment= realEnvironment
4650
this.prepareEnvironment()
4751
this.pathToRequirements = pathToRequirements
4852
this.options = options
53+
this.parser = getParser()
54+
this.requirementsQuery = getRequirementQuery()
55+
this.pinnedVersionQuery = getPinnedVersionQuery()
4956
}
57+
5058
prepareEnvironment() {
5159
if(!this.realEnvironment) {
5260
this.pythonEnvDir = path.join(path.sep, "tmp", "trustify_da_env_js")
@@ -87,6 +95,24 @@ export default class Python_controller {
8795
}
8896
}
8997

98+
/**
99+
* Parse the requirements.txt file using tree-sitter and return structured requirement data.
100+
* @return {Promise<{name: string, version: string|null}[]>}
101+
*/
102+
async #parseRequirements() {
103+
const content = fs.readFileSync(this.pathToRequirements).toString();
104+
const tree = (await this.parser).parse(content);
105+
return Promise.all((await this.requirementsQuery).matches(tree.rootNode).map(async (match) => {
106+
const reqNode = match.captures.find(c => c.name === 'req').node;
107+
const name = match.captures.find(c => c.name === 'name').node.text;
108+
const versionMatches = (await this.pinnedVersionQuery).matches(reqNode);
109+
const version = versionMatches.length > 0
110+
? versionMatches[0].captures.find(c => c.name === 'version').node.text
111+
: null;
112+
return { name, version };
113+
}));
114+
}
115+
90116
#decideIfWindowsOrLinuxPath(fileName) {
91117
if (os.platform() === "win32") {
92118
return fileName + ".exe"
@@ -97,9 +123,9 @@ export default class Python_controller {
97123
/**
98124
*
99125
* @param {boolean} includeTransitive - whether to include transitive dependencies in the returned object
100-
* @return {[DependencyEntry]}
126+
* @return {Promise<[DependencyEntry]>}
101127
*/
102-
getDependencies(includeTransitive) {
128+
async getDependencies(includeTransitive) {
103129
let startingTime
104130
let endingTime
105131
if (process.env["TRUSTIFY_DA_DEBUG"] === "true") {
@@ -123,10 +149,10 @@ export default class Python_controller {
123149
if(matchManifestVersions === "true") {
124150
throw new Error("Conflicting settings, TRUSTIFY_DA_PYTHON_INSTALL_BEST_EFFORTS=true can only work with MATCH_MANIFEST_VERSIONS=false")
125151
}
126-
this.#installingRequirementsOneByOne()
152+
await this.#installingRequirementsOneByOne()
127153
}
128154
}
129-
let dependencies = this.#getDependenciesImpl(includeTransitive)
155+
let dependencies = await this.#getDependenciesImpl(includeTransitive)
130156
this.#cleanEnvironment()
131157
if (process.env["TRUSTIFY_DA_DEBUG"] === "true") {
132158
endingTime = new Date()
@@ -137,15 +163,13 @@ export default class Python_controller {
137163
return dependencies
138164
}
139165

140-
#installingRequirementsOneByOne() {
141-
let requirementsContent = fs.readFileSync(this.pathToRequirements);
142-
let requirementsRows = requirementsContent.toString().split(EOL);
143-
requirementsRows.filter((line) => !line.trim().startsWith("#")).filter((line) => line.trim() !== "").forEach( (dependency) => {
144-
let dependencyName = getDependencyName(dependency);
166+
async #installingRequirementsOneByOne() {
167+
const requirements = await this.#parseRequirements();
168+
requirements.forEach(({name}) => {
145169
try {
146-
invokeCommand(this.pathToPipBin, ['install', dependencyName])
170+
invokeCommand(this.pathToPipBin, ['install', name])
147171
} catch (error) {
148-
throw new Error(`Failed in best-effort installing ${dependencyName} in virtual python environment`, {cause: error})
172+
throw new Error(`Failed in best-effort installing ${name} in virtual python environment`, {cause: error})
149173
}
150174
})
151175
}
@@ -162,44 +186,33 @@ export default class Python_controller {
162186
}
163187
}
164188

165-
#getDependenciesImpl(includeTransitive) {
166-
let dependencies = new Array()
189+
async #getDependenciesImpl(includeTransitive) {
190+
let dependencies = []
167191
let usePipDepTree = getCustom("TRUSTIFY_DA_PIP_USE_DEP_TREE","false",this.options);
168-
let freezeOutput
169-
let lines
170-
let depNames
171-
let pipShowOutput
172192
let allPipShowDeps
173193
let pipDepTreeJsonArrayOutput
174194
if(usePipDepTree !== "true") {
175-
freezeOutput = getPipFreezeOutput.call(this);
176-
lines = freezeOutput.split(EOL)
177-
depNames = lines.map( line => getDependencyName(line))
178-
}
179-
else {
180-
pipDepTreeJsonArrayOutput = getDependencyTreeJsonFromPipDepTree(this.pathToPipBin,this.pathToPythonBin)
181-
}
182-
183-
184-
if(usePipDepTree !== "true") {
185-
pipShowOutput = getPipShowOutput.call(this, depNames);
195+
const freezeOutput = getPipFreezeOutput.call(this);
196+
const lines = freezeOutput.split(EOL)
197+
const depNames = lines.map( line => getDependencyName(line))
198+
const pipShowOutput = getPipShowOutput.call(this, depNames);
186199
allPipShowDeps = pipShowOutput.split( EOL + "---" + EOL);
200+
} else {
201+
pipDepTreeJsonArrayOutput = getDependencyTreeJsonFromPipDepTree(this.pathToPipBin,this.pathToPythonBin)
187202
}
188-
//debug
189-
// pipShowOutput = "alternative pip show output goes here for debugging"
190203

191204
let matchManifestVersions = getCustom("MATCH_MANIFEST_VERSIONS","true",this.options);
192-
let linesOfRequirements = fs.readFileSync(this.pathToRequirements).toString().split(EOL).filter( (line) => !line.trim().startsWith("#")).map(line => line.trim())
205+
let parsedRequirements = await this.#parseRequirements()
193206
let CachedEnvironmentDeps = {}
194207
if(usePipDepTree !== "true") {
195-
allPipShowDeps.forEach((record) => {
208+
allPipShowDeps.forEach(record => {
196209
let dependencyName = getDependencyNameShow(record).toLowerCase()
197210
CachedEnvironmentDeps[dependencyName] = record
198211
CachedEnvironmentDeps[dependencyName.replace("-", "_")] = record
199212
CachedEnvironmentDeps[dependencyName.replace("_", "-")] = record
200213
})
201214
} else {
202-
pipDepTreeJsonArrayOutput.forEach( depTreeEntry => {
215+
pipDepTreeJsonArrayOutput.forEach(depTreeEntry => {
203216
let packageName = depTreeEntry["package"]["package_name"].toLowerCase()
204217
let pipDepTreeEntryForCache = {
205218
name: packageName,
@@ -211,41 +224,25 @@ export default class Python_controller {
211224
CachedEnvironmentDeps[packageName.replace("_", "-")] = pipDepTreeEntryForCache
212225
})
213226
}
214-
linesOfRequirements.forEach( (dep) => {
215-
// if matchManifestVersions setting is turned on , then
216-
if(matchManifestVersions === "true") {
217-
let dependencyName
218-
let manifestVersion
227+
parsedRequirements.forEach(({ name: depName, version: manifestVersion }) => {
228+
if(matchManifestVersions === "true" && manifestVersion != null) {
219229
let installedVersion
220-
let doubleEqualSignPosition
221-
if(dep.includes("==")) {
222-
doubleEqualSignPosition = dep.indexOf("==")
223-
manifestVersion = dep.substring(doubleEqualSignPosition + 2).trim()
224-
if(manifestVersion.includes("#")) {
225-
let hashCharIndex = manifestVersion.indexOf("#");
226-
manifestVersion = manifestVersion.substring(0,hashCharIndex)
230+
if(CachedEnvironmentDeps[depName.toLowerCase()] !== undefined) {
231+
if(usePipDepTree !== "true") {
232+
installedVersion = getDependencyVersion(CachedEnvironmentDeps[depName.toLowerCase()])
233+
} else {
234+
installedVersion = CachedEnvironmentDeps[depName.toLowerCase()].version
227235
}
228-
dependencyName = getDependencyName(dep)
229-
// only compare between declared version in manifest to installed version , if the package is installed.
230-
if(CachedEnvironmentDeps[dependencyName.toLowerCase()] !== undefined) {
231-
if(usePipDepTree !== "true") {
232-
installedVersion = getDependencyVersion(CachedEnvironmentDeps[dependencyName.toLowerCase()])
233-
} else {
234-
installedVersion = CachedEnvironmentDeps[dependencyName.toLowerCase()].version
235-
}
236-
}
237-
if(installedVersion) {
238-
if (manifestVersion.trim() !== installedVersion.trim()) {
239-
throw new Error(`Can't continue with analysis - versions mismatch for dependency name ${dependencyName} (manifest version=${manifestVersion}, installed version=${installedVersion}).If you want to allow version mismatch for analysis between installed and requested packages, set environment variable/setting MATCH_MANIFEST_VERSIONS=false`)
240-
}
236+
}
237+
if(installedVersion) {
238+
if (manifestVersion.trim() !== installedVersion.trim()) {
239+
throw new Error(`Can't continue with analysis - versions mismatch for dependency name ${depName} (manifest version=${manifestVersion}, installed version=${installedVersion}).If you want to allow version mismatch for analysis between installed and requested packages, set environment variable/setting MATCH_MANIFEST_VERSIONS=false`)
241240
}
242241
}
243242
}
244-
let path = new Array()
245-
let depName = getDependencyName(dep)
246-
//array to track a path for each branch in the dependency tree
243+
let path = []
247244
path.push(depName.toLowerCase())
248-
bringAllDependencies(dependencies,depName,CachedEnvironmentDeps,includeTransitive,path,usePipDepTree)
245+
bringAllDependencies(dependencies, depName, CachedEnvironmentDeps, includeTransitive, path, usePipDepTree)
249246
})
250247
dependencies.sort((dep1,dep2) =>{
251248
const DEP1 = dep1.name.toLowerCase()
@@ -350,12 +347,12 @@ function bringAllDependencies(dependencies, dependencyName, cachedEnvironmentDep
350347
version = record.version
351348
directDeps = record.dependencies
352349
}
353-
let targetDeps = new Array()
350+
let targetDeps = []
354351

355-
let entry = { "name" : depName , "version" : version, "dependencies" : [] }
352+
let entry = { "name": depName, "version": version, "dependencies": [] }
356353
dependencies.push(entry)
357354
directDeps.forEach( (dep) => {
358-
let depArray = new Array()
355+
let depArray = []
359356
// to avoid infinite loop, check if the dependency not already on current path, before going recursively resolving its dependencies.
360357
if(!path.includes(dep.toLowerCase())) {
361358
// pass the path plus the current dep into the recursion

0 commit comments

Comments
 (0)