diff --git a/commands/audit/audit.go b/commands/audit/audit.go index 88c0a082f..be4c34a63 100644 --- a/commands/audit/audit.go +++ b/commands/audit/audit.go @@ -292,6 +292,11 @@ func (auditCmd *AuditCommand) CommandName() string { // Returns an audit Results object containing all the scan results. // If the current server is entitled for JAS, the advanced security results will be included in the scan results. func RunAudit(auditParams *AuditParams) (cmdResults *results.SecurityCommandResults) { + // Set up isolated logging if a log collector is provided + if collector := auditParams.GetLogCollector(); collector != nil { + log.SetLoggerForGoroutine(collector.Logger()) + defer log.ClearLoggerForGoroutine() + } // Prepare the command for the scan. if cmdResults = prepareToScan(auditParams); cmdResults.GeneralError != nil { return @@ -623,7 +628,17 @@ func addJasScansToRunner(auditParallelRunner *utils.SecurityParallelRunner, audi return } auditParallelRunner.JasWg.Add(1) - if _, jasErr := auditParallelRunner.Runner.AddTaskWithError(createJasScansTask(auditParallelRunner, scanResults, serverDetails, auditParams, jasScanner), func(taskErr error) { + // Capture current logger (may be a BufferedLogger for isolated parallel logging). + // Worker goroutines need this propagated so their logs are captured in the same buffer. 
+ currentLogger := log.GetLogger() + jasTask := createJasScansTask(auditParallelRunner, scanResults, serverDetails, auditParams, jasScanner) + wrappedJasTask := func(threadId int) error { + // Propagate parent's logger to this worker goroutine for isolated log capture + log.SetLoggerForGoroutine(currentLogger) + defer log.ClearLoggerForGoroutine() + return jasTask(threadId) + } + if _, jasErr := auditParallelRunner.Runner.AddTaskWithError(wrappedJasTask, func(taskErr error) { scanResults.AddGeneralError(fmt.Errorf("failed while adding JAS scan tasks: %s", taskErr.Error()), auditParams.AllowPartialResults()) }); jasErr != nil { generalError = fmt.Errorf("failed to create JAS task: %s", jasErr.Error()) diff --git a/commands/audit/auditbasicparams.go b/commands/audit/auditbasicparams.go index 7c08e7cfb..94f1e9197 100644 --- a/commands/audit/auditbasicparams.go +++ b/commands/audit/auditbasicparams.go @@ -81,8 +81,9 @@ type AuditBasicParams struct { xrayVersion string xscVersion string configProfile *xscservices.ConfigProfile - solutionFilePath string - useIncludedBuilds bool + solutionFilePath string + logCollector *LogCollector + useIncludedBuilds bool } func (abp *AuditBasicParams) DirectDependencies() *[]string { @@ -344,6 +345,15 @@ func (abp *AuditBasicParams) SetSolutionFilePath(solutionFilePath string) *Audit return abp } +func (abp *AuditBasicParams) SetLogCollector(collector *LogCollector) *AuditBasicParams { + abp.logCollector = collector + return abp +} + +func (abp *AuditBasicParams) GetLogCollector() *LogCollector { + return abp.logCollector +} + func (abp *AuditBasicParams) UseIncludedBuilds() bool { return abp.useIncludedBuilds } func (abp *AuditBasicParams) SetUseIncludedBuilds(useIncludedBuilds bool) *AuditBasicParams { diff --git a/commands/audit/logcollector.go b/commands/audit/logcollector.go new file mode 100644 index 000000000..dc3061fb6 --- /dev/null +++ b/commands/audit/logcollector.go @@ -0,0 +1,41 @@ +package audit + +import ( + 
"github.com/jfrog/jfrog-client-go/utils/log" +) + +// LogCollector captures logs for isolated parallel audit operations. +type LogCollector struct { + logger *log.BufferedLogger +} + +func NewLogCollector(level log.LevelType) *LogCollector { + return &LogCollector{ + logger: log.NewBufferedLogger(level), + } +} + +func (c *LogCollector) Logger() log.Log { + return c.logger +} + +// ReplayTo outputs captured logs through the target logger (preserving colors). +func (c *LogCollector) ReplayTo(target log.Log) { + c.logger.ReplayTo(target) +} + +func (c *LogCollector) HasLogs() bool { + return c.logger.Len() > 0 +} + +func (c *LogCollector) Len() int { + return c.logger.Len() +} + +func (c *LogCollector) String() string { + return c.logger.String() +} + +func (c *LogCollector) Clear() { + c.logger.Clear() +} diff --git a/go.mod b/go.mod index 105b9bd66..c796a4875 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/jfrog/jfrog-cli-security -go 1.25.4 +go 1.25.5 require ( github.com/CycloneDX/cyclonedx-go v0.9.3 @@ -11,7 +11,7 @@ require ( github.com/gookit/color v1.6.0 github.com/hashicorp/go-hclog v1.6.3 github.com/hashicorp/go-plugin v1.6.3 - github.com/jfrog/build-info-go v1.12.5-0.20251209171349-eb030db986f9 + github.com/jfrog/build-info-go v1.13.0 github.com/jfrog/froggit-go v1.20.6 github.com/jfrog/gofrog v1.7.6 github.com/jfrog/jfrog-apps-config v1.0.1 @@ -135,8 +135,6 @@ require ( gopkg.in/warnings.v0 v0.1.2 // indirect ) -// replace github.com/jfrog/jfrog-client-go => github.com/jfrog/jfrog-client-go master - // replace github.com/jfrog/jfrog-cli-core/v2 => github.com/jfrog/jfrog-cli-core/v2 master //replace github.com/jfrog/jfrog-cli-artifactory => github.com/jfrog/jfrog-cli-artifactory main @@ -144,3 +142,5 @@ require ( // replace github.com/jfrog/build-info-go => github.com/jfrog/build-info-go dev // replace github.com/jfrog/froggit-go => github.com/jfrog/froggit-go master + +replace github.com/jfrog/jfrog-client-go => 
github.com/eyalk007/jfrog-client-go v0.0.0-20260114112951-67b77f49255f diff --git a/go.sum b/go.sum index 1ec25e861..e40004109 100644 --- a/go.sum +++ b/go.sum @@ -70,6 +70,8 @@ github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= +github.com/eyalk007/jfrog-client-go v0.0.0-20260114112951-67b77f49255f h1:wievyISUpwoYv47Q+SreXShHnwPaNBkcqGjSOJ7hRZk= +github.com/eyalk007/jfrog-client-go v0.0.0-20260114112951-67b77f49255f/go.mod h1:sCE06+GngPoyrGO0c+vmhgMoVSP83UMNiZnIuNPzU8U= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= @@ -146,8 +148,8 @@ github.com/jedib0t/go-pretty/v6 v6.7.5 h1:9dJSWTJnsXJVVAbvxIFxeHf/JxoJd7GUl5o3Uz github.com/jedib0t/go-pretty/v6 v6.7.5/go.mod h1:YwC5CE4fJ1HFUDeivSV1r//AmANFHyqczZk+U6BDALU= github.com/jfrog/archiver/v3 v3.6.1 h1:LOxnkw9pOn45DzCbZNFV6K0+6dCsQ0L8mR3ZcujO5eI= github.com/jfrog/archiver/v3 v3.6.1/go.mod h1:VgR+3WZS4N+i9FaDwLZbq+jeU4B4zctXL+gL4EMzfLw= -github.com/jfrog/build-info-go v1.12.5-0.20251209171349-eb030db986f9 h1:CL7lp7Y7srwQ1vy1btX66t4wbztzEGQbqi/9tdEz7xk= -github.com/jfrog/build-info-go v1.12.5-0.20251209171349-eb030db986f9/go.mod h1:9W4U440fdTHwW1HiB/R0VQvz/5q8ZHsms9MWcq+JrdY= +github.com/jfrog/build-info-go v1.13.0 h1:bHedp1Gl+a8eR71xxP5JvkqwDj2X3r6e5NiIwNcIwRM= +github.com/jfrog/build-info-go v1.13.0/go.mod h1:+OCtMb22/D+u7Wne5lzkjJjaWr0LRZcHlDwTH86Mpwo= github.com/jfrog/froggit-go v1.20.6 h1:Xp7+LlEh0m1KGrQstb+u0aGfjRUtv1eh9xQBV3571jQ= github.com/jfrog/froggit-go v1.20.6/go.mod h1:obSG1SlsWjktkuqmKtpq7MNTTL63e0ot+ucTnlOMV88= 
github.com/jfrog/gofrog v1.7.6 h1:QmfAiRzVyaI7JYGsB7cxfAJePAZTzFz0gRWZSE27c6s= @@ -158,8 +160,6 @@ github.com/jfrog/jfrog-cli-artifactory v0.8.1-0.20251211075913-35ebcd308e93 h1:r github.com/jfrog/jfrog-cli-artifactory v0.8.1-0.20251211075913-35ebcd308e93/go.mod h1:7cCaRhXorlbyXZgiW5bplCExFxlnROaG21K12d8inpQ= github.com/jfrog/jfrog-cli-core/v2 v2.60.1-0.20251210085744-f8481d179ac5 h1:GYE67ubwl+ZRw3CcXFUi49EwwQp6k+qS8sX0QuHDHO8= github.com/jfrog/jfrog-cli-core/v2 v2.60.1-0.20251210085744-f8481d179ac5/go.mod h1:BMoGi2rG0udCCeaghqlNgiW3fTmT+TNnfTnBoWFYgcg= -github.com/jfrog/jfrog-client-go v1.55.1-0.20251217080430-c92b763b7465 h1:Ff3BlNPndrAfa1xFI/ORFzfWTxQxF0buWG61PEJwd3U= -github.com/jfrog/jfrog-client-go v1.55.1-0.20251217080430-c92b763b7465/go.mod h1:WQ5Y+oKYyHFAlCbHN925bWhnShTd2ruxZ6YTpb76fpU= github.com/jhump/protoreflect v1.15.1 h1:HUMERORf3I3ZdX05WaQ6MIpd/NJ434hTp5YiKgfCL6c= github.com/jhump/protoreflect v1.15.1/go.mod h1:jD/2GMKKE6OqX8qTjhADU1e6DShO+gavG9e0Q693nKo= github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k= diff --git a/jas/runner/jasrunner.go b/jas/runner/jasrunner.go index c88692450..6ab730f6a 100644 --- a/jas/runner/jasrunner.go +++ b/jas/runner/jasrunner.go @@ -128,7 +128,9 @@ func addJasScanTaskForModuleIfNeeded(params JasRunnerParams, subScan utils.SubSc func addModuleJasScanTask(scanType jasutils.JasScanType, securityParallelRunner *utils.SecurityParallelRunner, task parallel.TaskFunc, scanResults *results.TargetResults, allowSkippingErrors bool) (generalError error) { securityParallelRunner.JasScannersWg.Add(1) - if _, addTaskErr := securityParallelRunner.Runner.AddTaskWithError(task, func(err error) { + // Wrap task to propagate logger to worker goroutines (for isolated parallel logging) + wrappedTask := utils.WrapTaskWithLoggerPropagation(task) + if _, addTaskErr := securityParallelRunner.Runner.AddTaskWithError(wrappedTask, func(err error) { _ = 
scanResults.AddTargetError(fmt.Errorf("failed to run %s scan: %s", scanType, err.Error()), allowSkippingErrors) }); addTaskErr != nil { generalError = scanResults.AddTargetError(fmt.Errorf("error occurred while adding '%s' scan to parallel runner: %s", scanType, addTaskErr.Error()), allowSkippingErrors) diff --git a/sca/scan/scascan.go b/sca/scan/scascan.go index 1cfd7ba5d..f203a4980 100644 --- a/sca/scan/scascan.go +++ b/sca/scan/scascan.go @@ -73,8 +73,11 @@ func RunScaScan(strategy SbomScanStrategy, params ScaScanParams) (generalError e // For Audit scans, we run the scan in parallel using the SecurityParallelRunner. func runScaScanWithRunner(strategy SbomScanStrategy, params ScaScanParams) (generalError error) { targetResult := params.ScanResults + scaTask := createScaScanTaskWithRunner(params.Runner, strategy, params) + // Wrap task to propagate logger to worker goroutines (for isolated parallel logging) + wrappedScaTask := utils.WrapTaskWithLoggerPropagation(scaTask) // Create sca scan task - if _, taskCreationErr := params.Runner.Runner.AddTaskWithError(createScaScanTaskWithRunner(params.Runner, strategy, params), func(err error) { + if _, taskCreationErr := params.Runner.Runner.AddTaskWithError(wrappedScaTask, func(err error) { _ = targetResult.AddTargetError(fmt.Errorf("failed to execute SCA scan: %s", err.Error()), params.AllowPartialResults) }); taskCreationErr != nil { _ = targetResult.AddTargetError(fmt.Errorf("failed to create SCA scan task: %s", taskCreationErr.Error()), params.AllowPartialResults) diff --git a/tests/testdata/other/diff-scan/results.sarif b/tests/testdata/other/diff-scan/results.sarif new file mode 100644 index 000000000..3375e9301 --- /dev/null +++ b/tests/testdata/other/diff-scan/results.sarif @@ -0,0 +1,142 @@ +{ + "runs": [ + { + "tool": { + "driver": { + "name": "JFrog Secrets scanner", + "rules": [ + { + "id": "REQ.SECRET.GENERIC.TEXT", + "properties": { + "conclusion": "negative", + "applicability": "applicable", + 
"scanner_id": null + }, + "fullDescription": { + "text": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. 
Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n", + "markdown": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. 
inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n" + }, + "shortDescription": { + "text": "Scanner for REQ.SECRET.GENERIC.TEXT" + } + }, + { + "id": "REQ.SECRET.GENERIC.CODE", + "properties": { + "conclusion": "private", + "applicability": "undetermined", + "scanner_id": null + }, + "fullDescription": { + "text": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. 
For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. 
Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n", + "markdown": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. 
inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n" + }, + "shortDescription": { + "text": "Scanner for REQ.SECRET.GENERIC.CODE" + } + }, + { + "id": "REQ.SECRET.KEYS", + "properties": { + "conclusion": "private", + "applicability": "undetermined", + "scanner_id": "1235", + "undetermined_reason": "" + }, + "fullDescription": { + "text": "\nStoring an API key in the image could lead to several risks.\n\nIf the key is associated with a wide scope of privileges, attackers could extract it from a single image or firmware and use it maliciously to attack many targets. 
For example, if the embedded key allows querying/modifying data for all cloud user accounts, without per-user authentication, the attackers who extract it would gain access to system-wide data.\n\nIf the cloud/SaaS provider bills by key usage - for example, every million queries cost the key's owner a fixed sum of money - attackers could use the keys for their own purposes (or just as a form of vandalism), incurring a large cost to the legitimate user or operator.\n\n## Best practices\n\nUse narrow scopes for stored API keys. As much as possible, API keys should be unique per host and require additional authentication with the user's individual credentials for any sensitive actions.\n\nAvoid placing keys whose use incurs costs directly in the image. Store the key with any software or hardware protection available on the host for key storage (such as operating system key-stores, hardware cryptographic storage mechanisms or cloud-managed secure storage services such as [AWS KMS](https://aws.amazon.com/kms/)).\n\nTokens that were detected as exposed should be revoked and replaced -\n\n* [AWS Key Revocation](https://aws.amazon.com/premiumsupport/knowledge-center/delete-access-key/#:~:text=If%20you%20see%20a%20warning,the%20confirmation%20box%2C%20choose%20Deactivate.)\n* [GCP Key Revocation](https://www.trendmicro.com/cloudoneconformity/knowledge-base/gcp/CloudIAM/delete-api-keys.html)\n* [Azure Key Revocation](https://docs.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows#revoke-a-pat)\n* [GitHub Key Revocation](https://docs.github.com/en/rest/apps/oauth-applications#delete-an-app-authorization)\n", + "markdown": "\nStoring an API key in the image could lead to several risks.\n\nIf the key is associated with a wide scope of privileges, attackers could extract it from a single image or firmware and use it maliciously to attack many targets. 
For example, if the embedded key allows querying/modifying data for all cloud user accounts, without per-user authentication, the attackers who extract it would gain access to system-wide data.\n\nIf the cloud/SaaS provider bills by key usage - for example, every million queries cost the key's owner a fixed sum of money - attackers could use the keys for their own purposes (or just as a form of vandalism), incurring a large cost to the legitimate user or operator.\n\n## Best practices\n\nUse narrow scopes for stored API keys. As much as possible, API keys should be unique per host and require additional authentication with the user's individual credentials for any sensitive actions.\n\nAvoid placing keys whose use incurs costs directly in the image. Store the key with any software or hardware protection available on the host for key storage (such as operating system key-stores, hardware cryptographic storage mechanisms or cloud-managed secure storage services such as [AWS KMS](https://aws.amazon.com/kms/)).\n\nTokens that were detected as exposed should be revoked and replaced -\n\n* [AWS Key Revocation](https://aws.amazon.com/premiumsupport/knowledge-center/delete-access-key/#:~:text=If%20you%20see%20a%20warning,the%20confirmation%20box%2C%20choose%20Deactivate.)\n* [GCP Key Revocation](https://www.trendmicro.com/cloudoneconformity/knowledge-base/gcp/CloudIAM/delete-api-keys.html)\n* [Azure Key Revocation](https://docs.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows#revoke-a-pat)\n* [GitHub Key Revocation](https://docs.github.com/en/rest/apps/oauth-applications#delete-an-app-authorization)\n" + }, + "shortDescription": { + "text": "Scanner for REQ.SECRET.KEYS" + } + }, + { + "id": "REQ.CRED.PUBLIC-ONLY", + "properties": { + "conclusion": "private", + "applicability": "undetermined", + "scanner_id": "125", + "undetermined_reason": "" + }, + "fullDescription": { + "text": "", + 
"markdown": "" + }, + "shortDescription": { + "text": "Scanner for REQ.CRED.PUBLIC-ONLY" + } + }, + { + "id": "REQ.SECRET.GENERIC.URL-TEXT", + "properties": { + "conclusion": "private", + "applicability": "undetermined", + "scanner_id": null + }, + "fullDescription": { + "text": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. 
Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n", + "markdown": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. 
inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n" + }, + "shortDescription": { + "text": "Scanner for REQ.SECRET.GENERIC.URL-TEXT" + } + } + ], + "version": "1.0", + "informationUri": "https://jfrog.com/help/r/jfrog-security-documentation/jfrog-advanced-security" + } + }, + "invocations": [ + { + "arguments": [ + "/Users/assafa/.jfrog/dependencies/analyzerManager/jas_scanner/jas_scanner", + "scan", + "/var/folders/xv/th4cksxn7jv9wjrdnn1h4tj00000gq/T/jfrog.cli.temp.-1747638624-1289062780/Secrets_1747638640/config.yaml" + ], + "executionSuccessful": true, + "workingDirectory": { + "uri": "file:///Users/assafa/.jfrog/dependencies/analyzerManager" + } + } + ], + "results": [ + { + "message": { + "text": "Hardcoded secrets were found" + }, + "level": "error", + "locations": [ + { + "physicalLocation": { + 
"region": { + "snippet": { + "text": "password: jnvkjcxnjvxnvk22222" + }, + "endColumn": 30, + "endLine": 1, + "startColumn": 1, + "startLine": 1 + }, + "artifactLocation": { + "uri": "file:///private/var/folders/xv/th4cksxn7jv9wjrdnn1h4tj00000gq/T/jfrog.cli.temp.-1747638503-538392025/TOKENS" + } + } + } + ], + "properties": { + "tokenValidation": "", + "metadata": "" + }, + "suppressions": [], + "partialFingerprints": { + "jfrogSecret": "085de62ad4aa0dc22cf7d733811687c08b5517c4414326723e05e75a822ee58d" + }, + "ruleId": "REQ.SECRET.GENERIC.TEXT" + } + ] + } + ], + "version": "2.1.0", + "$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json" +} \ No newline at end of file diff --git a/tests/testdata/other/diff-scan/target.sarif b/tests/testdata/other/diff-scan/target.sarif new file mode 100644 index 000000000..7da1c3967 --- /dev/null +++ b/tests/testdata/other/diff-scan/target.sarif @@ -0,0 +1,142 @@ +{ + "version": "2.1.0", + "$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json", + "runs": [ + { + "tool": { + "driver": { + "informationUri": "https://jfrog.com/help/r/jfrog-security-documentation/jfrog-advanced-security", + "name": "JFrog Secrets scanner", + "rules": [ + { + "id": "REQ.SECRET.GENERIC.TEXT", + "shortDescription": { + "text": "Scanner for REQ.SECRET.GENERIC.TEXT" + }, + "fullDescription": { + "text": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. 
For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. 
Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n", + "markdown": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. 
inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n" + }, + "properties": { + "applicability": "applicable", + "conclusion": "negative", + "scanner_id": null, + "security-severity": "8.9" + } + }, + { + "id": "REQ.SECRET.GENERIC.CODE", + "shortDescription": { + "text": "Scanner for REQ.SECRET.GENERIC.CODE" + }, + "fullDescription": { + "text": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. 
For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. 
Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n", + "markdown": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. 
inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n" + }, + "properties": { + "applicability": "undetermined", + "conclusion": "private", + "scanner_id": null + } + }, + { + "id": "REQ.SECRET.KEYS", + "shortDescription": { + "text": "Scanner for REQ.SECRET.KEYS" + }, + "fullDescription": { + "text": "\nStoring an API key in the image could lead to several risks.\n\nIf the key is associated with a wide scope of privileges, attackers could extract it from a single image or firmware and use it maliciously to attack many targets. 
For example, if the embedded key allows querying/modifying data for all cloud user accounts, without per-user authentication, the attackers who extract it would gain access to system-wide data.\n\nIf the cloud/SaaS provider bills by key usage - for example, every million queries cost the key's owner a fixed sum of money - attackers could use the keys for their own purposes (or just as a form of vandalism), incurring a large cost to the legitimate user or operator.\n\n## Best practices\n\nUse narrow scopes for stored API keys. As much as possible, API keys should be unique per host and require additional authentication with the user's individual credentials for any sensitive actions.\n\nAvoid placing keys whose use incurs costs directly in the image. Store the key with any software or hardware protection available on the host for key storage (such as operating system key-stores, hardware cryptographic storage mechanisms or cloud-managed secure storage services such as [AWS KMS](https://aws.amazon.com/kms/)).\n\nTokens that were detected as exposed should be revoked and replaced -\n\n* [AWS Key Revocation](https://aws.amazon.com/premiumsupport/knowledge-center/delete-access-key/#:~:text=If%20you%20see%20a%20warning,the%20confirmation%20box%2C%20choose%20Deactivate.)\n* [GCP Key Revocation](https://www.trendmicro.com/cloudoneconformity/knowledge-base/gcp/CloudIAM/delete-api-keys.html)\n* [Azure Key Revocation](https://docs.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows#revoke-a-pat)\n* [GitHub Key Revocation](https://docs.github.com/en/rest/apps/oauth-applications#delete-an-app-authorization)\n", + "markdown": "\nStoring an API key in the image could lead to several risks.\n\nIf the key is associated with a wide scope of privileges, attackers could extract it from a single image or firmware and use it maliciously to attack many targets. 
For example, if the embedded key allows querying/modifying data for all cloud user accounts, without per-user authentication, the attackers who extract it would gain access to system-wide data.\n\nIf the cloud/SaaS provider bills by key usage - for example, every million queries cost the key's owner a fixed sum of money - attackers could use the keys for their own purposes (or just as a form of vandalism), incurring a large cost to the legitimate user or operator.\n\n## Best practices\n\nUse narrow scopes for stored API keys. As much as possible, API keys should be unique per host and require additional authentication with the user's individual credentials for any sensitive actions.\n\nAvoid placing keys whose use incurs costs directly in the image. Store the key with any software or hardware protection available on the host for key storage (such as operating system key-stores, hardware cryptographic storage mechanisms or cloud-managed secure storage services such as [AWS KMS](https://aws.amazon.com/kms/)).\n\nTokens that were detected as exposed should be revoked and replaced -\n\n* [AWS Key Revocation](https://aws.amazon.com/premiumsupport/knowledge-center/delete-access-key/#:~:text=If%20you%20see%20a%20warning,the%20confirmation%20box%2C%20choose%20Deactivate.)\n* [GCP Key Revocation](https://www.trendmicro.com/cloudoneconformity/knowledge-base/gcp/CloudIAM/delete-api-keys.html)\n* [Azure Key Revocation](https://docs.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows#revoke-a-pat)\n* [GitHub Key Revocation](https://docs.github.com/en/rest/apps/oauth-applications#delete-an-app-authorization)\n" + }, + "properties": { + "applicability": "undetermined", + "conclusion": "private", + "scanner_id": "1235", + "undetermined_reason": "" + } + }, + { + "id": "REQ.CRED.PUBLIC-ONLY", + "shortDescription": { + "text": "Scanner for REQ.CRED.PUBLIC-ONLY" + }, + "fullDescription": { + "text": "", + 
"markdown": "" + }, + "properties": { + "applicability": "undetermined", + "conclusion": "private", + "scanner_id": "125", + "undetermined_reason": "" + } + }, + { + "id": "REQ.SECRET.GENERIC.URL-TEXT", + "shortDescription": { + "text": "Scanner for REQ.SECRET.GENERIC.URL-TEXT" + }, + "fullDescription": { + "text": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. 
Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n", + "markdown": "Storing hardcoded secrets in your source code or binary artifact could lead to several risks.\n\nIf the secret is associated with a wide scope of privileges, attackers could extract it from the source code or binary artifact and use it maliciously to attack many targets. For example, if the hardcoded password gives high-privilege access to an AWS account, the attackers may be able to query/modify company-wide sensitive data without per-user authentication.\n\n## Best practices\n\nUse safe storage when storing high-privilege secrets such as passwords and tokens, for example -\n\n* ### Environment Variables\n\nEnvironment variables are set outside of the application code, and can be dynamically passed to the application only when needed, for example -\n`SECRET_VAR=MySecret ./my_application`\nThis way, `MySecret` does not have to be hardcoded into `my_application`.\n\nNote that if your entire binary artifact is published (ex. a Docker container published to Docker Hub), the value for the environment variable must not be stored in the artifact itself (ex. 
inside the `Dockerfile` or one of the container's files) but rather must be passed dynamically, for example in the `docker run` call as an argument.\n\n* ### Secret management services\n\nExternal vendors offer cloud-based secret management services, that provide proper access control to each secret. The given access to each secret can be dynamically modified or even revoked. Some examples include -\n\n* [Hashicorp Vault](https://www.vaultproject.io)\n* [AWS KMS](https://aws.amazon.com/kms) (Key Management Service)\n* [Google Cloud KMS](https://cloud.google.com/security-key-management)\n\n## Least-privilege principle\n\nStoring a secret in a hardcoded manner can be made safer, by making sure the secret grants the least amount of privilege as needed by the application.\nFor example - if the application needs to read a specific table from a specific database, and the secret grants access to perform this operation **only** (meaning - no access to other tables, no write access at all) then the damage from any secret leaks is mitigated.\nThat being said, it is still not recommended to store secrets in a hardcoded manner, since this type of storage does not offer any way to revoke or moderate the usage of the secret.\n" + }, + "properties": { + "applicability": "undetermined", + "conclusion": "private", + "scanner_id": null + } + } + ], + "version": "1.0" + } + }, + "invocations": [ + { + "arguments": [ + "/Users/assafa/.jfrog/dependencies/analyzerManager/jas_scanner/jas_scanner", + "scan", + "/var/folders/xv/th4cksxn7jv9wjrdnn1h4tj00000gq/T/jfrog.cli.temp.-1747638540-1087327516/Secrets_1747638541/config.yaml" + ], + "executionSuccessful": true, + "workingDirectory": { + "uri": "file:////var/folders/xv/th4cksxn7jv9wjrdnn1h4tj00000gq/T/jfrog.cli.temp.-1747638503-4092437536" + } + } + ], + "results": [ + { + "properties": { + "metadata": "", + "tokenValidation": "" + }, + "ruleId": "REQ.SECRET.GENERIC.TEXT", + "level": "error", + "message": { + "text": "Hardcoded secrets 
were found" + }, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "file:///private/var/folders/xv/th4cksxn7jv9wjrdnn1h4tj00000gq/T/jfrog.cli.temp.-1747638503-4092437536/TOKENS" + }, + "region": { + "startLine": 1, + "startColumn": 1, + "endLine": 1, + "endColumn": 30, + "snippet": { + "text": "password: jnvkjcxnjvxnvk22222" + } + } + } + } + ], + "partialFingerprints": { + "jfrogSecret": "085de62ad4aa0dc22cf7d733811687c08b5517c4414326723e05e75a822ee58d" + } + } + ] + } + ] +} diff --git a/utils/parallel_runner.go b/utils/parallel_runner.go index a8435cb8e..f9970661b 100644 --- a/utils/parallel_runner.go +++ b/utils/parallel_runner.go @@ -2,6 +2,7 @@ package utils import ( "github.com/jfrog/gofrog/parallel" + "github.com/jfrog/jfrog-client-go/utils/log" "sync" ) @@ -15,6 +16,18 @@ type SecurityParallelRunner struct { onScanEndFunc func() } +// WrapTaskWithLoggerPropagation wraps a parallel task to propagate the current goroutine's logger +// to worker goroutines. This is needed when using BufferedLogger for isolated parallel logging - +// worker goroutines need to inherit the parent's logger so their logs are captured in the same buffer. 
+func WrapTaskWithLoggerPropagation(task parallel.TaskFunc) parallel.TaskFunc { + currentLogger := log.GetLogger() + return func(threadId int) error { + log.SetLoggerForGoroutine(currentLogger) + defer log.ClearLoggerForGoroutine() + return task(threadId) + } +} + func NewSecurityParallelRunner(numOfParallelScans int) SecurityParallelRunner { return SecurityParallelRunner{Runner: parallel.NewRunner(numOfParallelScans, 20000, false)} } diff --git a/utils/results/diff.go b/utils/results/diff.go new file mode 100644 index 000000000..7cc7212e8 --- /dev/null +++ b/utils/results/diff.go @@ -0,0 +1,252 @@ +package results + +import ( + "github.com/jfrog/jfrog-cli-security/utils/formats/sarifutils" + "github.com/jfrog/jfrog-cli-security/utils/jasutils" + "github.com/jfrog/jfrog-client-go/utils/log" + "github.com/owenrumney/go-sarif/v3/pkg/report/v210/sarif" +) + +// MergeScaAndJasResults merges SCA results with JAS diff results into a single SecurityCommandResults. +// SCA results provide the base (including ScaResults and GitContext), JAS results provide the JAS findings. 
+func MergeScaAndJasResults(scaResults, jasDiffResults *SecurityCommandResults) *SecurityCommandResults { + unifiedResults := &SecurityCommandResults{ + ResultsMetaData: jasDiffResults.ResultsMetaData, + } + // Prefer SCA's GitContext (contains PR upload path info) + if scaResults.GitContext != nil { + unifiedResults.GitContext = scaResults.GitContext + } + + for _, scaTarget := range scaResults.Targets { + var jasTarget *TargetResults + for _, jTarget := range jasDiffResults.Targets { + if jTarget.Target == scaTarget.Target { + jasTarget = jTarget + break + } + } + + unifiedTarget := &TargetResults{ + ScanTarget: scaTarget.ScanTarget, + AppsConfigModule: scaTarget.AppsConfigModule, + ScaResults: scaTarget.ScaResults, + JasResults: scaTarget.JasResults, + } + + if jasTarget != nil && jasTarget.JasResults != nil { + if unifiedTarget.JasResults == nil { + unifiedTarget.JasResults = jasTarget.JasResults + } else { + unifiedTarget.JasResults.JasVulnerabilities.SecretsScanResults = jasTarget.JasResults.JasVulnerabilities.SecretsScanResults + unifiedTarget.JasResults.JasVulnerabilities.IacScanResults = jasTarget.JasResults.JasVulnerabilities.IacScanResults + unifiedTarget.JasResults.JasVulnerabilities.SastScanResults = jasTarget.JasResults.JasVulnerabilities.SastScanResults + unifiedTarget.JasResults.JasViolations.SecretsScanResults = jasTarget.JasResults.JasViolations.SecretsScanResults + unifiedTarget.JasResults.JasViolations.IacScanResults = jasTarget.JasResults.JasViolations.IacScanResults + unifiedTarget.JasResults.JasViolations.SastScanResults = jasTarget.JasResults.JasViolations.SastScanResults + } + } + + unifiedResults.Targets = append(unifiedResults.Targets, unifiedTarget) + } + + return unifiedResults +} + +// CompareJasResults computes the diff between target and source JAS results. +// Returns only NEW findings in source that don't exist in target. 
+func CompareJasResults(targetResults, sourceResults *SecurityCommandResults) *SecurityCommandResults { + log.Info("[DIFF] Starting JAS diff calculation") + log.Debug("[DIFF] Comparing", len(sourceResults.Targets), "source targets against", len(targetResults.Targets), "target targets") + + diffResults := &SecurityCommandResults{ + ResultsMetaData: sourceResults.ResultsMetaData, + } + + for _, sourceTarget := range sourceResults.Targets { + if sourceTarget.JasResults == nil { + continue + } + + var allTargetJasResults []*JasScansResults + for _, targetTarget := range targetResults.Targets { + if targetTarget.JasResults != nil { + allTargetJasResults = append(allTargetJasResults, targetTarget.JasResults) + } + } + + diffJasResults := filterExistingFindings(allTargetJasResults, sourceTarget.JasResults) + + diffTarget := &TargetResults{ + ScanTarget: sourceTarget.ScanTarget, + JasResults: diffJasResults, + } + + diffResults.Targets = append(diffResults.Targets, diffTarget) + } + + return diffResults +} + +// filterExistingFindings removes findings from source that already exist in target. 
+func filterExistingFindings(allTargetJasResults []*JasScansResults, sourceJasResults *JasScansResults) *JasScansResults { + if sourceJasResults == nil { + return nil + } + + if len(allTargetJasResults) == 0 { + return sourceJasResults + } + + targetKeys := make(map[string]bool) + + for _, targetJasResults := range allTargetJasResults { + if targetJasResults == nil { + continue + } + + for _, targetRun := range targetJasResults.GetVulnerabilitiesResults(jasutils.Secrets) { + extractLocationsOnly(targetRun, targetKeys) + } + for _, targetRun := range targetJasResults.GetViolationsResults(jasutils.Secrets) { + extractLocationsOnly(targetRun, targetKeys) + } + for _, targetRun := range targetJasResults.GetVulnerabilitiesResults(jasutils.IaC) { + extractLocationsOnly(targetRun, targetKeys) + } + for _, targetRun := range targetJasResults.GetViolationsResults(jasutils.IaC) { + extractLocationsOnly(targetRun, targetKeys) + } + for _, targetRun := range targetJasResults.GetVulnerabilitiesResults(jasutils.Sast) { + extractFingerprints(targetRun, targetKeys) + } + for _, targetRun := range targetJasResults.GetViolationsResults(jasutils.Sast) { + extractFingerprints(targetRun, targetKeys) + } + } + + sourceSecrets := countSarifResults(sourceJasResults.JasVulnerabilities.SecretsScanResults) + + countSarifResults(sourceJasResults.JasViolations.SecretsScanResults) + sourceIac := countSarifResults(sourceJasResults.JasVulnerabilities.IacScanResults) + + countSarifResults(sourceJasResults.JasViolations.IacScanResults) + sourceSast := countSarifResults(sourceJasResults.JasVulnerabilities.SastScanResults) + + countSarifResults(sourceJasResults.JasViolations.SastScanResults) + + log.Debug("[DIFF] Source findings before diff - Secrets:", sourceSecrets, "| IaC:", sourceIac, "| SAST:", sourceSast) + + filteredJasResults := &JasScansResults{} + + filteredJasResults.JasVulnerabilities.SecretsScanResults = filterNewSarifFindings( + sourceJasResults.JasVulnerabilities.SecretsScanResults, 
targetKeys) + filteredJasResults.JasVulnerabilities.IacScanResults = filterNewSarifFindings( + sourceJasResults.JasVulnerabilities.IacScanResults, targetKeys) + filteredJasResults.JasVulnerabilities.SastScanResults = filterNewSarifFindings( + sourceJasResults.JasVulnerabilities.SastScanResults, targetKeys) + + filteredJasResults.JasViolations.SecretsScanResults = filterNewSarifFindings( + sourceJasResults.JasViolations.SecretsScanResults, targetKeys) + filteredJasResults.JasViolations.IacScanResults = filterNewSarifFindings( + sourceJasResults.JasViolations.IacScanResults, targetKeys) + filteredJasResults.JasViolations.SastScanResults = filterNewSarifFindings( + sourceJasResults.JasViolations.SastScanResults, targetKeys) + + diffSecrets := countSarifResults(filteredJasResults.JasVulnerabilities.SecretsScanResults) + + countSarifResults(filteredJasResults.JasViolations.SecretsScanResults) + diffIac := countSarifResults(filteredJasResults.JasVulnerabilities.IacScanResults) + + countSarifResults(filteredJasResults.JasViolations.IacScanResults) + diffSast := countSarifResults(filteredJasResults.JasVulnerabilities.SastScanResults) + + countSarifResults(filteredJasResults.JasViolations.SastScanResults) + + log.Info("[DIFF] New findings after diff - Secrets:", diffSecrets, "| IaC:", diffIac, "| SAST:", diffSast) + log.Info("[DIFF] Filtered out - Secrets:", sourceSecrets-diffSecrets, "| IaC:", sourceIac-diffIac, "| SAST:", sourceSast-diffSast) + + return filteredJasResults +} + +func countSarifResults(runs []*sarif.Run) int { + count := 0 + for _, run := range runs { + if run != nil { + count += len(run.Results) + } + } + return count +} + +func extractFingerprints(run *sarif.Run, targetKeys map[string]bool) { + for _, result := range run.Results { + if sarifutils.IsFingerprintsExists(result) { + key := getSastFingerprint(result) + if key != "" { + targetKeys[key] = true + } + } else { + for _, location := range result.Locations { + key := 
sarifutils.GetRelativeLocationFileName(location, run.Invocations) + sarifutils.GetLocationSnippetText(location) + targetKeys[key] = true + } + } + } +} + +func extractLocationsOnly(run *sarif.Run, targetKeys map[string]bool) { + for _, result := range run.Results { + for _, location := range result.Locations { + key := sarifutils.GetRelativeLocationFileName(location, run.Invocations) + sarifutils.GetLocationSnippetText(location) + targetKeys[key] = true + } + } +} + +// getSastFingerprint extracts the SAST fingerprint used for diff matching. +// Note: Uses "precise_sink_and_sink_function" key (from Analyzer Manager for diff purposes), +// which differs from jasutils.SastFingerprintKey ("significant_full_path") used elsewhere. +func getSastFingerprint(result *sarif.Result) string { + if result.Fingerprints != nil { + if value, ok := result.Fingerprints["precise_sink_and_sink_function"]; ok { + return value + } + } + return "" +} + +// filterNewSarifFindings removes findings from sourceRuns that already exist in targetKeys. +// For SAST results with fingerprints, matches by fingerprint. +// For Secrets/IaC results, matches by file location + snippet text. 
+// filterNewSarifFindings returns the findings of sourceRuns whose keys are not
+// present in targetKeys.
+//
+// A result for which sarifutils.IsFingerprintsExists reports true is matched as
+// a whole through getSastFingerprint and is passed through unchanged when new.
+// Any other result is matched location by location, the key being the relative
+// file name concatenated with the snippet text; only unmatched locations are
+// kept and the result is dropped when none remain.
+//
+// Trimmed results and kept runs are shallow copies, so the Results and
+// Locations slices of the inputs are never rewritten. Runs left without any
+// result are omitted. Nil runs and nil results are skipped rather than
+// dereferenced.
+func filterNewSarifFindings(sourceRuns []*sarif.Run, targetKeys map[string]bool) []*sarif.Run {
+	var filteredRuns []*sarif.Run
+
+	for _, run := range sourceRuns {
+		if run == nil {
+			continue
+		}
+		var newResults []*sarif.Result
+
+		for _, result := range run.Results {
+			if result == nil {
+				continue
+			}
+			if sarifutils.IsFingerprintsExists(result) {
+				if !targetKeys[getSastFingerprint(result)] {
+					newResults = append(newResults, result)
+				}
+				continue
+			}
+
+			var newLocations []*sarif.Location
+			for _, location := range result.Locations {
+				key := sarifutils.GetRelativeLocationFileName(location, run.Invocations) + sarifutils.GetLocationSnippetText(location)
+				if !targetKeys[key] {
+					newLocations = append(newLocations, location)
+				}
+			}
+			if len(newLocations) == 0 {
+				continue
+			}
+			// Copy before trimming so the caller's result keeps all its locations.
+			trimmedResult := *result
+			trimmedResult.Locations = newLocations
+			newResults = append(newResults, &trimmedResult)
+		}
+
+		if len(newResults) == 0 {
+			continue
+		}
+		// Copy before trimming so the caller's run keeps all its results.
+		trimmedRun := *run
+		trimmedRun.Results = newResults
+		filteredRuns = append(filteredRuns, &trimmedRun)
+	}
+
+	return filteredRuns
+}
diff --git a/utils/results/diff_test.go b/utils/results/diff_test.go
new file mode 100644
index 000000000..e0f103de4
--- /dev/null
+++ b/utils/results/diff_test.go
@@ -0,0 +1,519 @@
+package results
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/jfrog/jfrog-cli-security/utils/formats/sarifutils"
+	"github.com/owenrumney/go-sarif/v3/pkg/report/v210/sarif"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// strPtr returns a pointer to its argument, for filling optional SARIF string fields.
+func strPtr(s string) *string {
+	return &s
+}
+
+// TestFilterSarifRuns_LocationBased exercises filterNewSarifFindings on results
+// without fingerprints, with target keys built by extractLocationsOnly.
+func TestFilterSarifRuns_LocationBased(t *testing.T) {
+	// resultAt builds a fingerprint-less result with one location per URI.
+	resultAt := func(uris ...string) *sarif.Result {
+		locations := make([]*sarif.Location, 0, len(uris))
+		for _, uri := range uris {
+			locations = append(locations, &sarif.Location{PhysicalLocation: &sarif.PhysicalLocation{
+				ArtifactLocation: &sarif.ArtifactLocation{URI: strPtr(uri)},
+			}})
+		}
+		return &sarif.Result{Locations: locations}
+	}
+	// singleRun wraps the results in one run; without arguments the run holds
+	// an empty, non-nil Results slice.
+	singleRun := func(results ...*sarif.Result) []*sarif.Run {
+		return []*sarif.Run{{Results: append([]*sarif.Result{}, results...)}}
+	}
+
+	testCases := []struct {
+		name          string
+		targetRuns    []*sarif.Run
+		sourceRuns    []*sarif.Run
+		expectedCount int
+		expectedFiles []string
+	}{
+		{
+			name:          "new issues in source - empty target",
+			targetRuns:    singleRun(),
+			sourceRuns:    singleRun(resultAt("file1.js")),
+			expectedCount: 1,
+			expectedFiles: []string{"file1.js"},
+		},
+		{
+			name:          "source has no new issues - same file exists in target",
+			targetRuns:    singleRun(resultAt("file1.js")),
+			sourceRuns:    singleRun(resultAt("file1.js")),
+			expectedCount: 0,
+			expectedFiles: []string{},
+		},
+		{
+			name:          "multiple issues - partial match",
+			targetRuns:    singleRun(resultAt("file1.js")),
+			sourceRuns:    singleRun(resultAt("file2.js", "file1.js")),
+			expectedCount: 1,
+			expectedFiles: []string{"file2.js"},
+		},
+		{
+			name:          "issue removed in source",
+			targetRuns:    singleRun(resultAt("file1.js")),
+			sourceRuns:    singleRun(),
+			expectedCount: 0,
+			expectedFiles: []string{},
+		},
+		{
+			name:          "empty source and target",
+			targetRuns:    singleRun(),
+			sourceRuns:    singleRun(),
+			expectedCount: 0,
+			expectedFiles: []string{},
+		},
+		{
+			// Regression: nil entries in the source must be skipped, not dereferenced.
+			name:          "nil run and nil result in source are skipped",
+			targetRuns:    singleRun(),
+			sourceRuns:    []*sarif.Run{nil, {Results: []*sarif.Result{nil, resultAt("file1.js")}}},
+			expectedCount: 1,
+			expectedFiles: []string{"file1.js"},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Collect the keys of everything already present in the target.
+			targetKeys := make(map[string]bool)
+			for _, run := range tc.targetRuns {
+				extractLocationsOnly(run, targetKeys)
+			}
+
+			filteredRuns := filterNewSarifFindings(tc.sourceRuns, targetKeys)
+			assert.Equal(t, tc.expectedCount, countSarifResults(filteredRuns))
+
+			// Gather the URI of every surviving location.
+			var foundFiles []string
+			for _, run := range filteredRuns {
+				for _, result := range run.Results {
+					for _, loc := range result.Locations {
+						physical := loc.PhysicalLocation
+						if physical == nil || physical.ArtifactLocation == nil || physical.ArtifactLocation.URI == nil {
+							continue
+						}
+						foundFiles = append(foundFiles, *physical.ArtifactLocation.URI)
+					}
+				}
+			}
+			assert.ElementsMatch(t, tc.expectedFiles, foundFiles)
+		})
+	}
+}
+
+// TestFilterSarifRuns_FingerprintBased exercises filterNewSarifFindings on
+// results carrying fingerprints, with target keys built by extractFingerprints.
+func TestFilterSarifRuns_FingerprintBased(t *testing.T) {
+	// fingerprinted builds a result with the given fingerprint value and one
+	// location per URI (Locations stays nil when no URI is given).
+	fingerprinted := func(fingerprint string, uris ...string) *sarif.Result {
+		result := &sarif.Result{
+			Fingerprints: map[string]string{
+				"precise_sink_and_sink_function": fingerprint,
+			},
+		}
+		for _, uri := range uris {
+			result.Locations = append(result.Locations, &sarif.Location{PhysicalLocation: &sarif.PhysicalLocation{
+				ArtifactLocation: &sarif.ArtifactLocation{URI: strPtr(uri)},
+			}})
+		}
+		return result
+	}
+
+	testCases := []struct {
+		name          string
+		targetRuns    []*sarif.Run
+		sourceRuns    []*sarif.Run
+		expectedCount int
+	}{
+		{
+			name: "new issue with fingerprint - empty target",
+			// The target holds a single result with no fingerprints at all.
+			targetRuns:    []*sarif.Run{{Results: []*sarif.Result{{}}}},
+			sourceRuns:    []*sarif.Run{{Results: []*sarif.Result{fingerprinted("fingerprint2")}}},
+			expectedCount: 1,
+		},
+		{
+			name: "no new issues - same fingerprint exists",
+			// Same fingerprint, different file: the fingerprint alone decides.
+			targetRuns:    []*sarif.Run{{Results: []*sarif.Result{fingerprinted("fingerprint1", "file1.js")}}},
+			sourceRuns:    []*sarif.Run{{Results: []*sarif.Result{fingerprinted("fingerprint1", "file2.js")}}},
+			expectedCount: 0,
+		},
+		{
+			name:          "issue removed - fingerprint based",
+			targetRuns:    []*sarif.Run{{Results: []*sarif.Result{fingerprinted("fingerprint2")}}},
+			sourceRuns:    []*sarif.Run{{Results: []*sarif.Result{}}},
+			expectedCount: 0,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Collect the fingerprints already present in the target.
+			targetKeys := make(map[string]bool)
+			for _, run := range tc.targetRuns {
+				extractFingerprints(run, targetKeys)
+			}
+
+			filteredRuns := filterNewSarifFindings(tc.sourceRuns, targetKeys)
+			assert.Equal(t, tc.expectedCount, countSarifResults(filteredRuns))
+		})
+	}
+}
+
+// TestFilterSarifRuns_WithSnippets checks that the snippet text takes part in
+// the location key: the same file with a different snippet is a new finding.
+func TestFilterSarifRuns_WithSnippets(t *testing.T) {
+	// snippetRuns builds one run holding one result with a single location
+	// that carries both a file URI and a snippet.
+	snippetRuns := func(uri, snippet string) []*sarif.Run {
+		return []*sarif.Run{{
+			Results: []*sarif.Result{{
+				Locations: []*sarif.Location{
+					{PhysicalLocation: &sarif.PhysicalLocation{
+						ArtifactLocation: &sarif.ArtifactLocation{URI: strPtr(uri)},
+						Region: &sarif.Region{
+							Snippet: &sarif.ArtifactContent{Text: strPtr(snippet)},
+						},
+					}},
+				},
+			}},
+		}}
+	}
+
+	testCases := []struct {
+		name          string
+		targetRuns    []*sarif.Run
+		sourceRuns    []*sarif.Run
+		expectedCount int
+	}{
+		{
+			name:          "same file different snippet - should be new",
+			targetRuns:    snippetRuns("file1.js", "password = 'secret1'"),
+			sourceRuns:    snippetRuns("file1.js", "password = 'secret2'"),
+			expectedCount: 1,
+		},
+		{
+			name:          "same file same snippet - should be filtered",
+			targetRuns:    snippetRuns("file1.js", "password = 'secret1'"),
+			sourceRuns:    snippetRuns("file1.js", "password = 'secret1'"),
+			expectedCount: 0,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			targetKeys := make(map[string]bool)
+			for _, run := range tc.targetRuns {
+				extractLocationsOnly(run, targetKeys)
+			}
+
+			filteredRuns := filterNewSarifFindings(tc.sourceRuns, targetKeys)
+			assert.Equal(t, tc.expectedCount, countSarifResults(filteredRuns))
+		})
+	}
+}
+
+// Note: Tests for extractRelativePath, getLocationSnippetText, getLocationFileName, and
+// getInvocationWorkingDirectory have been removed as these now use sarifutils functions.
+
+// TestGetSastFingerprint expects getSastFingerprint to return the value stored
+// under the "precise_sink_and_sink_function" key, and an empty string when that
+// key or the whole fingerprints map is missing.
+func TestGetSastFingerprint(t *testing.T) {
+	type fingerprintCase struct {
+		name     string
+		result   *sarif.Result
+		expected string
+	}
+	// withFingerprints builds a result that carries only the given fingerprints map.
+	withFingerprints := func(fingerprints map[string]string) *sarif.Result {
+		return &sarif.Result{Fingerprints: fingerprints}
+	}
+
+	cases := []fingerprintCase{
+		{
+			name:     "has fingerprint",
+			result:   withFingerprints(map[string]string{"precise_sink_and_sink_function": "test-fingerprint-123"}),
+			expected: "test-fingerprint-123",
+		},
+		{
+			name:     "no fingerprint key",
+			result:   withFingerprints(map[string]string{"other_key": "some-value"}),
+			expected: "",
+		},
+		{
+			name:     "nil fingerprints",
+			result:   withFingerprints(nil),
+			expected: "",
+		},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			got := getSastFingerprint(tt.result)
+			assert.Equal(t, tt.expected, got)
+		})
+	}
+}
+
+// TestCountSarifResults expects countSarifResults to sum the results of all
+// runs, treating nil or empty input as zero and ignoring nil runs.
+func TestCountSarifResults(t *testing.T) {
+	type countCase struct {
+		name     string
+		runs     []*sarif.Run
+		expected int
+	}
+	// runOfSize builds a run holding n empty results.
+	runOfSize := func(n int) *sarif.Run {
+		results := make([]*sarif.Result, n)
+		for i := range results {
+			results[i] = &sarif.Result{}
+		}
+		return &sarif.Run{Results: results}
+	}
+
+	cases := []countCase{
+		{name: "nil runs", runs: nil, expected: 0},
+		{name: "empty runs", runs: []*sarif.Run{}, expected: 0},
+		{name: "single run with results", runs: []*sarif.Run{runOfSize(3)}, expected: 3},
+		{name: "multiple runs", runs: []*sarif.Run{runOfSize(2), runOfSize(1)}, expected: 3},
+		{name: "run with nil", runs: []*sarif.Run{nil, runOfSize(1)}, expected: 1},
+	}
+
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			got := countSarifResults(tt.runs)
+			assert.Equal(t, tt.expected, got)
+		})
+	}
+}
+
+// Integration test using real SARIF files from analyzer-manager.
+// Note: The test files were produced in different working directories (temp folders),
+// so without normalizing paths the diff would show 1 "new" finding. The test therefore
+// matches on base file name + snippet.
+// This test verifies the SARIF parsing and filtering logic works correctly.
+func TestFilterSarifRuns_RealSecretsData(t *testing.T) {
+	testDataDir := filepath.Join("..", "..", "tests", "testdata", "other", "diff-scan")
+
+	targetSarifBytes, err := os.ReadFile(filepath.Join(testDataDir, "target.sarif"))
+	require.NoError(t, err, "Failed to read target.sarif")
+
+	sourceSarifBytes, err := os.ReadFile(filepath.Join(testDataDir, "results.sarif"))
+	require.NoError(t, err, "Failed to read results.sarif (source)")
+
+	targetReport, err := sarif.FromBytes(targetSarifBytes)
+	require.NoError(t, err, "Failed to parse target SARIF")
+
+	sourceReport, err := sarif.FromBytes(sourceSarifBytes)
+	require.NoError(t, err, "Failed to parse source SARIF")
+
+	// Every level that is indexed with [0] below is checked first, so a changed
+	// or truncated fixture fails with a clear message instead of a panic.
+	require.NotEmpty(t, targetReport.Runs, "Target should have runs")
+	require.NotEmpty(t, sourceReport.Runs, "Source should have runs")
+	require.NotEmpty(t, targetReport.Runs[0].Results, "Target run should have results")
+	require.NotEmpty(t, sourceReport.Runs[0].Results, "Source run should have results")
+	require.NotEmpty(t, targetReport.Runs[0].Results[0].Locations, "Target result should have locations")
+	require.NotEmpty(t, sourceReport.Runs[0].Results[0].Locations, "Source result should have locations")
+
+	// Verify both files contain the same secret content (snippet)
+	targetSnippet := sarifutils.GetLocationSnippetText(targetReport.Runs[0].Results[0].Locations[0])
+	sourceSnippet := sarifutils.GetLocationSnippetText(sourceReport.Runs[0].Results[0].Locations[0])
+	assert.Equal(t, targetSnippet, sourceSnippet, "Both files should have the same secret snippet")
+	assert.Equal(t, "password: jnvkjcxnjvxnvk22222", targetSnippet)
+
+	// secretKey identifies a finding by base file name + snippet, so the same
+	// secret matches even when the two scans ran from different directories.
+	secretKey := func(location *sarif.Location) string {
+		fileName := sarifutils.GetLocationFileName(location)
+		if fileName != "" {
+			// filepath.Base("") would yield "."; keep a missing name empty.
+			fileName = filepath.Base(fileName)
+		}
+		return fileName + sarifutils.GetLocationSnippetText(location)
+	}
+
+	// Build target keys from every location of every target result.
+	targetKeys := make(map[string]bool)
+	for _, run := range targetReport.Runs {
+		for _, result := range run.Results {
+			for _, location := range result.Locations {
+				targetKeys[secretKey(location)] = true
+			}
+		}
+	}
+
+	// Filter source using the same key generation: keep only locations that are
+	// absent from the target, and only results that still have a location.
+	var filteredResults []*sarif.Result
+	for _, run := range sourceReport.Runs {
+		for _, result := range run.Results {
+			var filteredLocations []*sarif.Location
+			for _, location := range result.Locations {
+				if !targetKeys[secretKey(location)] {
+					filteredLocations = append(filteredLocations, location)
+				}
+			}
+			if len(filteredLocations) == 0 {
+				continue
+			}
+			newResult := *result
+			newResult.Locations = filteredLocations
+			filteredResults = append(filteredResults, &newResult)
+		}
+	}
+
+	// Same file (TOKENS) with same snippet should result in 0 new findings
+	assert.Empty(t, filteredResults, "Same secrets should be filtered out")
+}