From f9c548b6eef60441ef83b0af03cffbb61c47dda3 Mon Sep 17 00:00:00 2001 From: "dylan.wang" Date: Wed, 6 May 2026 22:43:12 +0800 Subject: [PATCH] chore: apply working tree changes --- .gitignore | 2 + CONTRIBUTING.md | 12 + LICENSE | 173 +++++ NOTICE | 7 + README.md | 160 +++-- SECURITY.md | 7 + cmd/futrix-audit-verify/main.go | 30 + cmd/futrix-evidence-verify/main.go | 30 + docs/agent-protocol.md | 72 +++ docs/assets/futrix-security-package-flow.svg | 57 ++ docs/assurance-matrix.md | 21 + docs/audit-chain.md | 63 ++ docs/evidence-bundle.md | 26 + docs/masking.md | 38 ++ docs/open-source-scope.md | 97 +++ docs/production-consistency.md | 62 ++ docs/risk-engine.md | 57 ++ docs/threat-model.md | 44 ++ examples/audit-log/valid.jsonl | 3 + examples/product-export/README.md | 18 + .../product-export/approval-response.json | 23 + examples/product-export/audit-log.jsonl | 3 + .../product-export/masked-query-result.json | 15 + .../product-export/risk-block-response.json | 19 + examples/risk-rules/sql-basic.json | 17 + go.mod | 3 + pkg/auditchain/auditchain.go | 295 +++++++++ pkg/auditchain/auditchain_test.go | 64 ++ pkg/evidence/evidence.go | 203 ++++++ pkg/evidence/evidence_test.go | 13 + pkg/masking/masking.go | 311 +++++++++ pkg/masking/masking_test.go | 52 ++ pkg/protocol/protocol.go | 143 +++++ pkg/protocol/protocol_test.go | 20 + pkg/riskengine/riskengine.go | 602 ++++++++++++++++++ pkg/riskengine/riskengine_test.go | 40 ++ release-verification/verify-checksums.sh | 15 + 37 files changed, 2771 insertions(+), 46 deletions(-) create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 NOTICE create mode 100644 SECURITY.md create mode 100644 cmd/futrix-audit-verify/main.go create mode 100644 cmd/futrix-evidence-verify/main.go create mode 100644 docs/agent-protocol.md create mode 100644 docs/assets/futrix-security-package-flow.svg create mode 100644 docs/assurance-matrix.md create mode 100644 docs/audit-chain.md create mode 100644 
docs/evidence-bundle.md create mode 100644 docs/masking.md create mode 100644 docs/open-source-scope.md create mode 100644 docs/production-consistency.md create mode 100644 docs/risk-engine.md create mode 100644 docs/threat-model.md create mode 100644 examples/audit-log/valid.jsonl create mode 100644 examples/product-export/README.md create mode 100644 examples/product-export/approval-response.json create mode 100644 examples/product-export/audit-log.jsonl create mode 100644 examples/product-export/masked-query-result.json create mode 100644 examples/product-export/risk-block-response.json create mode 100644 examples/risk-rules/sql-basic.json create mode 100644 go.mod create mode 100644 pkg/auditchain/auditchain.go create mode 100644 pkg/auditchain/auditchain_test.go create mode 100644 pkg/evidence/evidence.go create mode 100644 pkg/evidence/evidence_test.go create mode 100644 pkg/masking/masking.go create mode 100644 pkg/masking/masking_test.go create mode 100644 pkg/protocol/protocol.go create mode 100644 pkg/protocol/protocol_test.go create mode 100644 pkg/riskengine/riskengine.go create mode 100644 pkg/riskengine/riskengine_test.go create mode 100644 release-verification/verify-checksums.sh diff --git a/.gitignore b/.gitignore index b4dd53c..7ac3cf9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ dist/ artifacts/ +release-assets/ +.worktrees/ .DS_Store diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..bcfaefc --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,12 @@ +# Contributing + +This repository accepts focused improvements to public specs, examples, test vectors, and verifier packages. + +Good contributions: + +- clarify a documented security limit; +- add a small test vector; +- improve verifier error reporting; +- fix a portable rule or masking edge case. + +Please avoid contributions that add product-specific desktop, Enterprise, billing, signing, deployment, or credential-management code. 
Those areas are intentionally outside this public package. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d2524de --- /dev/null +++ b/LICENSE @@ -0,0 +1,173 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, +and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by +the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all +other entities that control, are controlled by, or are under common +control with that entity. For the purposes of this definition, +"control" means (i) the power, direct or indirect, to cause the +direction or management of such entity, whether by contract or +otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity +exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, +including but not limited to software source code, documentation +source, and configuration files. + +"Object" form shall mean any form resulting from mechanical +transformation or translation of a Source form, including but +not limited to compiled object code, generated documentation, +and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or +Object form, made available under the License, as indicated by a +copyright notice that is included in or attached to the work. 
+ +"Derivative Works" shall mean any work, whether in Source or Object +form, that is based on (or derived from) the Work and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. For the purposes +of this License, Derivative Works shall not include works that remain +separable from, or merely link (or bind by name) to the interfaces of, +the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including +the original version of the Work and any modifications or additions +to that Work or Derivative Works thereof, that is intentionally +submitted to Licensor for inclusion in the Work by the copyright owner +or by an individual or Legal Entity authorized to submit on behalf of +the copyright owner. For the purposes of this definition, "submitted" +means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, +and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but +excluding communication that is conspicuously marked or otherwise +designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity +on behalf of whom a Contribution has been received by Licensor and +subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the +Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +(except as stated in this section) patent license to make, have made, +use, offer to sell, sell, import, and otherwise transfer the Work, +where such license applies only to those patent claims licensable +by such Contributor that are necessarily infringed by their +Contribution(s) alone or by combination of their Contribution(s) +with the Work to which such Contribution(s) was submitted. If You +institute patent litigation against any entity alleging that the Work +or a Contribution incorporated within the Work constitutes direct +or contributory patent infringement, then any patent licenses +granted to You under this License for that Work shall terminate as +of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the +Work or Derivative Works thereof in any medium, with or without +modifications, and in Source or Object form, provided that You +meet the following conditions: + +(a) You must give any other recipients of the Work or Derivative +Works a copy of this License; and + +(b) You must cause any modified files to carry prominent notices +stating that You changed the files; and + +(c) You must retain, in the Source form of any Derivative Works +that You distribute, all copyright, patent, trademark, and +attribution notices from the Source form of the Work, excluding +those notices that do not pertain to any part of the Derivative +Works; and + +(d) If the Work includes a "NOTICE" text file as part of its +distribution, then any Derivative Works that You distribute must +include a readable copy of the attribution notices contained +within such NOTICE file, excluding those notices that do not +pertain to any part of the Derivative Works, in at least one +of the following places: within a NOTICE text file distributed +as part of the Derivative 
Works; within the Source form or +documentation, if provided along with the Derivative Works; or, +within a display generated by the Derivative Works, if and +wherever such third-party notices normally appear. The contents +of the NOTICE file are for informational purposes only and +do not modify the License. You may add Your own attribution +notices within Derivative Works that You distribute, alongside +or as an addendum to the NOTICE text from the Work, provided +that such additional attribution notices cannot be construed +as modifying the License. + +You may add Your own copyright statement to Your modifications and +may provide additional or different license terms and conditions +for use, reproduction, or distribution of Your modifications, or +for any such Derivative Works as a whole, provided Your use, +reproduction, and distribution of the Work otherwise complies with +the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, +any Contribution intentionally submitted for inclusion in the Work +by You to the Licensor shall be under the terms and conditions of +this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify +the terms of any separate license agreement you may have executed +with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade +names, trademarks, service marks, or product names of the Licensor, +except as required for reasonable and customary use in describing the +origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or +agreed to in writing, Licensor provides the Work on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied, including, without limitation, any warranties or conditions +of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any +risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, +whether in tort (including negligence), contract, or otherwise, +unless required by applicable law (such as deliberate and grossly +negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, +incidental, or consequential damages of any character arising as a +result of this License or out of the use or inability to use the +Work (including but not limited to damages for loss of goodwill, +work stoppage, computer failure or malfunction, or any and all +other commercial damages or losses), even if such Contributor +has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing +the Work or Derivative Works thereof, You may choose to offer, +and charge a fee for, acceptance of support, warranty, indemnity, +or other liability obligations and/or rights consistent with this +License. However, in accepting such obligations, You may act only +on Your own behalf and on Your sole responsibility, not on behalf +of any other Contributor, and only if You agree to indemnify, +defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason +of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..fea1a81 --- /dev/null +++ b/NOTICE @@ -0,0 +1,7 @@ +FutrixData Security Package +Copyright 2026 FutrixData + +This repository contains public security specifications, verification +utilities, and small reusable packages extracted from FutrixData. It does not +contain the proprietary FutrixData desktop app, enterprise server, licensing, +billing, account, signing, or release infrastructure. diff --git a/README.md b/README.md index cca8eab..958c797 100644 --- a/README.md +++ b/README.md @@ -1,74 +1,142 @@ -# FutrixData +

+ FutrixData security package assurance flow +

-**The Data Gateway Your AI Agents Trust** +

FutrixData Security Package

-FutrixData is a desktop app for teams who want to use AI with real databases without handing raw credentials to an LLM. It sits between your AI agents and your data systems, keeps credentials on your machine, masks sensitive fields, and blocks risky operations before they run. +

+ Public security specifications, verifiers, protocol types, masking code, and a partial risk-engine core for FutrixData. +

-- Website: [futrixdata.com](https://futrixdata.com/) -- Downloads: [Latest Release](https://github.com/FutrixDev/FutrixData/releases/latest) +

+ License: Apache-2.0 + Go 1.23+ + FutrixData docs + Assurance matrix +

-## Why FutrixData +FutrixData is an AI data gateway for teams that want agents to work with real databases without handing raw credentials or unrestricted execution power to the agent. This repository is the **inspectable public security package**: the pieces a security reviewer, procurement team, or integrator can read, run, and compare against FutrixData product behavior during evaluation. -FutrixData is built for a simple goal: let AI help with real data work, while keeping production access safe. +> **Scope:** this repository is Apache-2.0. The FutrixData desktop application and FutrixData Enterprise Edition remain commercial, proprietary products under their own license terms. -### Agents get data, not credentials +## Product References -AI agents connect through MCP or Skill-style integrations and receive query results instead of database passwords, connection strings, or raw tokens. +Start with the official product docs when evaluating what this package supports: -### Risk checks before execution +- [FutrixData product site](https://futrixdata.com/) +- [Technical overview](https://futrixdata.com/doc/futrixdata-technical-overview) +- [Database risk control engine](https://futrixdata.com/doc/database-risk-control-engine) +- [Data sensitivity classification](https://futrixdata.com/doc/data-sensitivity-classification) +- [FutrixData Enterprise Edition](https://futrixdata.com/doc/futrixdata-enterprise-edition) -Every statement goes through the same safety layer before it runs. Destructive changes can be blocked, expensive queries can be flagged, and schema changes can require explicit confirmation. +## Quick Start -### One app for every data source +Run the public verification suite: -Use one desktop console to connect databases, browse schemas, run queries, inspect results, and work with an AI assistant in the same place. +```bash +go test ./... 
+``` -## What You Can Do +Verify the sanitized product-export evidence bundle: -- Connect MySQL, PostgreSQL, MongoDB, Redis, Elasticsearch, ChromaDB, DynamoDB, Cloudflare D1, and more -- Keep credentials encrypted locally on your machine -- Mask PII and sensitive fields before results reach AI agents -- Use natural language to generate queries and charts -- Review SQL in a visual console with schema browsing and execution history -- Add custom safety rules based on datasource, entity pattern, operation type, row count, or cost +```bash +go run ./cmd/futrix-evidence-verify ./examples/product-export +``` -## Built for Safe AI Data Access +Verify an audit log hash chain: -FutrixData combines three ideas in one product: +```bash +go run ./cmd/futrix-audit-verify ./examples/audit-log/valid.jsonl +``` -1. **AI Agent Data Gateway** - Connect AI tools without exposing secrets. +Verify downloaded release artifacts when a `SHA256SUMS.txt` file is present: -2. **Risk Control Engine** - Analyse queries before execution and stop dangerous operations. +```bash +bash ./release-verification/verify-checksums.sh /path/to/downloads +``` -3. **Unified Management Console** - Work with many data sources from one polished desktop app. +## What You Can Inspect -## Getting Started +| Area | Public path | What it proves | +| --- | --- | --- | +| Audit chain | `pkg/auditchain`, `cmd/futrix-audit-verify` | Local hash-chain audit format and verifier behavior. | +| PII masking | `pkg/masking` | L1-L5 sensitivity model and deterministic `masked:v1:` HMAC output. | +| Partial risk engine | `pkg/riskengine` | Rule model, lightweight parser, matching priority, and allow/warn/approval/block decisions. | +| Agent protocol | `pkg/protocol` | Tool names, response envelopes, approval payloads, errors, audit IDs, and risk attribution. | +| Evidence verifier | `pkg/evidence`, `cmd/futrix-evidence-verify` | End-to-end checks for audit, masking, block, and approval examples. 
| +| Release verification | `release-verification/verify-checksums.sh` | Checksum validation for published release assets. | -1. Download FutrixData for your platform. -2. Connect your databases in the desktop app. -3. Enable MCP or Skill integration for your AI tool. -4. Query with confidence knowing every request passes through the risk engine. +## Buyer Evaluation Workflow -## Available Platforms +Use this repository as the public part of an Enterprise security review: -- macOS Apple Silicon -- macOS Intel -- Windows 64-bit -- Linux 64-bit +1. Read the [assurance matrix](docs/assurance-matrix.md) to map product claims to code and verification steps. +2. Run `go test ./...` to confirm the public packages compile and pass. +3. Run `go run ./cmd/futrix-evidence-verify ./examples/product-export` to validate the evidence bundle. +4. During POC, ask FutrixData for equivalent exports from a disposable datasource: + - an agent query with masked columns; + - a destructive statement that is blocked; + - a statement held for approval with `riskAttribution`; + - an exported agent audit log that can be checked with `futrix-audit-verify`. -You can always find the latest installers on the [Releases](https://github.com/FutrixDev/FutrixData/releases) page. +## How FutrixData Uses These Concepts -## About This Repository +Agents call FutrixData over MCP, Skill, CLI, or HTTP instead of holding database credentials directly. FutrixData attributes each call to an agent identity, evaluates risk before execution, applies approval gates when needed, masks sensitive fields before agent egress, and records activity in an audit log with a local hash chain. -This repository is the **public release and packaging repository** for FutrixData. +This repository exposes the reviewable contracts behind that flow. 
The commercial products provide the full runtime: datasource adapters, richer parser integrations, EXPLAIN probes, trust-mode storage, approval routing, daemon behavior, UI, Enterprise deployment, SSO/RBAC, and operational controls. -It is used to: +## Repository Layout -- publish installers and release assets -- track release notes and versioned binaries -- store packaging and distribution automation +```text +cmd/futrix-audit-verify/ Standalone audit-log verifier +cmd/futrix-evidence-verify/ Evidence-bundle verifier CLI +pkg/auditchain/ Local audit hash-chain verifier +pkg/masking/ Deterministic field masking +pkg/riskengine/ Portable risk-engine core +pkg/protocol/ Public agent tool protocol types +pkg/evidence/ Evidence-bundle verifier package +docs/ Specs, assurance matrix, and scope notes +examples/ Audit, risk-rule, and product-export fixtures +release-verification/ Checksum verification helper +``` -For the product overview, screenshots, and download experience, visit [futrixdata.com](https://futrixdata.com/). +## What Is Not Open + +This repository does not include the complete FutrixData product. The following remain proprietary: + +- desktop UI, datasource adapters, and credential storage; +- account, license, billing, and entitlement flows; +- Enterprise deployment, RBAC, SSO, and tenant administration; +- signing, notarization, release credentials, and private build systems. + +The boundary is intentional: the public package supports review and verification of key security claims without making the full commercial product reconstructable from this repository alone. + +## Known Limits + +- **Local audit hash chains are not remote notarization.** They detect changes to the current file, but a fully privileged local attacker can rewrite the file and recompute hashes unless an external anchor is used. +- **Deterministic masking is not anonymization.** It preserves equality for agent analysis, but low-cardinality values remain guessable by enumeration. 
+- **The public risk engine is a portable subset.** The commercial product adds live datasource execution, EXPLAIN probes, trust modes, approval routing, and Enterprise policy controls. + +## Specifications + +- [Open-source scope analysis](docs/open-source-scope.md) +- [Assurance matrix](docs/assurance-matrix.md) +- [Production consistency statement](docs/production-consistency.md) +- [Evidence bundle](docs/evidence-bundle.md) +- [Threat model](docs/threat-model.md) +- [Audit-chain specification](docs/audit-chain.md) +- [Masking specification](docs/masking.md) +- [Partial risk-engine specification](docs/risk-engine.md) +- [Agent protocol](docs/agent-protocol.md) + +## Contributing and Security + +- Contribution guidelines: [CONTRIBUTING.md](CONTRIBUTING.md) +- Security policy: [SECURITY.md](SECURITY.md) +- Attribution notice: [NOTICE](NOTICE) + +## License + +This repository is licensed under Apache-2.0. See [LICENSE](LICENSE). + +The FutrixData desktop application and FutrixData Enterprise Edition remain commercial products under their own license terms. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..9c1e838 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,7 @@ +# Security policy + +Please do not post secrets, credentials, private logs, customer data, or exploitable details in public issues. + +For vulnerability reports, use a private disclosure channel controlled by FutrixData before publishing details. + +This public repository covers security specs and verification packages. Findings in the commercial desktop app or Enterprise server may require private reproduction details and should be reported privately. 
diff --git a/cmd/futrix-audit-verify/main.go b/cmd/futrix-audit-verify/main.go new file mode 100644 index 0000000..c731551 --- /dev/null +++ b/cmd/futrix-audit-verify/main.go @@ -0,0 +1,30 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + + "github.com/FutrixDev/FutrixPackage/pkg/auditchain" +) + +func main() { + if len(os.Args) != 2 { + fmt.Fprintln(os.Stderr, "usage: futrix-audit-verify <audit-log.jsonl>") + os.Exit(2) + } + result, err := auditchain.VerifyFile(os.Args[1]) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + if err := enc.Encode(result); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + if !result.Pass { + os.Exit(1) + } +} diff --git a/cmd/futrix-evidence-verify/main.go b/cmd/futrix-evidence-verify/main.go new file mode 100644 index 0000000..5f9784d --- /dev/null +++ b/cmd/futrix-evidence-verify/main.go @@ -0,0 +1,30 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + + "github.com/FutrixDev/FutrixPackage/pkg/evidence" +) + +func main() { + if len(os.Args) != 2 { + fmt.Fprintln(os.Stderr, "usage: futrix-evidence-verify <bundle-dir>") + os.Exit(2) + } + result, err := evidence.VerifyBundle(os.Args[1]) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + if err := enc.Encode(result); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + if !result.Pass { + os.Exit(1) + } +} diff --git a/docs/agent-protocol.md b/docs/agent-protocol.md new file mode 100644 index 0000000..a5647d1 --- /dev/null +++ b/docs/agent-protocol.md @@ -0,0 +1,72 @@ +# Agent protocol + +## Tool call envelope + +```json +{ + "tool": "execute_statement", + "accessKey": "fxd_live_...", + "protocol": "mcp", + "params": { + "datasourceId": "prod-postgres", + "statement": "select * from users where id = 1042" + } +} +``` + +## Success response + +```json +{ + "tool": "execute_statement", + "ok": true, + "result": { 
+ "rows": [] + }, + "auditId": "audit_01HQ..." +} +``` + +## Approval response + +```json +{ + "tool": "execute_statement", + "ok": false, + "approvalRequired": { + "tool": "execute_statement", + "summary": "Execute statement on datasource \"prod-postgres\"", + "riskAttribution": { + "source": "risk_engine", + "action": "warn", + "level": "medium", + "ruleId": "sql-warn-update", + "ruleCode": "SQL-008", + "reasons": ["UPDATE"] + } + } +} +``` + +## Error response + +```json +{ + "tool": "execute_statement", + "ok": false, + "error": { + "code": "tool_error", + "message": "DELETE without WHERE", + "riskAttribution": { + "source": "risk_engine", + "action": "block", + "level": "high", + "ruleId": "sql-block-delete-no-where" + } + } +} +``` + +## Public tool names + +The public package documents the stable tool names in `pkg/protocol`. The commercial product implements the full transport adapters for MCP, CLI, Skill, HTTP, and daemon IPC. diff --git a/docs/assets/futrix-security-package-flow.svg b/docs/assets/futrix-security-package-flow.svg new file mode 100644 index 0000000..9701de0 --- /dev/null +++ b/docs/assets/futrix-security-package-flow.svg @@ -0,0 +1,57 @@ + + FutrixData security package assurance flow + AI agents call FutrixData, which applies protocol validation, risk evaluation, masking, approval and audit. The public security package verifies those outputs. 
+ + + + FutrixData Security Package + Public code and evidence that help buyers verify FutrixData's security claims + + + + AI agents + MCP + Skill / CLI + HTTP tools + + + + + + + + FutrixData gateway + + + Protocol contract + + + Risk evaluation + + + PII masking + + + Approval + audit + + + + + + + + Databases + PostgreSQL / MySQL + MongoDB / Redis + D1 / DynamoDB / ES + + + + + Public verification package + auditchain · masking · partial riskengine · protocol · evidence verifier · release checksums + + + + + diff --git a/docs/assurance-matrix.md b/docs/assurance-matrix.md new file mode 100644 index 0000000..0ee22ea --- /dev/null +++ b/docs/assurance-matrix.md @@ -0,0 +1,21 @@ +# Assurance matrix + +This matrix maps FutrixData's public security claims to public code, commercial product behavior, and a buyer-verifiable check. + +| Security claim | Public code / evidence | Commercial product feature | How to verify | +| --- | --- | --- | --- | +| Agents get data without receiving database credentials. | `pkg/protocol` documents the agent tool contract without credential fields. | Desktop and Enterprise agent paths route calls through MCP, Skill, CLI, or HTTP-style tools instead of exposing connection strings. | Review `pkg/protocol.PublicTools()` and confirm tool calls use `datasourceId`, not passwords or connection strings. In product review, inspect an agent config and verify it only holds a FutrixData agent key. | +| Risky statements are evaluated before execution. | `pkg/riskengine` opens the portable risk-engine core: parser, rules, priority, and evaluator. `examples/product-export/risk-block-response.json` shows a blocked statement. | Commercial runtime adds datasource trust levels, richer parser integrations, EXPLAIN probes, approval routing, and daemon cache behavior. | Run `go test ./pkg/riskengine` and `go run ./cmd/futrix-evidence-verify ./examples/product-export`. 
In product review, execute a destructive statement such as `DELETE FROM users` and confirm it is blocked before DB execution. | +| Sensitive fields are masked before agent results are returned. | `pkg/masking` opens the L1-L5 model and deterministic `masked:v1:` algorithm. `examples/product-export/masked-query-result.json` shows an agent-facing result with masked columns. | Desktop and Enterprise apply masking on agent result paths while the human console may still show raw rows to authorized users. | Run `go test ./pkg/masking` and `go run ./cmd/futrix-evidence-verify ./examples/product-export`. In product review, classify `email` as L4/L5, run an agent query, and confirm `maskedColumns` and `masked:v1:` values appear. | +| Agent calls are attributed and auditable. | `pkg/protocol` exposes `auditId` and `riskAttribution` response fields. `examples/product-export/audit-log.jsonl` contains a chained audit export. | Commercial runtime records agent identity, protocol, tool, datasource, status, risk attribution, and timestamps. | Run `go run ./cmd/futrix-audit-verify ./examples/product-export/audit-log.jsonl`. In product review, perform one MCP/Skill/CLI agent call and verify an audit row is created. | +| Audit logs are locally tamper-evident. | `pkg/auditchain` and `cmd/futrix-audit-verify` implement the local SHA-256 hash-chain verifier. | Product audit rows include `seq`, `prev_hash`, `payload_hash`, `chain_hash`, and `chain_version`. | Run `go run ./cmd/futrix-audit-verify <audit-log.jsonl>`. Modify one row and confirm the verifier fails. | +| Approval responses explain why a call was held. | `pkg/protocol.RiskAttribution` and `examples/product-export/approval-response.json` expose matched rule details. | Commercial runtime returns approval prompts with risk attribution for MCP, Skill, CLI, and Enterprise entry points where supported. | Run `go run ./cmd/futrix-evidence-verify ./examples/product-export`. In product review, run `UPDATE users SET ... 
WHERE ...` under a cautious policy and confirm `approvalRequired.riskAttribution` is present. | +| Release artifacts can be checked after download. | `release-verification/verify-checksums.sh` verifies `SHA256SUMS.txt`. | Release workflow publishes platform artifacts and checksums. | Download artifacts and run `bash ./release-verification/verify-checksums.sh <download-dir>`. | +| The public repository is useful but not enough to rebuild FutrixData. | `docs/open-source-scope.md` and `docs/production-consistency.md` define the boundary. | Proprietary code keeps desktop UI, datasource adapters, credentials, account/license, Enterprise deployment, SSO/RBAC, signing, and release infrastructure private. | Review repository layout and confirm no datasource adapter, desktop shell, Enterprise server, billing, license, or signing secret code is present. | + +## Buyer review path + +1. Run the public test suite: `go test ./...`. +2. Verify the evidence bundle: `go run ./cmd/futrix-evidence-verify ./examples/product-export`. +3. Verify a product-exported audit log: `go run ./cmd/futrix-audit-verify <audit-log.jsonl>`. +4. During product evaluation, reproduce one masked query, one blocked query, and one approval-required query against a disposable datasource. diff --git a/docs/audit-chain.md b/docs/audit-chain.md new file mode 100644 index 0000000..ef9f215 --- /dev/null +++ b/docs/audit-chain.md @@ -0,0 +1,63 @@ +# Audit-chain specification + +## Record format + +FutrixData audit logs are JSONL. New chained rows contain these fields: + +| Field | Meaning | +| --- | --- | +| `seq` | Physical non-empty row number, starting at 1. | +| `prev_hash` | Previous chained row hash, or the genesis hash for the first chained row. | +| `payload_hash` | SHA-256 of the canonical row payload after removing chain fields. | +| `chain_hash` | SHA-256 of `seq`, `prev_hash`, `payload_hash`, and `chain_version`. | +| `chain_version` | Current value: `local-sha256-v1`. | + +Rows without any chain fields are legacy rows. 
A legacy prefix is accepted. Once the chain starts, later legacy rows fail verification. + +## Canonical payload + +To compute `payload_hash`: + +1. Parse the JSON row. +2. Remove `seq`, `prev_hash`, `payload_hash`, `chain_hash`, and `chain_version`. +3. JSON-encode the resulting object using Go's standard JSON encoder. +4. Hash the encoded bytes with SHA-256. + +## Chain hash + +To compute `chain_hash`, JSON-encode: + +```json +{ + "chain_version": "local-sha256-v1", + "payload_hash": "<payload_hash>", + "prev_hash": "<prev_hash>", + "seq": 1 +} +``` + +Then hash the encoded bytes with SHA-256. + +## Verification + +Run: + +```bash +go run ./cmd/futrix-audit-verify ./examples/audit-log/valid.jsonl +``` + +The JSON result reports: + +- `pass`; +- `verified_records`; +- `legacy_records`; +- `total_records`; +- `first_broken_position`; +- `expected_hash`; +- `actual_hash`; +- `source`; +- `path`. + +## Limits + +This is local tamper evidence. It can show that the current file no longer matches the hashes written into it. It is not remote signing, object lock, SIEM export, external timestamping, or immutable storage. diff --git a/docs/evidence-bundle.md b/docs/evidence-bundle.md new file mode 100644 index 0000000..e0e2d43 --- /dev/null +++ b/docs/evidence-bundle.md @@ -0,0 +1,26 @@ +# Evidence bundle + +`examples/product-export` is a buyer-review fixture that demonstrates how the public code can verify product-shaped outputs. + +It contains: + +- `audit-log.jsonl` — local hash-chain audit export; +- `masked-query-result.json` — agent-facing result with masked PII columns; +- `risk-block-response.json` — destructive query blocked by risk attribution; +- `approval-response.json` — update query held for approval with risk attribution.
+ +Run: + +```bash +go run ./cmd/futrix-evidence-verify ./examples/product-export +``` + +The verifier checks: + +- audit hash-chain validity; +- masked columns use `masked:v1:` values; +- rows do not contain obvious raw email or phone values; +- the block response matches the public partial risk engine's `DELETE FROM users` decision; +- the approval response matches the public partial risk engine's `UPDATE ... WHERE ...` decision. + +These fixtures are sanitized. During a real Enterprise evaluation, ask FutrixData to export equivalent evidence from a disposable datasource in the evaluated environment, then run the same verifier or the narrower audit verifier against that export. diff --git a/docs/masking.md b/docs/masking.md new file mode 100644 index 0000000..e3dc96b --- /dev/null +++ b/docs/masking.md @@ -0,0 +1,38 @@ +# Masking specification + +## Default sensitivity levels + +FutrixData uses an extensible L1-L5 model by default. + +| Level | Meaning | Examples | +| --- | --- | --- | +| L1 Public | Non-sensitive operational data | `id`, `created_at`, `status` | +| L2 Internal | Internal identifiers and metadata | `user_id`, `session_id`, `request_id` | +| L3 Confidential | Indirect personal, behavior, or location data | `ip_address`, `user_agent`, `device_id` | +| L4 Sensitive | Direct PII, financial, or medical data | `email`, `phone`, `salary`, `date_of_birth` | +| L5 Critical | Credentials and high-sensitivity personal data | `password`, `credit_card`, `api_secret`, `home_address` | + +By default, agents can receive L1-L3 fields. L4, L5, unconfirmed, and unknown levels are masked. + +## Masking algorithm + +For each value: + +1. Start with a local root secret. +2. Derive a per-field key with HMAC-SHA256 over: + +```text +futrixdata:masking:v1 +datasource:<datasource-id> +field:<field-name> +``` + +3. HMAC-SHA256 the raw value string with the derived key. +4. Take the first 16 hex characters. +5. Return `masked:v1:<16 hex chars>`.
+ +The same value in the same datasource and field masks to the same output. The same value in a different field or datasource masks differently. + +## Limits + +Deterministic masking is not anonymization. It keeps equality useful for agents, but low-cardinality fields can still be guessed by enumeration. Treat masked values as pseudonymous data, not public data. diff --git a/docs/open-source-scope.md b/docs/open-source-scope.md new file mode 100644 index 0000000..ed8bc94 --- /dev/null +++ b/docs/open-source-scope.md @@ -0,0 +1,97 @@ +# Open-source scope analysis + +## Goal + +Enterprise buyers need enough code to verify FutrixData's security claims before purchase. At the same time, the public repository should not contain enough product code to rebuild the full desktop app or Enterprise server. + +The best boundary is a public security package: specs, testable verifiers, protocol types, and portable rule/masking code. + +## Claims that should be reviewable + +The public FutrixData site and documentation emphasize these security claims: + +- agents get query results, not raw credentials; +- risky statements are checked before execution; +- sensitive fields can be masked before results reach agents; +- agent calls are attributed and audited; +- audit logs use a local hash-chain verifier; +- Enterprise users can reason about protocol behavior, approval behavior, and revocation/error surfaces. + +Those claims map cleanly to small public modules. + +## Recommended public modules + +### 1. Audit-chain verifier + +Open `pkg/auditchain` and `cmd/futrix-audit-verify`. + +This gives buyers something concrete to run against exported audit logs. It is useful without revealing the whole audit store, identity store, desktop UI, daemon, or Enterprise audit pipeline. + +### 2. Masking algorithm and sensitivity types + +Open `pkg/masking`. + +This exposes the deterministic HMAC-SHA256 masking contract, default L1-L5 level model, and row-level masking behavior. 
It does not expose credential storage, OS keyring integration, datasource adapters, UI workflows, or AI classification orchestration. + +### 3. Partial risk-engine core + +Open `pkg/riskengine`. + +This lets reviewers inspect the built-in rule model, lightweight statement parser, matching priority, and allow/warn/approval/block evaluator. It intentionally omits production datasource execution, richer parser integrations, EXPLAIN adapters, database clients, trust-mode storage, and daemon cache handling. + +### 4. Agent protocol types + +Open `pkg/protocol`. + +This documents tool names, request/response envelopes, approval-required responses, risk attribution, and masked-column reporting. It does not expose the real dispatcher, access-key store, IPC server, MCP server implementation, or Enterprise HTTP service. + +### 5. Release verification helper + +Open `release-verification/verify-checksums.sh`. + +This supports binary integrity checks without publishing signing credentials, private CI secrets, notarization keys, or release automation internals. + +### 6. Specs and examples + +Open `docs/` and `examples/`. + +Specs make security behavior easier to review. Examples let users test the packages quickly. + +### 7. Assurance evidence verifier + +Open `pkg/evidence` and `cmd/futrix-evidence-verify`. + +This gives buyers a single command that checks the public evidence bundle: audit-chain validity, masked agent results, blocked risk response, and approval-required response. It is not a substitute for a full product evaluation, but it turns the public repository into a runnable assurance package. 
+ +## Keep private + +These areas should stay outside the public repository: + +- desktop app shell and UX; +- datasource adapters and connection logic; +- credential encryption and OS keyring integration; +- auth, license, account, billing, and entitlement code; +- Enterprise server, SSO, RBAC, tenant management, and deployment automation; +- release signing, notarization, certificates, CI secrets, and private packaging tokens; +- commercial support workflows and internal roadmap; +- customer data, telemetry, logs, or operational endpoints. + +## Why this boundary works + +The selected modules prove the most important security contracts: + +- what is audited; +- how local audit tampering is detected; +- how sensitive values are transformed before reaching agents; +- how rules decide allow, warn, approval, or block; +- what an agent sees when a call succeeds, fails, or needs approval. + +They do not include the product shell that wires everything into a complete commercial application. + +## Future phases + +Phase 1 should publish this package once reviewed. + +Phase 2 can add more test vectors and exported audit samples. + +Phase 3 can consider additional runtime components only when their APIs are stable and their release will not expose commercial implementation details. diff --git a/docs/production-consistency.md b/docs/production-consistency.md new file mode 100644 index 0000000..db05d5d --- /dev/null +++ b/docs/production-consistency.md @@ -0,0 +1,62 @@ +# Production consistency statement + +This repository contains public security code and specs extracted from FutrixData's production design. It is not a full source release of the product. 
+ +## Directly aligned with production concepts + +These public packages track production concepts closely: + +| Public package | Production alignment | Commercial additions | +| --- | --- | --- | +| `pkg/auditchain` | Matches the local audit hash-chain field names, hash inputs, version string, and verifier result shape used by FutrixData audit exports. | Secure file storage, file locking, encrypted local data handling, product CLI integration, UI display, Enterprise audit aggregation. | +| `pkg/masking` | Matches the L1-L5 sensitivity model and `masked:v1:<16 hex>` deterministic HMAC-SHA256 output contract. | OS keyring secret management, migration fallback, SQL result-column origin tracking, datasource classification store, product UI flows. | +| `pkg/protocol` | Matches the public agent-facing concepts: tool names, tool-call envelope, approval response, error response, audit IDs, masked columns, and risk attribution. | Actual MCP server, Skill CLI, daemon IPC, HTTP/Enterprise transport, access-key validation, revocation, schema-egress gates. | + +## Portable subset + +`pkg/riskengine` is a portable public subset, not the complete commercial risk engine. + +It opens: + +- rule data model; +- lightweight statement parsing; +- built-in baseline rules; +- user rule priority and scope handling; +- allow / warn / require_approval / block evaluator. + +The commercial product additionally includes: + +- richer SQL parser integrations; +- live datasource adapters; +- EXPLAIN and query-plan probes; +- datasource trust-mode storage; +- approval routing; +- daemon rule-cache reload behavior; +- product audit writing; +- UI configuration and Enterprise policy management. + +## Public examples + +`examples/product-export` contains sanitized product-export fixtures. They use safe demo values and product-shaped JSON contracts. They are not copied from a real customer's data or from a developer's private local database. 
+ +The purpose is to let reviewers run the public verifier against the same kinds of outputs a product evaluation should request: + +- an audit log export; +- an agent-facing masked query result; +- a risk-block response; +- an approval-required response with risk attribution. + +## Wording guidance + +Accurate public wording: + +- "FutrixData publishes security specifications, verifiers, protocol types, masking code, and a partial risk-engine core." +- "The audit-chain verifier can check product-exported audit logs." +- "The public risk engine is a portable subset; the commercial product adds live execution, EXPLAIN probes, trust modes, approval routing, and Enterprise policy controls." + +Avoid: + +- "FutrixData is fully open source." +- "The entire risk engine is open source." +- "Public masked fixtures prove customer data is anonymized." +- "Local audit hash chains are immutable." diff --git a/docs/risk-engine.md b/docs/risk-engine.md new file mode 100644 index 0000000..2988b24 --- /dev/null +++ b/docs/risk-engine.md @@ -0,0 +1,57 @@ +# Partial risk-engine specification + +This repository opens the portable core of the FutrixData risk engine: rule types, a lightweight statement parser, rule matching, priority ordering, and the final allow/warn/approval/block evaluator. + +It is intentionally not the full commercial runtime. The desktop and Enterprise products add datasource adapters, richer SQL parser integrations, EXPLAIN probes, trust-mode storage, approval dispatch, daemon cache behavior, and audit writing. + +## Decisions + +The rule engine returns one of four actions: + +| Action | Meaning | +| --- | --- | +| `allow` | The statement can proceed. | +| `warn` | The statement is risky enough to surface to policy. | +| `require_approval` | The statement must be explicitly approved. | +| `block` | The statement should not run through an agent path. 
| + +Risk levels are derived from actions: + +- `allow` -> `low`; +- `warn` -> `medium`; +- `require_approval` and `block` -> `high`. + +## Rule shape + +```json +{ + "id": "sql-block-delete-no-where", + "code": "SQL-005", + "description": "Block DELETE without WHERE", + "scope": { + "dsTypes": ["mysql", "postgresql", "d1"] + }, + "enabled": true, + "priority": 90, + "action": "block", + "reason": "DELETE without WHERE", + "when": { + "command": ["delete"], + "hasWhere": false + } +} +``` + +User rules are evaluated before built-in rules. More specific scope and higher priority win. + +## Open risk-engine coverage + +The public package includes portable built-ins and evaluator logic for: + +- SQL-family sources: MySQL, PostgreSQL, Cloudflare D1; +- Redis and Redis Cluster; +- MongoDB; +- Elasticsearch; +- DynamoDB PartiQL. + +The commercial product adds live datasource execution, richer SQL parsing, EXPLAIN probes, trust-mode storage, approval routing, and runtime cache behavior. diff --git a/docs/threat-model.md b/docs/threat-model.md new file mode 100644 index 0000000..cef5218 --- /dev/null +++ b/docs/threat-model.md @@ -0,0 +1,44 @@ +# Threat model + +## Protected assets + +FutrixData is designed to reduce risk around: + +- database credentials; +- sensitive row values returned to AI agents; +- destructive or expensive database statements; +- schema metadata sent to AI tooling; +- agent identity and access-key misuse; +- local audit-log tampering after records are written. + +## Trusted components + +The local desktop installation or Enterprise deployment is trusted to enforce policy before database execution. + +The configured database is trusted to execute accepted statements and return truthful results. + +The local secret store is trusted to protect the masking root secret. + +The person approving a held action is trusted to understand the displayed summary. + +## Untrusted or partially trusted components + +AI agents are treated as partially trusted. 
They can request actions but should not receive database credentials or bypass policy. + +LLM providers are treated as external processors. Sensitive row values should be masked before they reach an agent context when policy requires masking. + +Local files are not immutable. The audit hash chain can detect changes to the current chained section, but it cannot stop a fully privileged local attacker from rewriting all rows and recomputing hashes. + +## Out of scope + +This package does not prove: + +- endpoint hardening of the commercial Enterprise server; +- correctness of every datasource adapter; +- protection against a compromised operating system; +- remote audit anchoring; +- signing-key custody; +- billing or license enforcement; +- SSO or RBAC implementation details. + +Those areas remain part of the commercial product review. diff --git a/examples/audit-log/valid.jsonl b/examples/audit-log/valid.jsonl new file mode 100644 index 0000000..551c07a --- /dev/null +++ b/examples/audit-log/valid.jsonl @@ -0,0 +1,3 @@ +{"id":"legacy","toolName":"list_datasources","status":"success"} +{"chain_hash":"315274addcb6d90c3cad80e700c2a5ec64088bf08346127fa801744d28a466de","chain_version":"local-sha256-v1","id":"audit-1","payload_hash":"69786a037ca8fe364aa96e150211e53801dd4c54b13ca77535f7f0a1eb026b09","prev_hash":"0000000000000000000000000000000000000000000000000000000000000000","seq":2,"status":"success","toolName":"execute_statement"} +{"chain_hash":"630a1544e8a5bfaa70f80bed9af05f3344c91414604aebf85f4d63145a449bd2","chain_version":"local-sha256-v1","id":"audit-2","payload_hash":"001ee09721bbc8c99ed9e78f202ee52f2b25cc3bb8fd7578b3ceeff895d01dc0","prev_hash":"315274addcb6d90c3cad80e700c2a5ec64088bf08346127fa801744d28a466de","seq":3,"status":"approval_required","toolName":"execute_statement"} diff --git a/examples/product-export/README.md b/examples/product-export/README.md new file mode 100644 index 0000000..afe4531 --- /dev/null +++ b/examples/product-export/README.md @@ 
-0,0 +1,18 @@ +# Product export evidence bundle + +This directory contains a sanitized FutrixData product-export evidence bundle. + +The shapes match the product contracts used by the desktop and Enterprise agent paths: + +- `audit-log.jsonl` is a chained agent audit export. +- `masked-query-result.json` shows an agent-facing query result after PII masking. +- `risk-block-response.json` shows a blocked destructive statement. +- `approval-response.json` shows a statement held for approval with risk attribution. + +The values are safe demo values, not a copy of a user's local database or audit history. This is intentional: public fixtures should prove the contract without publishing private customer or developer data. + +Verify the whole bundle: + +```bash +go run ./cmd/futrix-evidence-verify ./examples/product-export +``` diff --git a/examples/product-export/approval-response.json b/examples/product-export/approval-response.json new file mode 100644 index 0000000..f2c34b7 --- /dev/null +++ b/examples/product-export/approval-response.json @@ -0,0 +1,23 @@ +{ + "tool": "execute_statement", + "ok": false, + "approvalRequired": { + "tool": "execute_statement", + "summary": "Execute statement on datasource \"prod-postgres\"", + "params": { + "datasourceId": "prod-postgres", + "statement": "UPDATE users SET status = 'inactive' WHERE id = 1042" + }, + "riskAttribution": { + "source": "risk_engine", + "action": "warn", + "level": "medium", + "ruleId": "sql-warn-update", + "ruleCode": "SQL-008", + "ruleDescription": "Warn on UPDATE with WHERE", + "builtin": true, + "reasons": ["UPDATE"] + } + }, + "auditId": "audit_public_approval_001" +} diff --git a/examples/product-export/audit-log.jsonl b/examples/product-export/audit-log.jsonl new file mode 100644 index 0000000..551c07a --- /dev/null +++ b/examples/product-export/audit-log.jsonl @@ -0,0 +1,3 @@ +{"id":"legacy","toolName":"list_datasources","status":"success"} 
+{"chain_hash":"315274addcb6d90c3cad80e700c2a5ec64088bf08346127fa801744d28a466de","chain_version":"local-sha256-v1","id":"audit-1","payload_hash":"69786a037ca8fe364aa96e150211e53801dd4c54b13ca77535f7f0a1eb026b09","prev_hash":"0000000000000000000000000000000000000000000000000000000000000000","seq":2,"status":"success","toolName":"execute_statement"} +{"chain_hash":"630a1544e8a5bfaa70f80bed9af05f3344c91414604aebf85f4d63145a449bd2","chain_version":"local-sha256-v1","id":"audit-2","payload_hash":"001ee09721bbc8c99ed9e78f202ee52f2b25cc3bb8fd7578b3ceeff895d01dc0","prev_hash":"315274addcb6d90c3cad80e700c2a5ec64088bf08346127fa801744d28a466de","seq":3,"status":"approval_required","toolName":"execute_statement"} diff --git a/examples/product-export/masked-query-result.json b/examples/product-export/masked-query-result.json new file mode 100644 index 0000000..8887855 --- /dev/null +++ b/examples/product-export/masked-query-result.json @@ -0,0 +1,15 @@ +{ + "tool": "execute_statement", + "ok": true, + "datasourceId": "prod-postgres", + "entity": "users", + "maskedColumns": ["email", "phone"], + "rows": [ + { + "id": 1042, + "email": "masked:v1:8f3a1c9b72e04d11", + "phone": "masked:v1:b219ac74e1d908f2", + "status": "active" + } + ] +} diff --git a/examples/product-export/risk-block-response.json b/examples/product-export/risk-block-response.json new file mode 100644 index 0000000..7f7bab0 --- /dev/null +++ b/examples/product-export/risk-block-response.json @@ -0,0 +1,19 @@ +{ + "tool": "execute_statement", + "ok": false, + "error": { + "code": "tool_error", + "message": "DELETE without WHERE", + "riskAttribution": { + "source": "risk_engine", + "action": "block", + "level": "high", + "ruleId": "sql-block-delete-no-where", + "ruleCode": "SQL-005", + "ruleDescription": "Block DELETE without WHERE", + "builtin": true, + "reasons": ["DELETE without WHERE"] + } + }, + "auditId": "audit_public_block_001" +} diff --git a/examples/risk-rules/sql-basic.json 
// Hash-chain identifiers written into, and expected from, every chained row.
const (
	// Version is the only chain_version value the verifier accepts.
	Version = "local-sha256-v1"
	// GenesisHash is the prev_hash expected on the first chained row.
	GenesisHash = "0000000000000000000000000000000000000000000000000000000000000000"
)

// maxLineBytes caps the size of a single audit row the scanner will buffer.
const maxLineBytes = 8 * 1024 * 1024

// chainFields lists the keys that carry chain metadata. They are excluded
// from the payload hash and are overwritten by AddFields.
var chainFields = map[string]struct{}{
	"seq":           {},
	"prev_hash":     {},
	"payload_hash":  {},
	"chain_hash":    {},
	"chain_version": {},
}

// VerifyResult is the JSON-serialisable outcome of a verification run.
// FirstBrokenPosition is the 1-based index of the failing non-empty row;
// ExpectedHash and ActualHash are only populated for hash mismatches.
type VerifyResult struct {
	Pass                bool   `json:"pass"`
	VerifiedRecords     int    `json:"verified_records"`
	LegacyRecords       int    `json:"legacy_records"`
	TotalRecords        int    `json:"total_records"`
	FirstBrokenPosition int    `json:"first_broken_position,omitempty"`
	Reason              string `json:"reason,omitempty"`
	ExpectedHash        string `json:"expected_hash,omitempty"`
	ActualHash          string `json:"actual_hash,omitempty"`
	Source              string `json:"source"`
	Path                string `json:"path,omitempty"`
}

// lineKind classifies a decoded row by how many chain fields it carries.
type lineKind int

const (
	lineLegacy  lineKind = iota // no chain fields at all
	lineChained                 // every chain field present
	linePartial                 // some, but not all, chain fields present
)

// AddFields returns a copy of record with the five chain fields set for
// position seq. Any chain fields already present in record are discarded.
// A blank prevHash is replaced by GenesisHash. It returns an error when seq
// is not positive or the record cannot be JSON-encoded.
func AddFields(record map[string]any, seq int64, prevHash string) (map[string]any, error) {
	if seq <= 0 {
		return nil, fmt.Errorf("audit chain seq must be positive")
	}
	prevHash = strings.TrimSpace(prevHash)
	if prevHash == "" {
		prevHash = GenesisHash
	}

	next := make(map[string]any, len(record)+len(chainFields))
	for k, v := range record {
		if _, reserved := chainFields[k]; reserved {
			continue
		}
		next[k] = v
	}

	// PayloadHash ignores chain fields, so hashing before or after they are
	// attached yields the same digest.
	payloadHash, err := PayloadHash(next)
	if err != nil {
		return nil, err
	}
	chainHash, err := ChainHash(seq, prevHash, payloadHash, Version)
	if err != nil {
		return nil, err
	}

	next["seq"] = seq
	next["prev_hash"] = prevHash
	next["chain_version"] = Version
	next["payload_hash"] = payloadHash
	next["chain_hash"] = chainHash
	return next, nil
}

// VerifyFile verifies the JSONL audit log at path. The returned result has
// Source "file" and Path set to the trimmed path. An empty path yields a
// failed result; open errors other than "not exist" are returned as errors.
//
// NOTE(review): a path that does not exist yields Pass == true with zero
// records. That looks deliberate (no log written yet), but it also means a
// deleted or mistyped log passes; callers that require a log to be present
// should check TotalRecords as well.
func VerifyFile(path string) (VerifyResult, error) {
	path = strings.TrimSpace(path)
	result := VerifyResult{Pass: true, Source: "file", Path: path}
	if path == "" {
		result.Pass = false
		result.Reason = "missing audit file path"
		return result, nil
	}
	f, err := os.Open(path)
	if err != nil {
		if os.IsNotExist(err) {
			return result, nil
		}
		return VerifyResult{}, err
	}
	defer f.Close()

	result, err = Verify(f)
	result.Source = "file"
	result.Path = path
	return result, err
}

// Verify reads all of r and verifies it as a JSONL audit log. Only read
// errors are returned as errors; verification failures are reported through
// VerifyResult.Pass and VerifyResult.Reason.
func Verify(r io.Reader) (VerifyResult, error) {
	data, err := io.ReadAll(r)
	if err != nil {
		return VerifyResult{}, err
	}
	return verifyData(data), nil
}

// verifyData walks the non-empty rows of data in order. Legacy rows (no
// chain fields) are accepted only before the first chained row. Each chained
// row must carry seq equal to its position, the supported chain_version, the
// previous row's chain_hash as prev_hash, and payload/chain hashes that match
// the recomputed values. Verification stops at the first failing row.
func verifyData(data []byte) VerifyResult {
	result := VerifyResult{Pass: true, Source: "reader"}
	scanner := bufio.NewScanner(bytes.NewReader(data))
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)

	position := 0
	chainStarted := false
	expectedPrev := GenesisHash

	for scanner.Scan() {
		line := bytes.TrimSpace(scanner.Bytes())
		if len(line) == 0 {
			continue
		}
		position++
		result.TotalRecords = position

		raw, err := decodeLine(line)
		if err != nil {
			return fail(result, position, "invalid JSON audit row: "+err.Error(), "", "")
		}

		switch classify(raw) {
		case lineLegacy:
			if chainStarted {
				return fail(result, position, "legacy audit row found after hash chain started", "", "")
			}
			result.LegacyRecords++
			continue
		case linePartial:
			return fail(result, position, "incomplete hash-chain fields", "", "")
		}

		seq, err := int64From(raw, "seq")
		if err != nil || seq <= 0 {
			return fail(result, position, "invalid hash-chain sequence", "", "")
		}
		if seq != int64(position) {
			return fail(result, position, fmt.Sprintf("unexpected hash-chain sequence: got %d want %d", seq, position), "", "")
		}

		version := strings.TrimSpace(stringFrom(raw, "chain_version"))
		if version != Version {
			return fail(result, position, "unsupported hash-chain version", "", "")
		}
		prevHash := strings.TrimSpace(stringFrom(raw, "prev_hash"))
		if prevHash != expectedPrev {
			return fail(result, position, "previous hash mismatch", expectedPrev, prevHash)
		}

		// The row is already decoded; hash it directly instead of parsing
		// the same bytes a second time.
		expectedPayload, err := PayloadHash(raw)
		if err != nil {
			return fail(result, position, "cannot hash audit payload", "", "")
		}
		actualPayload := strings.TrimSpace(stringFrom(raw, "payload_hash"))
		if actualPayload != expectedPayload {
			return fail(result, position, "payload hash mismatch", expectedPayload, actualPayload)
		}

		expectedChain, err := ChainHash(seq, prevHash, expectedPayload, version)
		if err != nil {
			return fail(result, position, "cannot hash audit chain", "", "")
		}
		actualChain := strings.TrimSpace(stringFrom(raw, "chain_hash"))
		if actualChain != expectedChain {
			return fail(result, position, "chain hash mismatch", expectedChain, actualChain)
		}

		chainStarted = true
		expectedPrev = actualChain
		result.VerifiedRecords++
	}
	if err := scanner.Err(); err != nil {
		// The scanner failed while reading the row after the last good one.
		return fail(result, position+1, err.Error(), "", "")
	}
	return result
}

// PayloadHash returns the hex SHA-256 of record JSON-encoded with every
// chain field removed. encoding/json writes map keys in sorted order, which
// is what makes the encoding stable across runs.
func PayloadHash(record map[string]any) (string, error) {
	payload := make(map[string]any, len(record))
	for k, v := range record {
		if _, reserved := chainFields[k]; reserved {
			continue
		}
		payload[k] = v
	}
	canonical, err := json.Marshal(payload)
	if err != nil {
		return "", err
	}
	return sha256Hex(canonical), nil
}

// ChainHash returns the hex SHA-256 of the JSON object holding seq and the
// whitespace-trimmed prevHash, payloadHash and version.
func ChainHash(seq int64, prevHash, payloadHash, version string) (string, error) {
	canonical, err := json.Marshal(map[string]any{
		"chain_version": strings.TrimSpace(version),
		"payload_hash":  strings.TrimSpace(payloadHash),
		"prev_hash":     strings.TrimSpace(prevHash),
		"seq":           seq,
	})
	if err != nil {
		return "", err
	}
	return sha256Hex(canonical), nil
}

// decodeLine parses one audit row, keeping numbers as json.Number so they
// re-encode exactly as written. The row must be exactly one JSON object:
// a bare null and any content after the object are rejected, because a row
// with appended data would otherwise verify while only its first value was
// ever hashed.
func decodeLine(line []byte) (map[string]any, error) {
	dec := json.NewDecoder(bytes.NewReader(line))
	dec.UseNumber()
	var raw map[string]any
	if err := dec.Decode(&raw); err != nil {
		return nil, err
	}
	if raw == nil {
		return nil, fmt.Errorf("audit row must be a JSON object")
	}
	// Token reports io.EOF only when nothing but whitespace remains.
	if _, err := dec.Token(); err != io.EOF {
		return nil, fmt.Errorf("unexpected data after JSON object")
	}
	return raw, nil
}

// classify reports whether raw carries none, all, or only some chain fields.
func classify(raw map[string]any) lineKind {
	present := 0
	for field := range chainFields {
		if _, ok := raw[field]; ok {
			present++
		}
	}
	switch present {
	case 0:
		return lineLegacy
	case len(chainFields):
		return lineChained
	default:
		return linePartial
	}
}

// int64From reads raw[field] as an int64. It accepts json.Number, integer
// types, integral float64 values and decimal strings; a missing or nil field,
// a non-integral float, or any other type is an error.
func int64From(raw map[string]any, field string) (int64, error) {
	v, ok := raw[field]
	if !ok || v == nil {
		return 0, fmt.Errorf("missing %s", field)
	}
	switch t := v.(type) {
	case json.Number:
		return t.Int64()
	case float64:
		// Refuse values that would be silently truncated (also NaN/Inf).
		if t != float64(int64(t)) {
			return 0, fmt.Errorf("invalid %s", field)
		}
		return int64(t), nil
	case int64:
		return t, nil
	case int:
		return int64(t), nil
	case string:
		return strconv.ParseInt(strings.TrimSpace(t), 10, 64)
	default:
		return 0, fmt.Errorf("invalid %s", field)
	}
}

// stringFrom returns raw[field] formatted with fmt.Sprint, or "" when the
// field is missing or nil.
func stringFrom(raw map[string]any, field string) string {
	if v, ok := raw[field]; ok && v != nil {
		return fmt.Sprint(v)
	}
	return ""
}

// fail returns a copy of result marked as failed at position with the given
// reason and, for hash mismatches, the expected and actual digests.
func fail(result VerifyResult, position int, reason, expected, actual string) VerifyResult {
	result.Pass = false
	result.FirstBrokenPosition = position
	result.Reason = reason
	result.ExpectedHash = expected
	result.ActualHash = actual
	return result
}

// sha256Hex returns the lowercase hex SHA-256 digest of data.
func sha256Hex(data []byte) string {
	sum := sha256.Sum256(data)
	return hex.EncodeToString(sum[:])
}
b/pkg/evidence/evidence.go @@ -0,0 +1,203 @@ +package evidence + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/FutrixDev/FutrixPackage/pkg/auditchain" + "github.com/FutrixDev/FutrixPackage/pkg/masking" + "github.com/FutrixDev/FutrixPackage/pkg/protocol" + "github.com/FutrixDev/FutrixPackage/pkg/riskengine" +) + +type BundleResult struct { + Pass bool `json:"pass"` + Path string `json:"path"` + Checks []CheckResult `json:"checks"` + AuditResult auditchain.VerifyResult `json:"auditResult,omitempty"` +} + +type CheckResult struct { + Name string `json:"name"` + Pass bool `json:"pass"` + Message string `json:"message,omitempty"` +} + +type MaskedQueryExport struct { + Tool protocol.ToolName `json:"tool"` + OK bool `json:"ok"` + DatasourceID string `json:"datasourceId"` + Entity string `json:"entity"` + MaskedColumns []string `json:"maskedColumns"` + Rows []map[string]any `json:"rows"` +} + +func VerifyBundle(path string) (BundleResult, error) { + path = strings.TrimSpace(path) + result := BundleResult{Pass: true, Path: path} + if path == "" { + return failBundle(result, "bundle_path", "bundle path is required"), nil + } + + audit, err := auditchain.VerifyFile(filepath.Join(path, "audit-log.jsonl")) + if err != nil { + return result, err + } + result.AuditResult = audit + result.add("audit_chain", audit.Pass, audit.Reason) + + maskedOK, msg, err := verifyMaskedQuery(filepath.Join(path, "masked-query-result.json")) + if err != nil { + return result, err + } + result.add("masked_query_result", maskedOK, msg) + + blockOK, msg, err := verifyRiskBlock(filepath.Join(path, "risk-block-response.json")) + if err != nil { + return result, err + } + result.add("risk_block_response", blockOK, msg) + + approvalOK, msg, err := verifyApproval(filepath.Join(path, "approval-response.json")) + if err != nil { + return result, err + } + result.add("approval_response", approvalOK, msg) + + for _, check := range result.Checks { + if 
!check.Pass { + result.Pass = false + break + } + } + return result, nil +} + +func (r *BundleResult) add(name string, pass bool, message string) { + r.Checks = append(r.Checks, CheckResult{Name: name, Pass: pass, Message: message}) +} + +func failBundle(result BundleResult, name, message string) BundleResult { + result.Pass = false + result.add(name, false, message) + return result +} + +func verifyMaskedQuery(path string) (bool, string, error) { + var export MaskedQueryExport + if err := readJSON(path, &export); err != nil { + return false, "", err + } + if !export.OK { + return false, "masked query export is not ok", nil + } + if export.Tool != protocol.ToolExecuteStatement { + return false, "masked query export must be for execute_statement", nil + } + if len(export.MaskedColumns) == 0 { + return false, "maskedColumns is empty", nil + } + if len(export.Rows) == 0 { + return false, "rows is empty", nil + } + for _, col := range export.MaskedColumns { + for _, row := range export.Rows { + value, ok := row[col] + if !ok { + return false, fmt.Sprintf("masked column %q missing from row", col), nil + } + s := fmt.Sprint(value) + if !masking.IsMaskedValue(s) { + return false, fmt.Sprintf("masked column %q is not masked", col), nil + } + } + } + if containsRawPII(export.Rows) { + return false, "rows still contain raw PII-shaped values", nil + } + return true, "", nil +} + +func verifyRiskBlock(path string) (bool, string, error) { + var out protocol.ToolResult + if err := readJSON(path, &out); err != nil { + return false, "", err + } + if out.OK || out.Error == nil { + return false, "risk block response must be an error", nil + } + if out.Error.RiskAttribution == nil { + return false, "risk block response lacks riskAttribution", nil + } + attr := out.Error.RiskAttribution + if attr.Action != string(riskengine.ActionBlock) { + return false, "risk block response action is not block", nil + } + assessment := riskengine.NewEngine().Assess("postgresql", "prod-postgres", 
// readJSON reads the file at path and decodes its JSON content into target.
//
// target must be a non-nil pointer, as required by json.Unmarshal. Read
// errors are returned unchanged (os.ReadFile already reports the path).
// Decode errors are wrapped with the path so the offending bundle file can be
// identified; the underlying error stays reachable through errors.Is and
// errors.As.
func readJSON(path string, target any) error {
	raw, err := os.ReadFile(path)
	if err != nil {
		return err
	}
	if err := json.Unmarshal(raw, target); err != nil {
		return fmt.Errorf("decode %s: %w", path, err)
	}
	return nil
}
// LevelConfig holds the sensitivity level definitions together with the
// inclusive range of level IDs that agents may read unmasked.
type LevelConfig struct {
	// Levels are the available sensitivity levels. buildMaskSet maps a field's
	// level key (LevelDefinition.Key) to its numeric LevelDefinition.ID through
	// this list; a field whose level key is not listed here is masked.
	Levels []LevelDefinition `json:"levels"`
	// AgentAccessFrom is the lowest level ID left unmasked for agents.
	// Fields with a lower ID are masked by buildMaskSet.
	AgentAccessFrom int `json:"agentAccessFrom"`
	// AgentAccessTo is the highest level ID left unmasked for agents.
	// Fields with a higher ID are masked by buildMaskSet. When both
	// AgentAccessFrom and AgentAccessTo are zero, buildMaskSet applies no
	// range-based masking at all.
	AgentAccessTo int `json:"agentAccessTo"`
}
// StoreState is the serialized classification store that MaskRows consults to
// decide which columns to mask.
type StoreState struct {
	// Version is not interpreted by the masking code visible in this file.
	// NOTE(review): presumably a schema version for the stored document — confirm.
	Version int `json:"version"`
	// LevelConfig overrides the level definitions and agent access range.
	// When nil, MaskRows falls back to DefaultLevelConfig.
	LevelConfig *LevelConfig `json:"levelConfig,omitempty"`
	// Datasources holds one classification per data source, keyed by the
	// datasource ID that MaskRows receives. A data source missing from this
	// map is never masked.
	Datasources map[string]DatasourceClassification `json:"datasources"`
}
PII, financial and medical data", Examples: []string{"email", "phone", "salary", "medical_record", "date_of_birth", "social_security"}, Color: "orange"}, + {ID: 5, Key: "L5", Name: "Critical", Description: "Credentials, payment instruments, and highly sensitive personal data", NameEn: "Critical", DescriptionEn: "Credentials, payment instruments, and highly sensitive personal data", Examples: []string{"password", "credit_card", "bank_account", "private_key", "api_secret", "home_address"}, Color: "red"}, + }, + } +} + +func MaskValue(rootSecret []byte, datasourceID, field string, value any) string { + key := maskingKey(rootSecret, datasourceID, field) + mac := hmac.New(sha256.New, key) + _, _ = mac.Write([]byte(fmt.Sprint(value))) + full := hex.EncodeToString(mac.Sum(nil)) + return fmt.Sprintf("%sv%d:%s", Prefix, KeyVersion, full[:HashOutputLen]) +} + +func MaskRows(state StoreState, rootSecret []byte, datasourceID, entityHint string, rows []map[string]any) []string { + if len(rows) == 0 || strings.TrimSpace(entityHint) == "" { + return nil + } + ds, ok := state.Datasources[datasourceID] + if !ok { + return nil + } + cfg := DefaultLevelConfig() + if state.LevelConfig != nil { + cfg = *state.LevelConfig + } + + columns := inferColumns(rows) + normalized := normalizeColumns(columns) + entities := strings.Split(entityHint, ",") + shouldMask := buildMaskSetForEntities(ds, cfg, normalized, entities) + if len(shouldMask) == 0 { + return nil + } + + var masked []string + keys := make(map[string][]byte) + for i, col := range columns { + if shouldMask[normalized[i]] { + masked = append(masked, col) + keys[col] = maskingKey(rootSecret, datasourceID, normalized[i]) + } + } + for _, row := range rows { + for _, col := range masked { + if val, ok := row[col]; ok && val != nil { + row[col] = hashWithKey(val, keys[col]) + continue + } + if strings.Contains(col, ".") { + maskNestedField(row, col, keys[col]) + } + } + } + return masked +} + +func IsMaskedValue(v string) bool { + 
return strings.HasPrefix(v, Prefix) +} + +func maskingKey(rootSecret []byte, datasourceID, field string) []byte { + if len(rootSecret) == 0 { + rootSecret = []byte("anonymous") + } + mac := hmac.New(sha256.New, rootSecret) + _, _ = fmt.Fprintf(mac, "futrixdata:masking:v%d\ndatasource:%s\nfield:%s", KeyVersion, datasourceID, field) + return mac.Sum(nil) +} + +func hashWithKey(value any, key []byte) string { + mac := hmac.New(sha256.New, key) + _, _ = mac.Write([]byte(fmt.Sprint(value))) + full := hex.EncodeToString(mac.Sum(nil)) + return fmt.Sprintf("%sv%d:%s", Prefix, KeyVersion, full[:HashOutputLen]) +} + +func buildMaskSetForEntities(ds DatasourceClassification, cfg LevelConfig, columns []string, entities []string) map[string]bool { + mask := make(map[string]bool) + allResolved := true + for _, name := range entities { + name = strings.TrimSpace(name) + ec, ok := ds.Entities[name] + if !ok { + allResolved = false + continue + } + for col := range buildMaskSet(ec, cfg, columns) { + mask[col] = true + } + } + if !allResolved { + return buildMaskSetAllEntities(ds, cfg, columns) + } + return mask +} + +func buildMaskSetAllEntities(ds DatasourceClassification, cfg LevelConfig, columns []string) map[string]bool { + mask := make(map[string]bool) + for _, ec := range ds.Entities { + for col := range buildMaskSet(ec, cfg, columns) { + mask[col] = true + } + } + return mask +} + +func buildMaskSet(ec EntityClassification, cfg LevelConfig, columns []string) map[string]bool { + levelID := make(map[string]int, len(cfg.Levels)) + for _, level := range cfg.Levels { + levelID[level.Key] = level.ID + } + mask := make(map[string]bool) + for _, col := range columns { + fc, ok := ec.Fields[col] + if !ok { + continue + } + if fc.Level == LevelUnconfirmed { + mask[col] = true + continue + } + id, ok := levelID[string(fc.Level)] + if !ok { + mask[col] = true + continue + } + if cfg.AgentAccessFrom == 0 && cfg.AgentAccessTo == 0 { + continue + } + if id < cfg.AgentAccessFrom || id > 
// inferColumns returns the sorted, de-duplicated set of column names found in
// rows. Nested documents contribute dotted paths in addition to their parent
// key (see collectDottedKeys).
func inferColumns(rows []map[string]any) []string {
	unique := map[string]struct{}{}
	for i := range rows {
		collectDottedKeys("", rows[i], unique)
	}

	columns := make([]string, 0, len(unique))
	for name := range unique {
		columns = append(columns, name)
	}
	sort.Strings(columns)
	return columns
}

// collectDottedKeys records every key of m in out, prefixed with prefix and a
// "." when prefix is non-empty. It recurses into nested maps and into maps
// that are direct elements of a slice; slice elements of any other type are
// ignored.
func collectDottedKeys(prefix string, m map[string]any, out map[string]struct{}) {
	for key, value := range m {
		path := key
		if prefix != "" {
			path = prefix + "." + key
		}
		out[path] = struct{}{}

		if child, isMap := value.(map[string]any); isMap {
			collectDottedKeys(path, child, out)
			continue
		}
		list, isList := value.([]any)
		if !isList {
			continue
		}
		for _, element := range list {
			if child, isMap := element.(map[string]any); isMap {
				collectDottedKeys(path, child, out)
			}
		}
	}
}
"testing" + +func TestMaskValueIsStableAndContextBound(t *testing.T) { + secret := []byte("local-secret") + a := MaskValue(secret, "prod", "users.email", "alice@example.com") + b := MaskValue(secret, "prod", "users.email", "alice@example.com") + c := MaskValue(secret, "prod", "orders.email", "alice@example.com") + if a != b { + t.Fatalf("expected stable output") + } + if a == c { + t.Fatalf("expected field context to change output") + } + if !IsMaskedValue(a) { + t.Fatalf("expected masked prefix: %s", a) + } +} + +func TestMaskRowsMasksRestrictedFieldsOnly(t *testing.T) { + state := StoreState{ + LevelConfig: ptr(DefaultLevelConfig()), + Datasources: map[string]DatasourceClassification{ + "prod": { + Entities: map[string]EntityClassification{ + "users": { + Fields: map[string]FieldClassification{ + "id": {Level: "L1", Category: CategoryIdentifier, Source: SourceManual}, + "email": {Level: "L4", Category: CategoryContact, Source: SourceManual}, + }, + }, + }, + }, + }, + } + rows := []map[string]any{{"id": 1042, "email": "alice@example.com"}} + masked := MaskRows(state, []byte("local-secret"), "prod", "users", rows) + if len(masked) != 1 || masked[0] != "email" { + t.Fatalf("unexpected masked columns: %#v", masked) + } + if rows[0]["id"] != 1042 { + t.Fatalf("id should remain clear") + } + if rows[0]["email"] == "alice@example.com" { + t.Fatalf("email should be masked") + } +} + +func ptr[T any](v T) *T { + return &v +} diff --git a/pkg/protocol/protocol.go b/pkg/protocol/protocol.go new file mode 100644 index 0000000..a82e463 --- /dev/null +++ b/pkg/protocol/protocol.go @@ -0,0 +1,143 @@ +package protocol + +import "github.com/FutrixDev/FutrixPackage/pkg/riskengine" + +type ToolName string + +const ( + ToolListDatasources ToolName = "list_datasources" + ToolGetDatasource ToolName = "get_datasource" + ToolListDatabases ToolName = "list_databases" + ToolListEntities ToolName = "list_entities" + ToolDescribeEntity ToolName = "describe_entity" + ToolExecuteStatement 
ToolName = "execute_statement" + ToolListRiskRules ToolName = "list_risk_rules" + ToolSetRiskRule ToolName = "set_risk_rule" + ToolDeleteRiskRule ToolName = "delete_risk_rule" + ToolGetSensitivityConfig ToolName = "get_sensitivity_config" + ToolGetSensitivityReport ToolName = "get_sensitivity_report" + ToolSaveSensitivityReport ToolName = "save_sensitivity_report" + ToolDeleteSensitivityReport ToolName = "delete_sensitivity_report" + ToolSetSensitivityCustomRules ToolName = "set_sensitivity_custom_rules" + ToolGetSchemaKnowledge ToolName = "get_schema_knowledge" + ToolGetERKnowledge ToolName = "get_er_knowledge" + ToolSetBuiltinRiskRuleEnabled ToolName = "set_builtin_risk_rule_enabled" + ToolSetBuiltinRiskRuleThreshold ToolName = "set_builtin_risk_rule_thresholds" +) + +type ParamType string + +const ( + TypeString ParamType = "string" + TypeNumber ParamType = "number" + TypeBoolean ParamType = "boolean" + TypeObject ParamType = "object" + TypeArray ParamType = "array" +) + +type Param struct { + Name string `json:"name"` + Type ParamType `json:"type"` + Required bool `json:"required,omitempty"` + Description string `json:"description,omitempty"` + Enum []string `json:"enum,omitempty"` + Properties []Param `json:"properties,omitempty"` +} + +type ToolDef struct { + Name ToolName `json:"name"` + Description string `json:"description"` + ApprovalRequired bool `json:"approvalRequired,omitempty"` + Params []Param `json:"params,omitempty"` +} + +type ToolCall struct { + Tool ToolName `json:"tool"` + AccessKey string `json:"accessKey,omitempty"` + Protocol string `json:"protocol,omitempty"` + Params map[string]any `json:"params,omitempty"` + Approve bool `json:"approve,omitempty"` + ApproveNote string `json:"approvalReason,omitempty"` +} + +type ToolResult struct { + Tool ToolName `json:"tool"` + OK bool `json:"ok"` + Result any `json:"result,omitempty"` + Error *ToolError `json:"error,omitempty"` + ApprovalRequired *ApprovalRequest `json:"approvalRequired,omitempty"` + 
AuditID string `json:"auditId,omitempty"` + MaskedColumns []string `json:"maskedColumns,omitempty"` +} + +type ToolError struct { + Code string `json:"code"` + Message string `json:"message"` + RiskAttribution *RiskAttribution `json:"riskAttribution,omitempty"` +} + +type ApprovalRequest struct { + Tool ToolName `json:"tool"` + Summary string `json:"summary"` + Params map[string]any `json:"params,omitempty"` + RiskAttribution *RiskAttribution `json:"riskAttribution,omitempty"` +} + +type RiskAttribution struct { + Source string `json:"source"` + Action string `json:"action"` + Level string `json:"level,omitempty"` + RuleID string `json:"ruleId,omitempty"` + RuleCode string `json:"ruleCode,omitempty"` + RuleDescription string `json:"ruleDescription,omitempty"` + Builtin *bool `json:"builtin,omitempty"` + Reasons []string `json:"reasons,omitempty"` +} + +func AttributionFromAssessment(a riskengine.RiskAssessment) RiskAttribution { + builtin := a.Builtin + return RiskAttribution{ + Source: "risk_engine", + Action: string(a.Action), + Level: string(a.Level), + RuleID: a.RuleID, + RuleCode: a.RuleCode, + RuleDescription: a.RuleDescription, + Builtin: &builtin, + Reasons: append([]string(nil), a.Reasons...), + } +} + +func PublicTools() []ToolDef { + return []ToolDef{ + {Name: ToolListDatasources, Description: "List configured data sources."}, + {Name: ToolGetDatasource, Description: "Get one data source by ID.", Params: []Param{{Name: "datasourceId", Type: TypeString, Required: true}}}, + {Name: ToolListDatabases, Description: "List databases or schemas on a data source.", Params: datasourceParams(false)}, + {Name: ToolListEntities, Description: "List tables, collections, indexes, or equivalent entities.", Params: datasourceParams(false)}, + {Name: ToolDescribeEntity, Description: "Describe one table, collection, index, or equivalent entity.", Params: append(datasourceParams(false), Param{Name: "name", Type: TypeString, Required: true})}, + {Name: ToolExecuteStatement, 
Description: "Execute one statement after policy evaluation.", ApprovalRequired: true, Params: append(datasourceParams(true), Param{Name: "statement", Type: TypeString, Required: true})}, + {Name: ToolListRiskRules, Description: "List risk rules and built-in rule state."}, + {Name: ToolSetRiskRule, Description: "Create or update a user risk rule.", ApprovalRequired: true}, + {Name: ToolDeleteRiskRule, Description: "Delete a user risk rule.", ApprovalRequired: true, Params: []Param{{Name: "id", Type: TypeString, Required: true}}}, + {Name: ToolGetSensitivityConfig, Description: "Read sensitivity level configuration."}, + {Name: ToolGetSensitivityReport, Description: "Read a data source sensitivity report.", Params: []Param{{Name: "datasourceId", Type: TypeString, Required: true}}}, + {Name: ToolSaveSensitivityReport, Description: "Save an agent-supplied sensitivity report.", ApprovalRequired: true}, + {Name: ToolDeleteSensitivityReport, Description: "Delete a sensitivity report.", ApprovalRequired: true, Params: []Param{{Name: "datasourceId", Type: TypeString, Required: true}}}, + {Name: ToolSetSensitivityCustomRules, Description: "Update custom sensitivity classification guidance.", ApprovalRequired: true, Params: []Param{{Name: "rules", Type: TypeString, Required: true}}}, + {Name: ToolGetSchemaKnowledge, Description: "Return schema knowledge when schema-egress policy allows it.", Params: datasourceParams(false)}, + {Name: ToolGetERKnowledge, Description: "Return relationship knowledge when schema-egress policy allows it.", Params: datasourceParams(false)}, + {Name: ToolSetBuiltinRiskRuleEnabled, Description: "Enable or disable one built-in risk rule override.", ApprovalRequired: true, Params: []Param{{Name: "id", Type: TypeString, Required: true}, {Name: "enabled", Type: TypeBoolean, Required: true}}}, + {Name: ToolSetBuiltinRiskRuleThreshold, Description: "Update threshold overrides for one built-in probe rule.", ApprovalRequired: true, Params: []Param{{Name: 
"id", Type: TypeString, Required: true}, {Name: "thresholds", Type: TypeObject, Required: true}}}, + } +} + +func datasourceParams(includeExecutionMode bool) []Param { + params := []Param{ + {Name: "datasourceId", Type: TypeString, Required: true}, + {Name: "database", Type: TypeString}, + } + if includeExecutionMode { + params = append(params, Param{Name: "executionMode", Type: TypeString}) + } + return params +} diff --git a/pkg/protocol/protocol_test.go b/pkg/protocol/protocol_test.go new file mode 100644 index 0000000..0b038a7 --- /dev/null +++ b/pkg/protocol/protocol_test.go @@ -0,0 +1,20 @@ +package protocol + +import "testing" + +func TestPublicToolsExposeSecurityCriticalSurfaces(t *testing.T) { + seen := map[ToolName]bool{} + for _, tool := range PublicTools() { + seen[tool.Name] = true + } + for _, required := range []ToolName{ + ToolExecuteStatement, + ToolListRiskRules, + ToolSaveSensitivityReport, + ToolGetSchemaKnowledge, + } { + if !seen[required] { + t.Fatalf("missing tool %s", required) + } + } +} diff --git a/pkg/riskengine/riskengine.go b/pkg/riskengine/riskengine.go new file mode 100644 index 0000000..d79d214 --- /dev/null +++ b/pkg/riskengine/riskengine.go @@ -0,0 +1,602 @@ +package riskengine + +import ( + "regexp" + "sort" + "strings" + "sync" +) + +type RiskLevel string + +const ( + RiskLow RiskLevel = "low" + RiskMedium RiskLevel = "medium" + RiskHigh RiskLevel = "high" +) + +type Action string + +const ( + ActionAllow Action = "allow" + ActionWarn Action = "warn" + ActionRequireApproval Action = "require_approval" + ActionBlock Action = "block" +) + +type RiskAssessment struct { + Level RiskLevel `json:"level"` + Action Action `json:"action"` + Reasons []string `json:"reasons"` + RuleID string `json:"ruleId,omitempty"` + RuleCode string `json:"ruleCode,omitempty"` + RuleDescription string `json:"ruleDescription,omitempty"` + Builtin bool `json:"builtin,omitempty"` +} + +type ParsedStatement struct { + Raw string `json:"raw"` + DsType string 
`json:"dsType"` + DatasourceID string `json:"datasourceId,omitempty"` + FirstKeyword string `json:"firstKeyword"` + TargetEntity string `json:"targetEntity,omitempty"` + TargetEntities []string `json:"targetEntities,omitempty"` + HasWhere bool `json:"hasWhere,omitempty"` + SQLStatementCount int `json:"sqlStatementCount,omitempty"` + SQLParseFailed bool `json:"sqlParseFailed,omitempty"` + IsQuery bool `json:"isQuery,omitempty"` + HTTPMethod string `json:"httpMethod,omitempty"` + URLPath string `json:"urlPath,omitempty"` + Body string `json:"body,omitempty"` + RedisCommand string `json:"redisCommand,omitempty"` + KeyPattern string `json:"keyPattern,omitempty"` +} + +func (ps ParsedStatement) ScopeEntities() []string { + if len(ps.TargetEntities) > 0 { + return ps.TargetEntities + } + if ps.TargetEntity == "" { + return nil + } + return []string{ps.TargetEntity} +} + +type Rule struct { + ID string `json:"id"` + Code string `json:"code,omitempty"` + Description string `json:"description"` + Scope RuleScope `json:"scope"` + Enabled bool `json:"enabled"` + Priority int `json:"priority"` + Action Action `json:"action"` + Reason string `json:"reason"` + When RuleCondition `json:"when"` + Builtin bool `json:"builtin"` +} + +type RuleScope struct { + DsTypes []string `json:"dsTypes,omitempty"` + DatasourceID string `json:"datasourceId,omitempty"` + Entity string `json:"entity,omitempty"` + EntityPattern string `json:"entityPattern,omitempty"` + KeyPattern string `json:"keyPattern,omitempty"` +} + +type RuleCondition struct { + Command []string `json:"command,omitempty"` + StatementPattern string `json:"statementPattern,omitempty"` + StatementNotPattern string `json:"statementNotPattern,omitempty"` + HasWhere *bool `json:"hasWhere,omitempty"` + SQLMultiStatement *bool `json:"sqlMultiStatement,omitempty"` + SQLParseFailed *bool `json:"sqlParseFailed,omitempty"` + HTTPMethod []string `json:"httpMethod,omitempty"` + PathPattern string `json:"pathPattern,omitempty"` + 
BodyPattern string `json:"bodyPattern,omitempty"` + BodyNotPattern string `json:"bodyNotPattern,omitempty"` + Any []RuleCondition `json:"any,omitempty"` + Not *RuleCondition `json:"not,omitempty"` +} + +type Engine struct { + mu sync.RWMutex + builtinRules []Rule + userRules []Rule +} + +type ruleMatch struct { + rule Rule + scopePriority int +} + +func NewEngine() *Engine { + return &Engine{builtinRules: BuiltinRules()} +} + +func (e *Engine) LoadBuiltinRules(rules []Rule) { + e.mu.Lock() + defer e.mu.Unlock() + for i := range rules { + rules[i].Builtin = true + } + e.builtinRules = rules +} + +func (e *Engine) LoadUserRules(rules []Rule) { + e.mu.Lock() + defer e.mu.Unlock() + e.userRules = append([]Rule(nil), rules...) +} + +func (e *Engine) ListRules() []Rule { + e.mu.RLock() + defer e.mu.RUnlock() + out := make([]Rule, 0, len(e.userRules)+len(e.builtinRules)) + out = append(out, e.userRules...) + out = append(out, e.builtinRules...) + return out +} + +func (e *Engine) Assess(dsType, datasourceID, statement string) RiskAssessment { + return e.AssessParsed(ParseStatement(dsType, datasourceID, statement)) +} + +func (e *Engine) AssessParsed(ps ParsedStatement) RiskAssessment { + e.mu.RLock() + defer e.mu.RUnlock() + matches := e.matchingRulesLocked(ps) + if len(matches) == 0 { + if strings.TrimSpace(ps.Raw) == "" || ps.IsQuery { + return RiskAssessment{Level: RiskLow, Action: ActionAllow} + } + if ps.DsType == "mysql" || ps.DsType == "postgresql" || ps.DsType == "d1" { + return RiskAssessment{ + Level: RiskHigh, + Action: ActionRequireApproval, + Reasons: []string{"unsupported SQL syntax requires review"}, + RuleID: "sql-require-approval-unsupported", + RuleCode: "SQL-014", + RuleDescription: "Require approval for unsupported SQL syntax", + Builtin: true, + } + } + return RiskAssessment{Level: RiskMedium, Action: ActionWarn, Reasons: []string{"statement requires review: no matching risk rule"}} + } + winner := matches[0].rule + return RiskAssessment{ + Level: 
actionToRiskLevel(winner.Action), + Action: winner.Action, + Reasons: buildReasons(winner), + RuleID: winner.ID, + RuleCode: winner.Code, + RuleDescription: winner.Description, + Builtin: winner.Builtin, + } +} + +func (e *Engine) matchingRulesLocked(ps ParsedStatement) []ruleMatch { + allRules := append(append([]Rule(nil), e.userRules...), e.builtinRules...) + var matches []ruleMatch + for _, rule := range allRules { + if !rule.Enabled { + continue + } + if !scopeMatches(rule.Scope, ps) { + continue + } + if !conditionMatches(rule.When, ps) { + continue + } + matches = append(matches, ruleMatch{rule: rule, scopePriority: scopePriority(rule.Scope)}) + } + sort.Slice(matches, func(i, j int) bool { + if matches[i].scopePriority != matches[j].scopePriority { + return matches[i].scopePriority > matches[j].scopePriority + } + if matches[i].rule.Priority != matches[j].rule.Priority { + return matches[i].rule.Priority > matches[j].rule.Priority + } + if matches[i].rule.Builtin != matches[j].rule.Builtin { + return !matches[i].rule.Builtin + } + return matches[i].rule.ID < matches[j].rule.ID + }) + return matches +} + +func ParseStatement(dsType, datasourceID, statement string) ParsedStatement { + typ := strings.ToLower(strings.TrimSpace(dsType)) + stmt := strings.TrimSpace(statement) + ps := ParsedStatement{Raw: stmt, DsType: typ, DatasourceID: datasourceID} + if stmt == "" { + return ps + } + switch typ { + case "mysql", "postgresql", "d1": + parseSQL(&ps) + case "mongodb": + parseMongo(&ps) + case "elasticsearch": + parseElasticsearch(&ps) + case "redis", "redis_cluster": + parseRedis(&ps) + case "dynamodb": + parseDynamoDB(&ps) + default: + ps.FirstKeyword = firstKeyword(stmt) + } + return ps +} + +func BuiltinRules() []Rule { + var out []Rule + out = append(out, sqlRules()...) + out = append(out, redisRules()...) + out = append(out, mongoRules()...) + out = append(out, elasticsearchRules()...) + out = append(out, dynamoRules()...) 
+ for i := range out { + out[i].Builtin = true + if !out[i].Enabled { + out[i].Enabled = true + } + } + return out +} + +func parseSQL(ps *ParsedStatement) { + ps.SQLStatementCount = statementCount(ps.Raw) + ps.FirstKeyword = firstKeyword(ps.Raw) + ps.HasWhere = hasWhere(ps.Raw) + ps.TargetEntity = targetEntity(ps.Raw) + switch ps.FirstKeyword { + case "select", "show", "describe", "explain": + ps.IsQuery = true + case "": + ps.SQLParseFailed = true + } +} + +func parseRedis(ps *ParsedStatement) { + fields := strings.Fields(ps.Raw) + if len(fields) == 0 { + return + } + ps.RedisCommand = strings.ToUpper(fields[0]) + ps.FirstKeyword = strings.ToLower(fields[0]) + if len(fields) > 1 { + ps.KeyPattern = fields[1] + } + switch ps.RedisCommand { + case "GET", "MGET", "EXISTS", "TTL", "PTTL", "TYPE", "SCAN", "HGET", "HGETALL", "HMGET", "LLEN", "LRANGE", "SCARD", "SMEMBERS", "ZRANGE": + ps.IsQuery = true + } +} + +func parseMongo(ps *ParsedStatement) { + lower := strings.ToLower(ps.Raw) + if strings.HasPrefix(lower, "db.") { + parts := strings.SplitN(ps.Raw[3:], ".", 2) + if len(parts) == 2 { + ps.TargetEntity = strings.TrimSpace(parts[0]) + if idx := strings.Index(parts[1], "("); idx >= 0 { + ps.FirstKeyword = strings.ToLower(strings.TrimSpace(parts[1][:idx])) + } + } + } + switch ps.FirstKeyword { + case "find", "aggregate", "count", "distinct": + ps.IsQuery = true + } + if ps.FirstKeyword == "" { + ps.FirstKeyword = firstKeyword(ps.Raw) + } +} + +func parseElasticsearch(ps *ParsedStatement) { + lines := strings.SplitN(ps.Raw, "\n", 2) + head := strings.Fields(lines[0]) + if len(head) < 2 { + return + } + ps.HTTPMethod = strings.ToUpper(head[0]) + ps.FirstKeyword = strings.ToLower(ps.HTTPMethod) + ps.URLPath = head[1] + if len(lines) == 2 { + ps.Body = lines[1] + } + parts := strings.Split(strings.Trim(ps.URLPath, "/"), "/") + if len(parts) > 0 { + ps.TargetEntity = parts[0] + } + if ps.HTTPMethod == "GET" || ps.HTTPMethod == "HEAD" || (ps.HTTPMethod == "POST" && 
strings.Contains(ps.URLPath, "_search")) { + ps.IsQuery = true + } +} + +func parseDynamoDB(ps *ParsedStatement) { + ps.FirstKeyword = firstKeyword(ps.Raw) + ps.TargetEntity = targetEntity(ps.Raw) + ps.HasWhere = hasWhere(ps.Raw) + ps.IsQuery = ps.FirstKeyword == "select" +} + +func sqlRules() []Rule { + types := []string{"mysql", "postgresql", "d1"} + return []Rule{ + {ID: "sql-block-multi-statement", Code: "SQL-011", Description: "Block SQL batches containing more than one statement", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 130, Action: ActionBlock, Reason: "multiple SQL statements are not allowed on agent execution paths", When: RuleCondition{SQLMultiStatement: boolPtr(true)}}, + {ID: "sql-require-approval-procedure-call", Code: "SQL-012", Description: "Require approval for stored procedure and procedural SQL calls", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 115, Action: ActionRequireApproval, Reason: "stored procedure calls require review", When: RuleCondition{Command: []string{"call", "exec", "execute", "do"}}}, + {ID: "sql-allow-read", Code: "SQL-001", Description: "Allow SELECT, SHOW, DESCRIBE, EXPLAIN", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 10, Action: ActionAllow, Reason: "read-only operation", When: RuleCondition{Command: []string{"select", "show", "describe", "explain"}}}, + {ID: "sql-block-drop-truncate", Code: "SQL-003", Description: "Block DROP and TRUNCATE", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 100, Action: ActionBlock, Reason: "destructive DDL", When: RuleCondition{Command: []string{"drop", "truncate"}}}, + {ID: "sql-block-permission-change", Code: "SQL-004", Description: "Block GRANT and REVOKE", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 100, Action: ActionBlock, Reason: "permission change", When: RuleCondition{Command: []string{"grant", "revoke"}}}, + {ID: "sql-block-delete-no-where", Code: "SQL-005", Description: "Block DELETE without WHERE", Scope: 
RuleScope{DsTypes: types}, Enabled: true, Priority: 90, Action: ActionBlock, Reason: "DELETE without WHERE", When: RuleCondition{Command: []string{"delete"}, HasWhere: boolPtr(false)}}, + {ID: "sql-block-update-no-where", Code: "SQL-006", Description: "Block UPDATE without WHERE", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 90, Action: ActionBlock, Reason: "UPDATE without WHERE", When: RuleCondition{Command: []string{"update"}, HasWhere: boolPtr(false)}}, + {ID: "sql-warn-delete", Code: "SQL-007", Description: "Warn on DELETE with WHERE", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 50, Action: ActionWarn, Reason: "DELETE", When: RuleCondition{Command: []string{"delete"}, HasWhere: boolPtr(true)}}, + {ID: "sql-warn-update", Code: "SQL-008", Description: "Warn on UPDATE with WHERE", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 50, Action: ActionWarn, Reason: "UPDATE", When: RuleCondition{Command: []string{"update"}, HasWhere: boolPtr(true)}}, + {ID: "sql-warn-insert", Code: "SQL-009", Description: "Warn on INSERT/REPLACE", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 40, Action: ActionWarn, Reason: "INSERT/REPLACE", When: RuleCondition{Command: []string{"insert", "replace"}}}, + {ID: "sql-warn-ddl", Code: "SQL-010", Description: "Warn on ALTER/CREATE", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 40, Action: ActionWarn, Reason: "DDL", When: RuleCondition{Command: []string{"alter", "create"}}}, + } +} + +func redisRules() []Rule { + types := []string{"redis", "redis_cluster"} + return []Rule{ + {ID: "redis-allow-read", Code: "REDIS-001", Description: "Allow common Redis read commands", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 10, Action: ActionAllow, Reason: "read-only Redis command", When: RuleCondition{Command: []string{"get", "mget", "exists", "ttl", "pttl", "type", "scan", "hget", "hgetall", "hmget", "llen", "lrange", "scard", "smembers", "zrange"}}}, + {ID: 
"redis-block-dangerous", Code: "REDIS-002", Description: "Block destructive Redis commands", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 100, Action: ActionBlock, Reason: "destructive Redis command", When: RuleCondition{Command: []string{"flushall", "flushdb", "shutdown", "config", "script", "eval", "evalsha"}}}, + {ID: "redis-warn-write", Code: "REDIS-003", Description: "Warn on Redis write commands", Scope: RuleScope{DsTypes: types}, Enabled: true, Priority: 20, Action: ActionWarn, Reason: "Redis write command", When: RuleCondition{}}, + } +} + +func mongoRules() []Rule { + return []Rule{ + {ID: "mongo-allow-read", Code: "MONGO-001", Description: "Allow MongoDB read actions", Scope: RuleScope{DsTypes: []string{"mongodb"}}, Enabled: true, Priority: 10, Action: ActionAllow, Reason: "read-only MongoDB action", When: RuleCondition{Command: []string{"find", "aggregate", "count", "distinct"}}}, + {ID: "mongo-block-drop", Code: "MONGO-002", Description: "Block MongoDB drop operations", Scope: RuleScope{DsTypes: []string{"mongodb"}}, Enabled: true, Priority: 100, Action: ActionBlock, Reason: "destructive MongoDB action", When: RuleCondition{Command: []string{"drop", "dropdatabase", "dropcollection"}}}, + {ID: "mongo-warn-write", Code: "MONGO-003", Description: "Warn on MongoDB write actions", Scope: RuleScope{DsTypes: []string{"mongodb"}}, Enabled: true, Priority: 20, Action: ActionWarn, Reason: "MongoDB write action", When: RuleCondition{}}, + } +} + +func elasticsearchRules() []Rule { + return []Rule{ + {ID: "es-allow-read", Code: "ES-001", Description: "Allow Elasticsearch read and search requests", Scope: RuleScope{DsTypes: []string{"elasticsearch"}}, Enabled: true, Priority: 10, Action: ActionAllow, Reason: "read-only Elasticsearch request", When: RuleCondition{Any: []RuleCondition{{HTTPMethod: []string{"GET", "HEAD"}}, {HTTPMethod: []string{"POST"}, PathPattern: `_search`}}}}, + {ID: "es-block-delete-index", Code: "ES-002", Description: "Block 
Elasticsearch DELETE index requests", Scope: RuleScope{DsTypes: []string{"elasticsearch"}}, Enabled: true, Priority: 100, Action: ActionBlock, Reason: "destructive Elasticsearch request", When: RuleCondition{HTTPMethod: []string{"DELETE"}}}, + {ID: "es-warn-write", Code: "ES-003", Description: "Warn on Elasticsearch write requests", Scope: RuleScope{DsTypes: []string{"elasticsearch"}}, Enabled: true, Priority: 20, Action: ActionWarn, Reason: "Elasticsearch write request", When: RuleCondition{}}, + } +} + +func dynamoRules() []Rule { + return []Rule{ + {ID: "dynamodb-allow-read", Code: "DDB-001", Description: "Allow DynamoDB PartiQL SELECT", Scope: RuleScope{DsTypes: []string{"dynamodb"}}, Enabled: true, Priority: 10, Action: ActionAllow, Reason: "read-only DynamoDB statement", When: RuleCondition{Command: []string{"select"}}}, + {ID: "dynamodb-warn-write", Code: "DDB-002", Description: "Warn on DynamoDB write statements", Scope: RuleScope{DsTypes: []string{"dynamodb"}}, Enabled: true, Priority: 20, Action: ActionWarn, Reason: "DynamoDB write statement", When: RuleCondition{}}, + } +} + +func conditionMatches(cond RuleCondition, ps ParsedStatement) bool { + if isEmptyCondition(cond) { + return true + } + if len(cond.Any) > 0 { + ok := false + for _, sub := range cond.Any { + if conditionMatches(sub, ps) { + ok = true + break + } + } + if !ok { + return false + } + cond.Any = nil + } + if cond.Not != nil { + if conditionMatches(*cond.Not, ps) { + return false + } + cond.Not = nil + } + if len(cond.Command) > 0 && !matchCommand(cond.Command, ps) { + return false + } + if len(cond.HTTPMethod) > 0 && !matchAnyFold(cond.HTTPMethod, ps.HTTPMethod) { + return false + } + if cond.HasWhere != nil && ps.HasWhere != *cond.HasWhere { + return false + } + if cond.SQLMultiStatement != nil && (ps.SQLStatementCount > 1) != *cond.SQLMultiStatement { + return false + } + if cond.SQLParseFailed != nil && ps.SQLParseFailed != *cond.SQLParseFailed { + return false + } + if 
// Compiled once at package scope; these helpers run for every assessed
// statement.
var (
	sqlWhereRe  = regexp.MustCompile(`(?is)\bwhere\b`)
	sqlEntityRe = regexp.MustCompile(`(?is)\b(?:from|into|update|table)\s+([a-zA-Z0-9_."` + "`" + `-]+)`)
)

// firstKeyword returns the lower-cased first word of stmt with surrounding
// ";" removed, or "" when there is none.
//
// Leading "--" and "/* */" comments are skipped, and a comment glued to the
// word ("drop/**/table") is cut off, so a comment cannot hide the command
// from keyword-based rules. MySQL executable comments ("/*! ... */") are not
// skipped and are returned as-is.
func firstKeyword(stmt string) string {
	fields := strings.Fields(skipLeadingSQLComments(stmt))
	if len(fields) == 0 {
		return ""
	}
	word := fields[0]
	for _, marker := range []string{"/*", "--"} {
		// i > 0: a token that *starts* with a marker (e.g. "/*!50000") is
		// kept intact rather than reduced to "".
		if i := strings.Index(word, marker); i > 0 {
			word = word[:i]
		}
	}
	return strings.ToLower(strings.Trim(word, ";"))
}

// skipLeadingSQLComments strips leading whitespace and any run of leading
// "--" line comments and terminated "/* */" block comments. An unterminated
// block comment or a "/*!" executable comment stops the scan.
func skipLeadingSQLComments(stmt string) string {
	for {
		stmt = strings.TrimSpace(stmt)
		switch {
		case strings.HasPrefix(stmt, "--"):
			nl := strings.IndexByte(stmt, '\n')
			if nl < 0 {
				return ""
			}
			stmt = stmt[nl+1:]
		case strings.HasPrefix(stmt, "/*") && !strings.HasPrefix(stmt, "/*!"):
			end := strings.Index(stmt[2:], "*/")
			if end < 0 {
				return stmt
			}
			stmt = stmt[2+end+2:]
		default:
			return stmt
		}
	}
}

// hasWhere reports whether stmt contains a WHERE keyword outside comments,
// string literals and quoted identifiers.
//
// A plain text search let "DELETE FROM users -- where" or
// "UPDATE t SET note = 'where'" count as having a WHERE clause. Whether a
// backslash escapes a quote depends on the SQL dialect, which is not known
// here, so WHERE must be visible under both readings; when in doubt the
// answer is false.
//
// This is a lexical heuristic, not a parser: MySQL "#" comments, PostgreSQL
// dollar-quoted strings and nested block comments are not recognised.
func hasWhere(stmt string) bool {
	if !sqlWhereRe.MatchString(stmt) {
		return false // no WHERE anywhere; skip the masking passes
	}
	return sqlWhereRe.MatchString(maskSQL(stmt, true, false)) &&
		sqlWhereRe.MatchString(maskSQL(stmt, false, false))
}

// statementCount returns the number of non-blank segments obtained by
// splitting stmt on ";". A ";" inside a string literal or comment is counted
// too; that can only over-count, which errs on the side of treating the
// input as a multi-statement batch.
func statementCount(stmt string) int {
	count := 0
	for _, part := range strings.Split(stmt, ";") {
		if strings.TrimSpace(part) != "" {
			count++
		}
	}
	return count
}

// targetEntity returns the identifier following the first FROM, INTO, UPDATE
// or TABLE keyword, with surrounding backticks and double quotes removed, or
// "" when there is none. Comments and single-quoted strings are ignored, so
// "select /* from x */ * from users" yields "users" rather than "x".
func targetEntity(stmt string) string {
	m := sqlEntityRe.FindStringSubmatch(maskSQL(stmt, true, true))
	if len(m) < 2 {
		return ""
	}
	return strings.Trim(m[1], "`\"")
}

// maskSQL returns a copy of stmt, of the same length, in which "--" line
// comments, "/* */" block comments and quoted spans are overwritten with
// spaces. Unterminated comments and quotes extend to the end of the input.
//
// backslashEscapes selects whether a backslash inside '...' or "..." escapes
// the next byte. With keepIdentifiers, spans quoted with double quotes or
// backticks are left in place (they are still skipped over as a unit), so
// quoted table names remain readable.
func maskSQL(stmt string, backslashEscapes, keepIdentifiers bool) string {
	out := []byte(stmt)
	blank := func(from, to int) {
		for k := from; k < to; k++ {
			out[k] = ' '
		}
	}
	n := len(stmt)
	for i := 0; i < n; {
		c := stmt[i]
		switch {
		case c == '-' && i+1 < n && stmt[i+1] == '-':
			end := i + 2
			for end < n && stmt[end] != '\n' {
				end++
			}
			blank(i, end)
			i = end
		case c == '/' && i+1 < n && stmt[i+1] == '*':
			end := n
			if k := strings.Index(stmt[i+2:], "*/"); k >= 0 {
				end = i + 2 + k + 2
			}
			blank(i, end)
			i = end
		case c == '\'' || c == '"' || c == '`':
			end := sqlQuotedEnd(stmt, i, backslashEscapes && c != '`')
			if c == '\'' || !keepIdentifiers {
				blank(i, end)
			}
			i = end
		default:
			i++
		}
	}
	return string(out)
}

// sqlQuotedEnd returns the index just past the quote that closes the span
// opened at stmt[start], or len(stmt) when it is never closed. A doubled
// quote character is an escaped quote; a backslash escapes the next byte
// when backslashEscapes is set.
func sqlQuotedEnd(stmt string, start int, backslashEscapes bool) int {
	quote := stmt[start]
	for i := start + 1; i < len(stmt); i++ {
		switch {
		case backslashEscapes && stmt[i] == '\\':
			i++ // skip the escaped byte
		case stmt[i] == quote:
			if i+1 < len(stmt) && stmt[i+1] == quote {
				i++ // doubled quote, still inside the span
				continue
			}
			return i + 1
		}
	}
	return len(stmt)
}
// matchAnyFold reports whether value equals any element of items, comparing
// case-insensitively after trimming surrounding whitespace on both sides.
func matchAnyFold(items []string, value string) bool {
	needle := strings.TrimSpace(value)
	for i := 0; i < len(items); i++ {
		if candidate := strings.TrimSpace(items[i]); strings.EqualFold(candidate, needle) {
			return true
		}
	}
	return false
}

// regexMatches evaluates one optional pattern constraint. An empty pattern
// always passes and a pattern that fails to compile never passes. Otherwise
// the result is whether value matches, inverted when negative is set.
func regexMatches(pattern, value string, negative bool) bool {
	if pattern == "" {
		return true
	}
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		return false
	}
	// matched XOR negative: a negative constraint passes on a non-match.
	return compiled.MatchString(value) != negative
}

// entityMatches reports whether want names one of entities, ignoring case and
// surrounding whitespace.
func entityMatches(want string, entities []string) bool {
	target := strings.TrimSpace(want)
	for idx := range entities {
		if strings.EqualFold(target, strings.TrimSpace(entities[idx])) {
			return true
		}
	}
	return false
}

// patternMatchesAny reports whether the wildcard pattern matches at least one
// of entities.
func patternMatchesAny(pattern string, entities []string) bool {
	for idx := range entities {
		if wildcardMatch(pattern, entities[idx]) {
			return true
		}
	}
	return false
}

// wildcardMatch reports whether value matches pattern in full, ignoring case,
// where "*" in the pattern stands for any run of characters and every other
// character is literal. Both arguments are trimmed first; an empty pattern
// matches everything.
func wildcardMatch(pattern, value string) bool {
	glob := strings.TrimSpace(pattern)
	if glob == "" {
		return true
	}
	// Quote each literal run between the stars, then join the runs with ".*".
	literals := strings.Split(glob, "*")
	for i, lit := range literals {
		literals[i] = regexp.QuoteMeta(lit)
	}
	expr := "(?i)^" + strings.Join(literals, ".*") + "$"
	compiled, err := regexp.Compile(expr)
	if err != nil {
		return false
	}
	return compiled.MatchString(strings.TrimSpace(value))
}
return RiskHigh + case ActionWarn: + return RiskMedium + case ActionAllow: + return RiskLow + default: + return RiskMedium + } +} + +func buildReasons(rule Rule) []string { + if rule.Reason != "" { + return []string{rule.Reason} + } + if rule.Description != "" { + return []string{rule.Description} + } + return nil +} + +func boolPtr(v bool) *bool { + return &v +} diff --git a/pkg/riskengine/riskengine_test.go b/pkg/riskengine/riskengine_test.go new file mode 100644 index 0000000..ca9b5fe --- /dev/null +++ b/pkg/riskengine/riskengine_test.go @@ -0,0 +1,40 @@ +package riskengine + +import "testing" + +func TestSQLBuiltinRules(t *testing.T) { + engine := NewEngine() + cases := []struct { + stmt string + want Action + }{ + {"select * from users where id = 1", ActionAllow}, + {"delete from users", ActionBlock}, + {"update users set name = 'x' where id = 1", ActionWarn}, + {"drop table users", ActionBlock}, + } + for _, tc := range cases { + got := engine.Assess("postgresql", "prod", tc.stmt) + if got.Action != tc.want { + t.Fatalf("%q action = %s, want %s (%+v)", tc.stmt, got.Action, tc.want, got) + } + } +} + +func TestUserRuleOverridesBuiltinByScopeAndPriority(t *testing.T) { + engine := NewEngine() + engine.LoadUserRules([]Rule{{ + ID: "prod-users-read-approval", + Description: "Require approval for users reads in prod", + Scope: RuleScope{DatasourceID: "prod", Entity: "users"}, + Enabled: true, + Priority: 500, + Action: ActionRequireApproval, + Reason: "sensitive table", + When: RuleCondition{Command: []string{"select"}}, + }}) + got := engine.Assess("postgresql", "prod", "select * from users") + if got.Action != ActionRequireApproval || got.RuleID != "prod-users-read-approval" { + t.Fatalf("unexpected assessment: %+v", got) + } +} diff --git a/release-verification/verify-checksums.sh b/release-verification/verify-checksums.sh new file mode 100644 index 0000000..b6e84f6 --- /dev/null +++ b/release-verification/verify-checksums.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env 
#!/usr/bin/env bash
# Verify the files in a release directory against its SHA256SUMS.txt.
#
# Usage: verify-checksums.sh <release-dir>
#
# Exit status: 0 when every listed file matches, 2 on a usage error, and
# non-zero when the checksum file or a checksum tool is missing or any file
# fails verification.
set -euo pipefail

if [ "$#" -ne 1 ]; then
  echo "usage: $0 <release-dir>" >&2
  exit 2
fi

# "--" keeps a directory name that starts with "-" from being read as an option.
cd -- "$1"
if [ ! -f SHA256SUMS.txt ]; then
  echo "missing SHA256SUMS.txt" >&2
  exit 1
fi

# shasum is not installed everywhere (it needs Perl); fall back to the
# coreutils/busybox sha256sum, which reads the same checksum file format.
if command -v shasum >/dev/null 2>&1; then
  shasum -a 256 -c SHA256SUMS.txt
elif command -v sha256sum >/dev/null 2>&1; then
  sha256sum -c SHA256SUMS.txt
else
  echo "neither shasum nor sha256sum is available" >&2
  exit 1
fi