From cc3ebfa56a7072ce8d8f803e7cda3d6bd8554920 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Nov 2025 19:54:07 +0000 Subject: [PATCH 01/37] Bump vega from 5.33.0 to 6.2.0 Bumps [vega](https://github.com/vega/vega) from 5.33.0 to 6.2.0. - [Release notes](https://github.com/vega/vega/releases) - [Commits](https://github.com/vega/vega/compare/v5.33.0...v6.2.0) --- updated-dependencies: - dependency-name: vega dependency-version: 6.2.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- package.json | 2 +- yarn.lock | 537 +++++++++++++++++++++++++-------------------------- 2 files changed, 259 insertions(+), 280 deletions(-) diff --git a/package.json b/package.json index e8d88ab..66bfcfd 100644 --- a/package.json +++ b/package.json @@ -44,7 +44,7 @@ "redux-persist": "^6.0.0", "typescript": "^4.9.5", "validator": "^13.15.20", - "vega": "^5.32.0", + "vega": "^6.2.0", "vega-embed": "^6.21.0", "vega-lite": "^5.5.0", "vm-browserify": "^1.1.2" diff --git a/yarn.lock b/yarn.lock index 58d7257..77eb726 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1134,21 +1134,16 @@ dependencies: dompurify "*" -"@types/estree@1.0.8", "@types/estree@^1.0.0", "@types/estree@^1.0.6": +"@types/estree@1.0.8", "@types/estree@^1.0.0", "@types/estree@^1.0.6", "@types/estree@^1.0.8": version "1.0.8" resolved "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz" integrity sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w== -"@types/geojson@*": +"@types/geojson@*", "@types/geojson@7946.0.16": version "7946.0.16" resolved "https://registry.npmjs.org/@types/geojson/-/geojson-7946.0.16.tgz" integrity sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg== -"@types/geojson@7946.0.4": - version "7946.0.4" - resolved "https://registry.npmjs.org/@types/geojson/-/geojson-7946.0.4.tgz" - integrity sha512-MHmwBtCb7OCv1DSivz2UNJXPGU/1btAWRKlqJ2saEhVJkpkvqHMMaOpKg0v4sAbDWSQekHGvPVMM8nQ+Jen03Q== - "@types/hoist-non-react-statics@^3.3.1": version "3.3.1" resolved "https://registry.npmjs.org/@types/hoist-non-react-statics/-/hoist-non-react-statics-3.3.1.tgz" @@ -1911,7 +1906,7 @@ cytoscape@^3.29.3: dependencies: internmap "^1.0.0" -"d3-array@1 - 3", "d3-array@2 - 3", d3-array@3, d3-array@3.2.4, d3-array@^3.2.0, d3-array@^3.2.2: +"d3-array@1 - 3", "d3-array@2 - 3", d3-array@3, d3-array@3.2.4, d3-array@^3.2.0, d3-array@^3.2.4: version "3.2.4" resolved "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz" integrity sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg== @@ -1960,7 +1955,7 @@ d3-contour@4: dependencies: d3-array "^3.2.0" -d3-delaunay@6, d3-delaunay@^6.0.2: +d3-delaunay@6, d3-delaunay@^6.0.4: version "6.0.4" resolved "https://registry.npmjs.org/d3-delaunay/-/d3-delaunay-6.0.4.tgz" integrity sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A== @@ -2024,7 +2019,7 @@ d3-geo-projection@^4.0.0: d3-array "1 - 3" d3-geo "1.12.0 - 3" -"d3-geo@1.12.0 - 3", d3-geo@3, d3-geo@^3.1.0: +"d3-geo@1.12.0 - 3", d3-geo@3, d3-geo@^3.1.1: version "3.1.1" resolved "https://registry.npmjs.org/d3-geo/-/d3-geo-3.1.1.tgz" integrity sha512-637ln3gXKXOwhalDzinUgY83KzNWZRKbYubaG+fGVuc/dxO64RRljtCTnf5ecMyE1RIdtqpkVcq0IbtU2S8j2Q== @@ -3694,13 +3689,6 @@ node-addon-api@^7.0.0: resolved "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz" integrity sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ== -node-fetch@^2.6.7: - version "2.7.0" - resolved "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz" - integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A== - dependencies: - whatwg-url "^5.0.0" - normalize-path@^3.0.0: version "3.0.0" resolved "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz" @@ -4649,11 +4637,6 @@ topojson-client@^3.1.0: dependencies: commander "2" -tr46@~0.0.3: - version "0.0.3" - resolved "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz" - integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw== - "traverse@>=0.3.0 <0.4": version "0.3.9" resolved "https://registry.npmjs.org/traverse/-/traverse-0.3.9.tgz" @@ -4823,28 +4806,28 @@ validator@^13.15.20: resolved "https://registry.npmjs.org/validator/-/validator-13.15.20.tgz" integrity sha512-KxPOq3V2LmfQPP4eqf3Mq/zrT0Dqp2Vmx2Bn285LwVahLc+CsxOM0crBHczm8ijlcjZ0Q5Xd6LW3z3odTPnlrw== -vega-canvas@^1.2.7: - version "1.2.7" - resolved "https://registry.npmjs.org/vega-canvas/-/vega-canvas-1.2.7.tgz" - integrity sha512-OkJ9CACVcN9R5Pi9uF6MZBF06pO6qFpDYHWSKBJsdHP5o724KrsgR6UvbnXFH82FdsiTOff/HqjuaG8C7FL+9Q== +vega-canvas@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/vega-canvas/-/vega-canvas-2.0.0.tgz#4709deb68f9b4fd7475957bed99f16c38dbc07b8" + integrity sha512-9x+4TTw/USYST5nx4yN272sy9WcqSRjAR0tkQYZJ4cQIeon7uVsnohvoPQK1JZu7K1QXGUqzj08z0u/UegBVMA== -vega-crossfilter@~4.1.3: - version "4.1.3" - resolved "https://registry.npmjs.org/vega-crossfilter/-/vega-crossfilter-4.1.3.tgz" - integrity sha512-nyPJAXAUABc3EocUXvAL1J/IWotZVsApIcvOeZaUdEQEtZ7bt8VtP2nj3CLbHBA8FZZVV+K6SmdwvCOaAD4wFQ== +vega-crossfilter@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-crossfilter/-/vega-crossfilter-5.1.0.tgz#f4c56d9e0c31705cae41cd0e35abcdee20c0c483" + integrity sha512-EmVhfP3p6AM7o/lPan/QAoqjblI19BxWUlvl2TSs0xjQd8KbaYYbS4Ixt3cmEvl0QjRdBMF6CdJJ/cy9DTS4Fw== dependencies: - d3-array "^3.2.2" - vega-dataflow "^5.7.7" - vega-util "^1.17.3" + d3-array "^3.2.4" + vega-dataflow "^6.1.0" + vega-util "^2.1.0" -vega-dataflow@^5.7.7, vega-dataflow@^5.7.8, vega-dataflow@~5.7.7: - version "5.7.8" - resolved "https://registry.npmjs.org/vega-dataflow/-/vega-dataflow-5.7.8.tgz" - integrity sha512-jrllcIjSYU5Jh130RDR44o/SbUbJndLuoiM9IsKWW+a7HayKnfmbdHWm7MvCrj/YLupFZVojRaS1tTs53EXTdA== +vega-dataflow@^6.1.0, vega-dataflow@~6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/vega-dataflow/-/vega-dataflow-6.1.0.tgz#1fc48ea6bbbe002d45a1a48eee67aea097a57c55" + integrity sha512-JxumGlODtFbzoQ4c/jQK8Tb/68ih0lrexlCozcMfTAwQ12XhTqCvlafh7MAKKTMBizjOfaQTHm4Jkyb1H5CfyQ== dependencies: - vega-format "^1.1.4" - vega-loader "^4.5.4" - vega-util "^1.17.4" + vega-format "^2.1.0" + vega-loader "^5.1.0" + vega-util "^2.1.0" vega-embed@6.5.1: version "6.5.1" @@ -4872,29 +4855,34 @@ vega-embed@^6.21.0: vega-themes "^2.15.0" vega-tooltip "^0.35.2" -vega-encode@~4.10.2: - version "4.10.2" - resolved "https://registry.npmjs.org/vega-encode/-/vega-encode-4.10.2.tgz" - integrity sha512-fsjEY1VaBAmqwt7Jlpz0dpPtfQFiBdP9igEefvumSpy7XUxOJmDQcRDnT3Qh9ctkv3itfPfI9g8FSnGcv2b4jQ== +vega-encode@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-encode/-/vega-encode-5.1.0.tgz#05f56b898822e09df96a5ca7f1017b9f9a1c4d3b" + integrity sha512-q26oI7B+MBQYcTQcr5/c1AMsX3FvjZLQOBi7yI0vV+GEn93fElDgvhQiYrgeYSD4Exi/jBPeUXuN6p4bLz16kA== dependencies: - d3-array "^3.2.2" + d3-array "^3.2.4" d3-interpolate "^3.0.1" - vega-dataflow "^5.7.7" - vega-scale "^7.4.2" - vega-util "^1.17.3" + vega-dataflow "^6.1.0" + vega-scale "^8.1.0" + vega-util "^2.1.0" + +vega-event-selector@^4.0.0, vega-event-selector@~4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/vega-event-selector/-/vega-event-selector-4.0.0.tgz#425e9f2671e858a1a45b4b6a7fc452ca0b22abbf" + integrity sha512-CcWF4m4KL/al1Oa5qSzZ5R776q8lRxCj3IafCHs5xipoEHrkgu1BWa7F/IH5HrDNXeIDnqOpSV1pFsAWRak4gQ== -vega-event-selector@^3.0.1, vega-event-selector@~3.0.1: +vega-event-selector@~3.0.1: version "3.0.1" resolved "https://registry.npmjs.org/vega-event-selector/-/vega-event-selector-3.0.1.tgz" integrity sha512-K5zd7s5tjr1LiOOkjGpcVls8GsH/f2CWCrWcpKy74gTCp+llCdwz0Enqo013ZlGaRNjfgD/o1caJRt3GSaec4A== -vega-expression@^5.2.0, vega-expression@^5.2.1, vega-expression@~5.2.0: - version "5.2.1" - resolved "https://registry.npmjs.org/vega-expression/-/vega-expression-5.2.1.tgz" - integrity sha512-9KKbI2q9qTI55NSjD/dVWg3aeCtw+gwyWCiLMM47ha6iXrAN9pQ+EKRJfxOHuoDfCTlJJTaUfnnXgbqm0HEszg== +vega-expression@^6.1.0, vega-expression@~6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/vega-expression/-/vega-expression-6.1.0.tgz#6ce358a39b9b953806bff200f6f84f44163c9e38" + integrity sha512-hHgNx/fQ1Vn1u6vHSamH7lRMsOa/yQeHGGcWVmh8fZafLdwdhCM91kZD9p7+AleNpgwiwzfGogtpATFaMmDFYg== dependencies: - "@types/estree" "^1.0.0" - vega-util "^1.17.4" + "@types/estree" "^1.0.8" + vega-util "^2.1.0" vega-expression@~5.1.1: version "5.1.2" @@ -4904,65 +4892,65 @@ vega-expression@~5.1.1: "@types/estree" "^1.0.0" vega-util "^1.17.3" -vega-force@~4.2.2: - version "4.2.2" - resolved "https://registry.npmjs.org/vega-force/-/vega-force-4.2.2.tgz" - integrity sha512-cHZVaY2VNNIG2RyihhSiWniPd2W9R9kJq0znxzV602CgUVgxEfTKtx/lxnVCn8nNrdKAYrGiqIsBzIeKG1GWHw== +vega-force@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-force/-/vega-force-5.1.0.tgz#aa7cf8edbe2ae3bada070f343565dfb841e501a9" + integrity sha512-wdnchOSeXpF9Xx8Yp0s6Do9F7YkFeOn/E/nENtsI7NOcyHpICJ5+UkgjUo9QaQ/Yu+dIDU+sP/4NXsUtq6SMaQ== dependencies: d3-force "^3.0.0" - vega-dataflow "^5.7.7" - vega-util "^1.17.3" + vega-dataflow "^6.1.0" + vega-util "^2.1.0" -vega-format@^1.1.3, vega-format@^1.1.4, vega-format@~1.1.3: - version "1.1.4" - resolved "https://registry.npmjs.org/vega-format/-/vega-format-1.1.4.tgz" - integrity sha512-+oz6UvXjQSbweW9P8q+1o2qFYyBYPFax94j6a9PQMnCIWMovFSss1wEElljOT8CEpnHyS15yiGlmz4qbWTQwnQ== +vega-format@^2.1.0, vega-format@~2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/vega-format/-/vega-format-2.1.0.tgz#4652c7ec9fb1b7ff9a2c50dcd498a36ba6146fda" + integrity sha512-i9Ht33IgqG36+S1gFDpAiKvXCPz+q+1vDhDGKK8YsgMxGOG4PzinKakI66xd7SdV4q97FgpR7odAXqtDN2wKqw== dependencies: - d3-array "^3.2.2" + d3-array "^3.2.4" d3-format "^3.1.0" d3-time-format "^4.1.0" - vega-time "^2.1.4" - vega-util "^1.17.4" + vega-time "^3.1.0" + vega-util "^2.1.0" -vega-functions@^5.18.0, vega-functions@~5.18.0: - version "5.18.1" - resolved "https://registry.npmjs.org/vega-functions/-/vega-functions-5.18.1.tgz" - integrity sha512-qEBAbo0jxGGebRvbX1zmxzmjwFz8/UtncRhzwk9/KcI0WudULNmCM1iTu+DGFRnNHdcKi6kUlwJBPIp7zDu3HQ== +vega-functions@^6.1.0, vega-functions@~6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/vega-functions/-/vega-functions-6.1.0.tgz#d3726c46744f8a9769df5954a41a3d42fefad57e" + integrity sha512-yooEbWt0FWMBNoohwLsl25lEh08WsWabTXbbS+q0IXZzWSpX4Cyi45+q7IFyy/2L4oaIfGIIV14dgn3srQQcGA== dependencies: - d3-array "^3.2.2" + d3-array "^3.2.4" d3-color "^3.1.0" - d3-geo "^3.1.0" - vega-dataflow "^5.7.8" - vega-expression "^5.2.1" - vega-scale "^7.4.3" - vega-scenegraph "^4.13.2" - vega-selections "^5.6.1" - vega-statistics "^1.9.0" - vega-time "^2.1.4" - vega-util "^1.17.4" - -vega-geo@~4.4.3: - version "4.4.3" - resolved "https://registry.npmjs.org/vega-geo/-/vega-geo-4.4.3.tgz" - integrity sha512-+WnnzEPKIU1/xTFUK3EMu2htN35gp9usNZcC0ZFg2up1/Vqu6JyZsX0PIO51oXSIeXn9bwk6VgzlOmJUcx92tA== - dependencies: - d3-array "^3.2.2" + d3-geo "^3.1.1" + vega-dataflow "^6.1.0" + vega-expression "^6.1.0" + vega-scale "^8.1.0" + vega-scenegraph "^5.1.0" + vega-selections "^6.1.0" + vega-statistics "^2.0.0" + vega-time "^3.1.0" + vega-util "^2.1.0" + +vega-geo@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-geo/-/vega-geo-5.1.0.tgz#d8fe6ae912ad27cd2b1c21f545a74c07da093589" + integrity sha512-H8aBBHfthc3rzDbz/Th18+Nvp00J73q3uXGAPDQqizioDm/CoXCK8cX4pMePydBY9S6ikBiGJrLKFDa80wI20g== + dependencies: + d3-array "^3.2.4" d3-color "^3.1.0" - d3-geo "^3.1.0" - vega-canvas "^1.2.7" - vega-dataflow "^5.7.7" - vega-projection "^1.6.2" - vega-statistics "^1.9.0" - vega-util "^1.17.3" - -vega-hierarchy@~4.1.3: - version "4.1.3" - resolved "https://registry.npmjs.org/vega-hierarchy/-/vega-hierarchy-4.1.3.tgz" - integrity sha512-0Z+TYKRgOEo8XYXnJc2HWg1EGpcbNAhJ9Wpi9ubIbEyEHqIgjCIyFVN8d4nSfsJOcWDzsSmRqohBztxAhOCSaw== + d3-geo "^3.1.1" + vega-canvas "^2.0.0" + vega-dataflow "^6.1.0" + vega-projection "^2.1.0" + vega-statistics "^2.0.0" + vega-util "^2.1.0" + +vega-hierarchy@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-hierarchy/-/vega-hierarchy-5.1.0.tgz#423770dd1cb4684370f23a688dc5b6dad1399dbf" + integrity sha512-rZlU8QJNETlB6o73lGCPybZtw2fBBsRIRuFE77aCLFHdGsh6wIifhplVarqE9icBqjUHRRUOmcEYfzwVIPr65g== dependencies: d3-hierarchy "^3.1.2" - vega-dataflow "^5.7.7" - vega-util "^1.17.3" + vega-dataflow "^6.1.0" + vega-util "^2.1.0" vega-interpreter@^1.0.5: version "1.2.1" @@ -4971,15 +4959,15 @@ vega-interpreter@^1.0.5: dependencies: vega-util "^1.17.4" -vega-label@~1.3.1: - version "1.3.1" - resolved "https://registry.npmjs.org/vega-label/-/vega-label-1.3.1.tgz" - integrity sha512-Emx4b5s7pvuRj3fBkAJ/E2snCoZACfKAwxVId7f/4kYVlAYLb5Swq6W8KZHrH4M9Qds1XJRUYW9/Y3cceqzEFA== +vega-label@~2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/vega-label/-/vega-label-2.1.0.tgz#bd977cd14e9b062fce31593a2db2819aa9efb2c9" + integrity sha512-/hgf+zoA3FViDBehrQT42Lta3t8In6YwtMnwjYlh72zNn1p3c7E3YUBwqmAqTM1x+tudgzMRGLYig+bX1ewZxQ== dependencies: - vega-canvas "^1.2.7" - vega-dataflow "^5.7.7" - vega-scenegraph "^4.13.1" - vega-util "^1.17.3" + vega-canvas "^2.0.0" + vega-dataflow "^6.1.0" + vega-scenegraph "^5.1.0" + vega-util "^2.1.0" vega-lite@^5.5.0: version "5.23.0" @@ -4993,78 +4981,77 @@ vega-lite@^5.5.0: vega-util "~1.17.2" yargs "~17.7.2" -vega-loader@^4.5.4, vega-loader@~4.5.3: - version "4.5.4" - resolved "https://registry.npmjs.org/vega-loader/-/vega-loader-4.5.4.tgz" - integrity sha512-AOJPsDVz009aTdD9hzigUaO/NFmuN1o83rzvZu/g37TJfhU+3DOvgnO0rnqJbnSOfcBkLWER6XghlKS3j77w4A== +vega-loader@^5.1.0, vega-loader@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-loader/-/vega-loader-5.1.0.tgz#69378fc4d46e8d4573ad308f76464e66b02579e6" + integrity sha512-GaY3BdSPbPNdtrBz8SYUBNmNd8mdPc3mtdZfdkFazQ0RD9m+Toz5oR8fKnTamNSk9fRTJX0Lp3uEqxrAlQVreg== dependencies: d3-dsv "^3.0.1" - node-fetch "^2.6.7" topojson-client "^3.1.0" - vega-format "^1.1.4" - vega-util "^1.17.4" + vega-format "^2.1.0" + vega-util "^2.1.0" -vega-parser@~6.6.0: - version "6.6.0" - resolved "https://registry.npmjs.org/vega-parser/-/vega-parser-6.6.0.tgz" - integrity sha512-jltyrwCTtWeidi/6VotLCybhIl+ehwnzvFWYOdWNUP0z/EskdB64YmawNwjCjzTBMemeiQtY6sJPPbewYqe3Vg== +vega-parser@~7.1.0: + version "7.1.0" + resolved "https://registry.yarnpkg.com/vega-parser/-/vega-parser-7.1.0.tgz#20ee0e70a6ecdb8cb34ef16deed484ad68c40850" + integrity sha512-g0lrYxtmYVW8G6yXpIS4J3Uxt9OUSkc0bLu5afoYDo4rZmoOOdll3x3ebActp5LHPW+usZIE+p5nukRS2vEc7Q== dependencies: - vega-dataflow "^5.7.7" - vega-event-selector "^3.0.1" - vega-functions "^5.18.0" - vega-scale "^7.4.2" - vega-util "^1.17.3" + vega-dataflow "^6.1.0" + vega-event-selector "^4.0.0" + vega-functions "^6.1.0" + vega-scale "^8.1.0" + vega-util "^2.1.0" -vega-projection@^1.6.2, vega-projection@~1.6.2: - version "1.6.2" - resolved "https://registry.npmjs.org/vega-projection/-/vega-projection-1.6.2.tgz" - integrity sha512-3pcVaQL9R3Zfk6PzopLX6awzrQUeYOXJzlfLGP2Xd93mqUepBa6m/reVrTUoSFXA3v9lfK4W/PS2AcVzD/MIcQ== +vega-projection@^2.1.0, vega-projection@~2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/vega-projection/-/vega-projection-2.1.0.tgz#ce46291ef78a7418c75679103296d62f49afac14" + integrity sha512-EjRjVSoMR5ibrU7q8LaOQKP327NcOAM1+eZ+NO4ANvvAutwmbNVTmfA1VpPH+AD0AlBYc39ND/wnRk7SieDiXA== dependencies: - d3-geo "^3.1.0" + d3-geo "^3.1.1" d3-geo-projection "^4.0.0" - vega-scale "^7.4.2" + vega-scale "^8.1.0" -vega-regression@~1.3.1: - version "1.3.1" - resolved "https://registry.npmjs.org/vega-regression/-/vega-regression-1.3.1.tgz" - integrity sha512-AmccF++Z9uw4HNZC/gmkQGe6JsRxTG/R4QpbcSepyMvQN1Rj5KtVqMcmVFP1r3ivM4dYGFuPlzMWvuqp0iKMkQ== +vega-regression@~2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/vega-regression/-/vega-regression-2.1.0.tgz#d3fd103e97a0aee55ae2a78ed81588fb5dcb9e03" + integrity sha512-HzC7MuoEwG1rIxRaNTqgcaYF03z/ZxYkQR2D5BN0N45kLnHY1HJXiEcZkcffTsqXdspLjn47yLi44UoCwF5fxQ== dependencies: - d3-array "^3.2.2" - vega-dataflow "^5.7.7" - vega-statistics "^1.9.0" - vega-util "^1.17.3" + d3-array "^3.2.4" + vega-dataflow "^6.1.0" + vega-statistics "^2.0.0" + vega-util "^2.1.0" -vega-runtime@^6.2.1, vega-runtime@~6.2.1: - version "6.2.1" - resolved "https://registry.npmjs.org/vega-runtime/-/vega-runtime-6.2.1.tgz" - integrity sha512-b4eot3tWKCk++INWqot+6sLn3wDTj/HE+tRSbiaf8aecuniPMlwJEK7wWuhVGeW2Ae5n8fI/8TeTViaC94bNHA== +vega-runtime@^7.1.0, vega-runtime@~7.1.0: + version "7.1.0" + resolved "https://registry.yarnpkg.com/vega-runtime/-/vega-runtime-7.1.0.tgz#1959d6168638f85bdce4d157117aca6ad1f69fac" + integrity sha512-mItI+WHimyEcZlZrQ/zYR3LwHVeyHCWwp7MKaBjkU8EwkSxEEGVceyGUY9X2YuJLiOgkLz/6juYDbMv60pfwYA== dependencies: - vega-dataflow "^5.7.7" - vega-util "^1.17.3" + vega-dataflow "^6.1.0" + vega-util "^2.1.0" -vega-scale@^7.4.2, vega-scale@^7.4.3, vega-scale@~7.4.2: - version "7.4.3" - resolved "https://registry.npmjs.org/vega-scale/-/vega-scale-7.4.3.tgz" - integrity sha512-f7SSN2YJowtrdkt7nJIR6YYhjDk8oB37q5So2/OxXQv5CBHipFPQSHS1ZVw9vD3V5wLnrZCxC4Ji27gmsTefgA== +vega-scale@^8.1.0, vega-scale@~8.1.0: + version "8.1.0" + resolved "https://registry.yarnpkg.com/vega-scale/-/vega-scale-8.1.0.tgz#a06b3aa8d60ae46ad8f3d89eae0e74eb3d1200e3" + integrity sha512-VEgDuEcOec8+C8+FzLcnAmcXrv2gAJKqQifCdQhkgnsLa978vYUgVfCut/mBSMMHbH8wlUV1D0fKZTjRukA1+A== dependencies: - d3-array "^3.2.2" + d3-array "^3.2.4" d3-interpolate "^3.0.1" d3-scale "^4.0.2" d3-scale-chromatic "^3.1.0" - vega-time "^2.1.4" - vega-util "^1.17.4" + vega-time "^3.1.0" + vega-util "^2.1.0" -vega-scenegraph@^4.13.1, vega-scenegraph@^4.13.2, vega-scenegraph@~4.13.1: - version "4.13.2" - resolved "https://registry.npmjs.org/vega-scenegraph/-/vega-scenegraph-4.13.2.tgz" - integrity sha512-eCutgcLzdUg23HLc6MTZ9pHCdH0hkqSmlbcoznspwT0ajjATk6M09JNyJddiaKR55HuQo03mBWsPeRCd5kOi0g== +vega-scenegraph@^5.1.0, vega-scenegraph@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-scenegraph/-/vega-scenegraph-5.1.0.tgz#3b3c0d871799fe84bc563256d7b9d54bc2e13368" + integrity sha512-4gA89CFIxkZX+4Nvl8SZF2MBOqnlj9J5zgdPh/HPx+JOwtzSlUqIhxFpFj7GWYfwzr/PyZnguBLPihPw1Og/cA== dependencies: d3-path "^3.1.0" d3-shape "^3.2.0" - vega-canvas "^1.2.7" - vega-loader "^4.5.4" - vega-scale "^7.4.3" - vega-util "^1.17.4" + vega-canvas "^2.0.0" + vega-loader "^5.1.0" + vega-scale "^8.1.0" + vega-util "^2.1.0" vega-schema-url-parser@^1.1.0: version "1.1.0" @@ -5076,35 +5063,35 @@ vega-schema-url-parser@^2.2.0: resolved "https://registry.npmjs.org/vega-schema-url-parser/-/vega-schema-url-parser-2.2.0.tgz" integrity sha512-yAtdBnfYOhECv9YC70H2gEiqfIbVkq09aaE4y/9V/ovEFmH9gPKaEgzIZqgT7PSPQjKhsNkb6jk6XvSoboxOBw== -vega-selections@^5.6.1: - version "5.6.1" - resolved "https://registry.npmjs.org/vega-selections/-/vega-selections-5.6.1.tgz" - integrity sha512-b7EwMkQcih4Sc+6i5eLUnIwtTisdyjIGUJ/U9Rog9scg5jPMe09BeqdMtiROGEnS/f/BI2vS68mz08OEVy308w== +vega-selections@^6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/vega-selections/-/vega-selections-6.1.0.tgz#a3bf68cced5c9080b97ffaa44a1a4b07af67229e" + integrity sha512-WaHM7D7ghHceEfMsgFeaZnDToWL0mgCFtStVOobNh/OJLh0CL7yNKeKQBqRXJv2Lx74dPNf6nj08+52ytWfW7g== dependencies: d3-array "3.2.4" - vega-expression "^5.2.1" - vega-util "^1.17.4" + vega-expression "^6.1.0" + vega-util "^2.1.0" -vega-statistics@^1.9.0, vega-statistics@~1.9.0: - version "1.9.0" - resolved "https://registry.npmjs.org/vega-statistics/-/vega-statistics-1.9.0.tgz" - integrity sha512-GAqS7mkatpXcMCQKWtFu1eMUKLUymjInU0O8kXshWaQrVWjPIO2lllZ1VNhdgE0qGj4oOIRRS11kzuijLshGXQ== +vega-statistics@^2.0.0, vega-statistics@~2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/vega-statistics/-/vega-statistics-2.0.0.tgz#9c9636c20682ae98e8887f8fab0e82c2466a736a" + integrity sha512-dGPfDXnBlgXbZF3oxtkb8JfeRXd5TYHx25Z/tIoaa9jWua4Vf/AoW2wwh8J1qmMy8J03/29aowkp1yk4DOPazQ== dependencies: - d3-array "^3.2.2" + d3-array "^3.2.4" vega-themes@^2.15.0, vega-themes@^2.8.2: version "2.15.0" resolved "https://registry.npmjs.org/vega-themes/-/vega-themes-2.15.0.tgz" integrity sha512-DicRAKG9z+23A+rH/3w3QjJvKnlGhSbbUXGjBvYGseZ1lvj9KQ0BXZ2NS/+MKns59LNpFNHGi9us/wMlci4TOA== -vega-time@^2.1.3, vega-time@^2.1.4, vega-time@~2.1.3: - version "2.1.4" - resolved "https://registry.npmjs.org/vega-time/-/vega-time-2.1.4.tgz" - integrity sha512-DBMRps5myYnSAlvQ+oiX8CycJZjGQNqyGE04xaZrpOgHll7vlvezpET2FnGZC7wS3DsqMcPjnpnI1h7+qJox1Q== +vega-time@^3.1.0, vega-time@~3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/vega-time/-/vega-time-3.1.0.tgz#4e20c5d60e3f7e827a33db29bd4855f40a0ae3cb" + integrity sha512-G93mWzPwNa6UYQRkr8Ujur9uqxbBDjDT/WpXjbDY0yygdSkRT+zXF+Sb4gjhW0nPaqdiwkn0R6kZcSPMj1bMNA== dependencies: - d3-array "^3.2.2" + d3-array "^3.2.4" d3-time "^3.1.0" - vega-util "^1.17.4" + vega-util "^2.1.0" vega-tooltip@^0.22.0: version "0.22.1" @@ -5122,107 +5109,112 @@ vega-tooltip@^0.35.2: optionalDependencies: "@rollup/rollup-linux-x64-gnu" "^4.24.4" -vega-transforms@~4.12.1: - version "4.12.1" - resolved "https://registry.npmjs.org/vega-transforms/-/vega-transforms-4.12.1.tgz" - integrity sha512-Qxo+xeEEftY1jYyKgzOGc9NuW4/MqGm1YPZ5WrL9eXg2G0410Ne+xL/MFIjHF4hRX+3mgFF4Io2hPpfy/thjLg== +vega-transforms@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-transforms/-/vega-transforms-5.1.0.tgz#4e95cd7c4773aa560928d10385a0d33ea2748caa" + integrity sha512-mj/sO2tSuzzpiXX8JSl4DDlhEmVwM/46MTAzTNQUQzJPMI/n4ChCjr/SdEbfEyzlD4DPm1bjohZGjLc010yuMg== dependencies: - d3-array "^3.2.2" - vega-dataflow "^5.7.7" - vega-statistics "^1.9.0" - vega-time "^2.1.3" - vega-util "^1.17.3" + d3-array "^3.2.4" + vega-dataflow "^6.1.0" + vega-statistics "^2.0.0" + vega-time "^3.1.0" + vega-util "^2.1.0" -vega-typings@~1.5.0: - version "1.5.0" - resolved "https://registry.npmjs.org/vega-typings/-/vega-typings-1.5.0.tgz" - integrity sha512-tcZ2HwmiQEOXIGyBMP8sdCnoFoVqHn4KQ4H0MQiHwzFU1hb1EXURhfc+Uamthewk4h/9BICtAM3AFQMjBGpjQA== +vega-typings@~2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/vega-typings/-/vega-typings-2.1.0.tgz#1c1fe548c0f00997820246ade0d3d813b87bfd76" + integrity sha512-zdis4Fg4gv37yEvTTSZEVMNhp8hwyEl7GZ4X4HHddRVRKxWFsbyKvZx/YW5Z9Ox4sjxVA2qHzEbod4Fdx+SEJA== dependencies: - "@types/geojson" "7946.0.4" - vega-event-selector "^3.0.1" - vega-expression "^5.2.0" - vega-util "^1.17.3" + "@types/geojson" "7946.0.16" + vega-event-selector "^4.0.0" + vega-expression "^6.1.0" + vega-util "^2.1.0" vega-util@^1.13.1, vega-util@^1.17.2, vega-util@^1.17.3, vega-util@^1.17.4, vega-util@~1.17.2: version "1.17.4" resolved "https://registry.npmjs.org/vega-util/-/vega-util-1.17.4.tgz" integrity sha512-+y3ZW7dEqM8Ck+KRsd+jkMfxfE7MrQxUyIpNjkfhIpGEreym+aTn7XUw1DKXqclr8mqTQvbilPo16B3lnBr0wA== -vega-view-transforms@~4.6.1: - version "4.6.1" - resolved "https://registry.npmjs.org/vega-view-transforms/-/vega-view-transforms-4.6.1.tgz" - integrity sha512-RYlyMJu5kZV4XXjmyTQKADJWDB25SMHsiF+B1rbE1p+pmdQPlp5tGdPl9r5dUJOp3p8mSt/NGI8GPGucmPMxtw== - dependencies: - vega-dataflow "^5.7.7" - vega-scenegraph "^4.13.1" - vega-util "^1.17.3" - -vega-view@~5.16.0: - version "5.16.0" - resolved "https://registry.npmjs.org/vega-view/-/vega-view-5.16.0.tgz" - integrity sha512-Nxp1MEAY+8bphIm+7BeGFzWPoJnX9+hgvze6wqCAPoM69YiyVR0o0VK8M2EESIL+22+Owr0Fdy94hWHnmon5tQ== - dependencies: - d3-array "^3.2.2" - d3-timer "^3.0.1" - vega-dataflow "^5.7.7" - vega-format "^1.1.3" - vega-functions "^5.18.0" - vega-runtime "^6.2.1" - vega-scenegraph "^4.13.1" - vega-util "^1.17.3" - -vega-voronoi@~4.2.4: - version "4.2.4" - resolved "https://registry.npmjs.org/vega-voronoi/-/vega-voronoi-4.2.4.tgz" - integrity sha512-lWNimgJAXGeRFu2Pz8axOUqVf1moYhD+5yhBzDSmckE9I5jLOyZc/XvgFTXwFnsVkMd1QW1vxJa+y9yfUblzYw== - dependencies: - d3-delaunay "^6.0.2" - vega-dataflow "^5.7.7" - vega-util "^1.17.3" +vega-util@^2.1.0, vega-util@~2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/vega-util/-/vega-util-2.1.0.tgz#54f42d6a80e5904ea9ac6c0327e6ac57601ce85f" + integrity sha512-PGfp0m0QCufDmcxKJCWQy4Ov23FoF8DSXmoJwSezi3itQaa2hbxK0+xwsTMP2vy4PR16Pu25HMzgMwXVW1+33w== -vega-wordcloud@~4.1.6: - version "4.1.6" - resolved "https://registry.npmjs.org/vega-wordcloud/-/vega-wordcloud-4.1.6.tgz" - integrity sha512-lFmF3u9/ozU0P+WqPjeThQfZm0PigdbXDwpIUCxczrCXKYJLYFmZuZLZR7cxtmpZ0/yuvRvAJ4g123LXbSZF8A== +vega-view-transforms@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-view-transforms/-/vega-view-transforms-5.1.0.tgz#1f31f75efcf99b38969e750043adb922fcec6f3e" + integrity sha512-fpigh/xn/32t+An1ShoY3MLeGzNdlbAp2+HvFKzPpmpMTZqJEWkk/J/wHU7Swyc28Ta7W1z3fO+8dZkOYO5TWQ== dependencies: - vega-canvas "^1.2.7" - vega-dataflow "^5.7.7" - vega-scale "^7.4.2" - vega-statistics "^1.9.0" - vega-util "^1.17.3" + vega-dataflow "^6.1.0" + vega-scenegraph "^5.1.0" + vega-util "^2.1.0" -vega@^5.32.0: - version "5.33.0" - resolved "https://registry.npmjs.org/vega/-/vega-5.33.0.tgz" - integrity sha512-jNAGa7TxLojOpMMMrKMXXBos4K6AaLJbCgGDOw1YEkLRjUkh12pcf65J2lMSdEHjcEK47XXjKiOUVZ8L+MniBA== +vega-view@~6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/vega-view/-/vega-view-6.1.0.tgz#5596f78c5ebca8dcb57feca40fd31cb8265fd04e" + integrity sha512-hmHDm/zC65lb23mb9Tr9Gx0wkxP0TMS31LpMPYxIZpvInxvUn7TYitkOtz1elr63k2YZrgmF7ztdGyQ4iCQ5fQ== dependencies: - vega-crossfilter "~4.1.3" - vega-dataflow "~5.7.7" - vega-encode "~4.10.2" - vega-event-selector "~3.0.1" - vega-expression "~5.2.0" - vega-force "~4.2.2" - vega-format "~1.1.3" - vega-functions "~5.18.0" - vega-geo "~4.4.3" - vega-hierarchy "~4.1.3" - vega-label "~1.3.1" - vega-loader "~4.5.3" - vega-parser "~6.6.0" - vega-projection "~1.6.2" - vega-regression "~1.3.1" - vega-runtime "~6.2.1" - vega-scale "~7.4.2" - vega-scenegraph "~4.13.1" - vega-statistics "~1.9.0" - vega-time "~2.1.3" - vega-transforms "~4.12.1" - vega-typings "~1.5.0" - vega-util "~1.17.2" - vega-view "~5.16.0" - vega-view-transforms "~4.6.1" - vega-voronoi "~4.2.4" - vega-wordcloud "~4.1.6" + d3-array "^3.2.4" + d3-timer "^3.0.1" + vega-dataflow "^6.1.0" + vega-format "^2.1.0" + vega-functions "^6.1.0" + vega-runtime "^7.1.0" + vega-scenegraph "^5.1.0" + vega-util "^2.1.0" + +vega-voronoi@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-voronoi/-/vega-voronoi-5.1.0.tgz#92956b9d78f06e3918970fc84d06974e24b9f52f" + integrity sha512-uKdsoR9x60mz7eYtVG+NhlkdQXeVdMr6jHNAHxs+W+i6kawkUp5S9jp1xf1FmW/uZvtO1eqinHQNwATcDRsiUg== + dependencies: + d3-delaunay "^6.0.4" + vega-dataflow "^6.1.0" + vega-util "^2.1.0" + +vega-wordcloud@~5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/vega-wordcloud/-/vega-wordcloud-5.1.0.tgz#7aa8dcbf6c83b193fe71fb6410be15ad2c7285e6" + integrity sha512-sSdNmT8y2D7xXhM2h76dKyaYn3PA4eV49WUUkfYfqHz/vpcu10GSAoFxLhQQTkbZXR+q5ZB63tFUow9W2IFo6g== + dependencies: + vega-canvas "^2.0.0" + vega-dataflow "^6.1.0" + vega-scale "^8.1.0" + vega-statistics "^2.0.0" + vega-util "^2.1.0" + +vega@^6.2.0: + version "6.2.0" + resolved "https://registry.yarnpkg.com/vega/-/vega-6.2.0.tgz#34c2de83b00e701e040029738b26f1ec992f327f" + integrity sha512-BIwalIcEGysJdQDjeVUmMWB3e50jPDNAMfLJscjEvpunU9bSt7X1OYnQxkg3uBwuRRI4nWfFZO9uIW910nLeGw== + dependencies: + vega-crossfilter "~5.1.0" + vega-dataflow "~6.1.0" + vega-encode "~5.1.0" + vega-event-selector "~4.0.0" + vega-expression "~6.1.0" + vega-force "~5.1.0" + vega-format "~2.1.0" + vega-functions "~6.1.0" + vega-geo "~5.1.0" + vega-hierarchy "~5.1.0" + vega-label "~2.1.0" + vega-loader "~5.1.0" + vega-parser "~7.1.0" + vega-projection "~2.1.0" + vega-regression "~2.1.0" + vega-runtime "~7.1.0" + vega-scale "~8.1.0" + vega-scenegraph "~5.1.0" + vega-statistics "~2.0.0" + vega-time "~3.1.0" + vega-transforms "~5.1.0" + vega-typings "~2.1.0" + vega-util "~2.1.0" + vega-view "~6.1.0" + vega-view-transforms "~5.1.0" + vega-voronoi "~5.1.0" + vega-wordcloud "~5.1.0" vite@^5.4.21: version "5.4.21" @@ -5275,19 +5267,6 @@ vscode-uri@~3.0.8: resolved "https://registry.npmjs.org/vscode-uri/-/vscode-uri-3.0.8.tgz" integrity sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw== -webidl-conversions@^3.0.0: - version "3.0.1" - resolved "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz" - integrity sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ== - -whatwg-url@^5.0.0: - version "5.0.0" - resolved "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz" - integrity sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw== - dependencies: - tr46 "~0.0.3" - webidl-conversions "^3.0.0" - which-boxed-primitive@^1.1.0, which-boxed-primitive@^1.1.1: version "1.1.1" resolved "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.1.1.tgz" From dd900adeb6b8b4ed6183b0c7fc20f95d464fe750 Mon Sep 17 00:00:00 2001 From: Hurairah Mateen Date: Thu, 20 Nov 2025 16:06:46 +0500 Subject: [PATCH 02/37] Add BigQuery DataLoader --- .../data_formulator/data_loader/__init__.py | 4 +- .../data_loader/bigquery_data_loader.py | 318 ++++++++++++++++++ 2 files changed, 321 insertions(+), 1 deletion(-) create mode 100644 py-src/data_formulator/data_loader/bigquery_data_loader.py diff --git a/py-src/data_formulator/data_loader/__init__.py b/py-src/data_formulator/data_loader/__init__.py index 1c964f8..dadfaa1 100644 --- a/py-src/data_formulator/data_loader/__init__.py +++ b/py-src/data_formulator/data_loader/__init__.py @@ -5,6 +5,7 @@ from data_formulator.data_loader.s3_data_loader import S3DataLoader from data_formulator.data_loader.azure_blob_data_loader import AzureBlobDataLoader from data_formulator.data_loader.postgresql_data_loader import PostgreSQLDataLoader +from data_formulator.data_loader.bigquery_data_loader import BigQueryDataLoader DATA_LOADERS = { "mysql": MySQLDataLoader, @@ -12,7 +13,8 @@ "kusto": KustoDataLoader, "s3": S3DataLoader, "azure_blob": AzureBlobDataLoader, - "postgresql": PostgreSQLDataLoader + "postgresql": PostgreSQLDataLoader, + "bigquery": BigQueryDataLoader } __all__ = ["ExternalDataLoader", "MySQLDataLoader", "MSSQLDataLoader", "KustoDataLoader", "S3DataLoader", "AzureBlobDataLoader","PostgreSQLDataLoader","DATA_LOADERS"] diff --git a/py-src/data_formulator/data_loader/bigquery_data_loader.py b/py-src/data_formulator/data_loader/bigquery_data_loader.py new file mode 100644 index 0000000..ea60874 --- /dev/null +++ b/py-src/data_formulator/data_loader/bigquery_data_loader.py @@ -0,0 +1,318 @@ +import json +import logging +from typing import Dict, Any, List +import pandas as pd +import duckdb + +from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name + +try: + from google.cloud import bigquery + from google.oauth2 import service_account + BIGQUERY_AVAILABLE = True +except ImportError: + BIGQUERY_AVAILABLE = False + +log = logging.getLogger(__name__) + +class BigQueryDataLoader(ExternalDataLoader): + """BigQuery data loader implementation""" + + @staticmethod + def list_params() -> List[Dict[str, Any]]: + return [ + {"name": "project_id", "type": "text", "required": True, "description": "Google Cloud Project ID", "default": ""}, + {"name": "dataset_id", "type": "text", "required": False, "description": "Dataset ID(s) - leave empty for all, or specify one (e.g., 'billing') or multiple separated by commas (e.g., 'billing,enterprise_collected,ga_api')", "default": ""}, + {"name": "credentials_path", "type": "text", "required": False, "description": "Path to service account JSON file (optional)", "default": ""}, + {"name": "location", "type": "text", "required": False, "description": "BigQuery location (default: US)", "default": "US"} + ] + + @staticmethod + def auth_instructions() -> str: + return """BigQuery Authentication Instructions + +Authentication Options (choose one): + +Option 1 - Application Default Credentials (Recommended) + - Install Google Cloud SDK: https://cloud.google.com/sdk/docs/install + - Run `gcloud auth application-default login` in your terminal + - Leave `credentials_path` parameter empty + - Requires Google Cloud Project ID + +Option 2 - Service Account Key File + - Create a service account in Google Cloud Console + - Download the JSON key file + - Provide the full path to the JSON file in `credentials_path` parameter + - Grant the service account BigQuery Data Viewer role (or appropriate permissions) + +Option 3 - Environment Variables + - Set GOOGLE_APPLICATION_CREDENTIALS environment variable to point to your service account JSON file + - Leave `credentials_path` parameter empty + +Required Permissions: + - BigQuery Data Viewer (for reading data) + - BigQuery Job User (for running queries) + +Parameters: + - project_id: Your Google Cloud Project ID (required) + - dataset_id: Specific dataset to browse (optional - leave empty to see all datasets) + - location: BigQuery location/region (default: US) + - credentials_path: Path to service account JSON file (optional) + +Supported Operations: + - Browse datasets and tables + - Preview table schemas and data + - Import data from tables + - Execute custom SQL queries +""" + + def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): + if not BIGQUERY_AVAILABLE: + raise ImportError("google-cloud-bigquery is required for BigQuery connections. Install with: pip install google-cloud-bigquery") + + self.params = params + self.duck_db_conn = duck_db_conn + self.project_id = params.get("project_id") + self.dataset_ids = [d.strip() for d in params.get("dataset_id", "").split(",") if d.strip()] # Support multiple datasets + self.location = params.get("location", "US") + + # Initialize BigQuery client + if params.get("credentials_path"): + credentials = service_account.Credentials.from_service_account_file(params["credentials_path"]) + self.client = bigquery.Client( + project=self.project_id, + credentials=credentials, + location=self.location + ) + else: + # Use default credentials (ADC) + self.client = bigquery.Client( + project=self.project_id, + location=self.location + ) + + def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: + """List tables from BigQuery datasets""" + results = [] + + try: + log.info(f"Listing BigQuery datasets for project: {self.project_id}") + + # List datasets with timeout + datasets = list(self.client.list_datasets(max_results=50)) + log.info(f"Found {len(datasets)} datasets") + + # Limit to first 10 datasets if no specific dataset is specified + if not self.dataset_ids: + datasets = datasets[:10] + + for dataset in datasets: + dataset_id = dataset.dataset_id + + # Skip if we have specific datasets and this isn't one of them + if self.dataset_ids and dataset_id not in self.dataset_ids: + continue + + try: + log.info(f"Processing dataset: {dataset_id}") + # List tables in dataset with limit + tables = list(self.client.list_tables(dataset.reference, max_results=20)) + + for table in tables: + full_table_name = f"{self.project_id}.{dataset_id}.{table.table_id}" + + # Apply filter if provided + if table_filter and table_filter.lower() not in table.table_id.lower(): + continue + + # Get basic table info without full schema for performance + try: + table_ref = self.client.get_table(table.reference) + columns = [{"name": field.name, "type": field.field_type} for field in table_ref.schema[:10]] # Limit columns shown + + results.append({ + "name": full_table_name, + "metadata": { + "row_count": table_ref.num_rows or 0, + "columns": columns, + "sample_rows": [] # Empty for performance, can be populated later + } + }) + except Exception as e: + log.warning(f"Error getting schema for table {full_table_name}: {e}") + # Add table without detailed schema + results.append({ + "name": full_table_name, + "metadata": { + "row_count": 0, + "columns": [], + "sample_rows": [] + } + }) + + # Limit total results for performance + if len(results) >= 100: + log.info("Reached 100 table limit, stopping enumeration") + return results + + except Exception as e: + log.warning(f"Error accessing dataset {dataset_id}: {e}") + continue + + except Exception as e: + log.error(f"Error listing BigQuery tables: {e}") + + log.info(f"Returning {len(results)} tables") + return results + + def _convert_bigquery_dtypes(self, df: pd.DataFrame) -> pd.DataFrame: + """Convert BigQuery-specific dtypes to standard pandas dtypes""" + import json + + def safe_convert(x): + try: + if x is None or pd.isna(x): + return None + if isinstance(x, (dict, list)): + return json.dumps(x, default=str) + if hasattr(x, "__dict__"): + return json.dumps(x.__dict__, default=str) + s = str(x) + if "[object Object]" in s: + return json.dumps(x, default=str) + return s + except Exception: + return str(x) if x is not None else None + + for col in df.columns: + # Convert db_dtypes.DateDtype to standard datetime + if hasattr(df[col].dtype, "name") and "dbdate" in str(df[col].dtype).lower(): + df[col] = pd.to_datetime(df[col]) + # Convert other db_dtypes if needed + elif str(df[col].dtype).startswith("db_dtypes"): + try: + df[col] = df[col].astype(str) + except Exception: + pass + # Handle nested objects/JSON columns + elif df[col].dtype == "object": + df[col] = df[col].apply(safe_convert) + + return df + + def ingest_data(self, table_name: str, name_as: str | None = None, size: int = 1000000): + """Ingest data from BigQuery table into DuckDB with stable, de-duplicated column aliases.""" + if name_as is None: + name_as = table_name.split('.')[-1] + + name_as = sanitize_table_name(name_as) + + + table_ref = self.client.get_table(table_name) + + select_parts: list[str] = [] + used_aliases: dict[str, str] = {} # alias -> field_path + + def build_alias(field_path: str) -> str: + """ + Build a human-readable, globally unique alias from a BigQuery field path. + + Examples: + 'geo.country' -> 'geo_country' + 'device.category' -> 'device_category' + 'event_params.value' -> 'event_params_value' + """ + import re as _re + + # path "a.b.c" -> "a_b_c" + alias = field_path.replace('.', '_') + + # remove weird characters + alias = _re.sub(r'[^0-9a-zA-Z_]', '_', alias) + alias = _re.sub(r'_+', '_', alias).strip('_') or "col" + + # must start with letter or underscore + if not alias[0].isalpha() and alias[0] != '_': + alias = f"_{alias}" + + base_alias = alias + counter = 1 + while alias in used_aliases: + # same alias from another path – suffix and log once + alias = f"{base_alias}_{counter}" + counter += 1 + + used_aliases[alias] = field_path + return alias + + def add_field(field_path: str): + alias = build_alias(field_path) + select_parts.append(f"`{table_name}`.{field_path} AS `{alias}`") + + def process_field(field, parent_path: str = ""): + """ + Recursively process fields, flattening non-repeated RECORDs. + """ + current_path = f"{parent_path}.{field.name}" if parent_path else field.name + + # Flatten STRUCT / RECORD that is not REPEATED + if field.field_type == "RECORD" and field.mode != "REPEATED": + for subfield in field.fields: + process_field(subfield, current_path) + else: + # Regular field or REPEATED RECORD/array – select as a single column + add_field(current_path) + + # Process all top-level fields + for field in table_ref.schema: + process_field(field) + + if not select_parts: + raise ValueError(f"No fields found for table {table_name}") + + query = f"SELECT {', '.join(select_parts)} FROM `{table_name}` LIMIT {size}" + + df = self.client.query(query).to_dataframe() + + # Safety net: drop exact duplicate names if something slipped through + if df.columns.duplicated().any(): + dupes = df.columns[df.columns.duplicated()].tolist() + log.warning(f"Duplicate column names detected in DataFrame, dropping later ones: {dupes}") + df = df.loc[:, ~df.columns.duplicated()] + + + # Convert BigQuery-specific dtypes + df = self._convert_bigquery_dtypes(df) + + self.ingest_df_to_duckdb(df, name_as) + + def view_query_sample(self, query: str) -> List[Dict[str, Any]]: + """Execute query and return sample results""" + try: + # Add LIMIT if not present + if "LIMIT" not in query.upper(): + query += " LIMIT 10" + + df = self.client.query(query).to_dataframe() + return df.to_dict(orient="records") + except Exception as e: + log.error(f"Error executing query sample: {e}") + return [] + + def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: + """Execute custom query and ingest results into DuckDB""" + name_as = sanitize_table_name(name_as) + + # Execute query and get DataFrame + df = self.client.query(query).to_dataframe() + + # Drop duplicate columns + df = df.loc[:, ~df.columns.duplicated()] + + # Convert BigQuery-specific dtypes + df = self._convert_bigquery_dtypes(df) + + # Use base class method to ingest DataFrame + self.ingest_df_to_duckdb(df, name_as) + + return df From 2562c5abe2a01576bea9e8e07e117d99394b384e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Dec 2025 21:20:47 +0000 Subject: [PATCH 03/37] Bump validator from 13.15.20 to 13.15.22 Bumps [validator](https://github.com/validatorjs/validator.js) from 13.15.20 to 13.15.22. - [Release notes](https://github.com/validatorjs/validator.js/releases) - [Changelog](https://github.com/validatorjs/validator.js/blob/master/CHANGELOG.md) - [Commits](https://github.com/validatorjs/validator.js/compare/13.15.20...13.15.22) --- updated-dependencies: - dependency-name: validator dependency-version: 13.15.22 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- package.json | 2 +- yarn.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/package.json b/package.json index e8d88ab..f18ee1b 100644 --- a/package.json +++ b/package.json @@ -43,7 +43,7 @@ "redux": "^4.2.0", "redux-persist": "^6.0.0", "typescript": "^4.9.5", - "validator": "^13.15.20", + "validator": "^13.15.22", "vega": "^5.32.0", "vega-embed": "^6.21.0", "vega-lite": "^5.5.0", diff --git a/yarn.lock b/yarn.lock index 58d7257..9075550 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4818,10 +4818,10 @@ uuid@^8.3.0: resolved "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz" integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg== -validator@^13.15.20: - version "13.15.20" - resolved "https://registry.npmjs.org/validator/-/validator-13.15.20.tgz" - integrity sha512-KxPOq3V2LmfQPP4eqf3Mq/zrT0Dqp2Vmx2Bn285LwVahLc+CsxOM0crBHczm8ijlcjZ0Q5Xd6LW3z3odTPnlrw== +validator@^13.15.22: + version "13.15.22" + resolved "https://registry.yarnpkg.com/validator/-/validator-13.15.22.tgz#5f847cf4a799107e5716fc87e5cf2a337a71eb14" + integrity sha512-uT/YQjiyLJP7HSrv/dPZqK9L28xf8hsNca01HSz1dfmI0DgMfjopp1rO/z13NeGF1tVystF0Ejx3y4rUKPw+bQ== vega-canvas@^1.2.7: version "1.2.7" From 8ff7b6ebb9cfbbf42098233ce8146f7c5dd797fa Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Tue, 2 Dec 2025 16:22:45 -0800 Subject: [PATCH 04/37] simplify ui --- src/app/App.tsx | 55 +-- src/components/ComponentType.tsx | 10 +- src/views/About.tsx | 76 +---- src/views/ConceptCard.tsx | 552 ------------------------------- src/views/ConceptShelf.tsx | 7 +- src/views/DataFormulator.tsx | 128 +++---- src/views/DataLoadingChat.tsx | 2 +- src/views/DataThread.tsx | 4 +- src/views/DataView.tsx | 76 +++-- src/views/ExampleSessions.tsx | 11 +- src/views/SelectableDataGrid.tsx | 43 +-- src/views/VisualizationView.tsx | 69 +--- 12 files changed, 127 insertions(+), 906 deletions(-) diff --git a/src/app/App.tsx b/src/app/App.tsx index de2864e..14e3432 100644 --- a/src/app/App.tsx +++ b/src/app/App.tsx @@ -253,13 +253,13 @@ const TableMenu: React.FC = () => { {}}> - Database + connect to database } /> {}}> - clean data (image/messy text) + extract data (image/messy text) }/> { @@ -723,9 +723,7 @@ export const AppFC: FC = function AppFC(appProps) { {!isAboutPage && ( - + {focusedTableId !== undefined && = function AppFC(appProps) { mr: 2, height: '28px', my: 'auto', - borderRadius: 2, - border: '1px solid rgba(0, 0, 0, 0.1)', - boxShadow: '0 1px 3px rgba(0, 0, 0, 0.1)', '& .MuiToggleButton-root': { textTransform: 'none', - fontSize: '14px', fontWeight: 500, border: 'none', - borderRadius: 1, - px: 1, - py: 0.5, '&:hover': { backgroundColor: 'rgba(0, 0, 0, 0.04)', color: 'text.primary', }, - '&.Mui-selected': { - backgroundColor: alpha(theme.palette.primary.main, 0.1), - color: theme.palette.primary.main, - }, - '&:first-of-type': { - borderTopRightRadius: 0, - borderBottomRightRadius: 0, - }, - '&:last-of-type': { - borderTopLeftRadius: 0, - borderBottomLeftRadius: 0, - } - }, - '.mode-icon': { - animation: 'pulse 3s ease-out infinite', - '@keyframes pulse': { - '0%, 80%': { transform: 'scale(1)' }, - '90%': { transform: 'scale(1.3)' }, - '100%': { transform: 'scale(1)' }, - }, }, }} > - 🔍 Explore - ✏️ {generatedReports.length > 0 ? `Reports (${generatedReports.length})` : 'Reports'} @@ -915,24 +884,6 @@ export const AppFC: FC = function AppFC(appProps) { } ]); - let footer = - - - - - - @ {new Date().getFullYear()} - - let app = { - let newConcept = { + return { id: field.id, name: field.name, source: field.source, - transform: field.transform, tableRef: field.tableRef, - temporary: field.temporary, } as FieldItem; - return newConcept; } export interface Trigger { diff --git a/src/views/About.tsx b/src/views/About.tsx index 417bb72..d5000a9 100644 --- a/src/views/About.tsx +++ b/src/views/About.tsx @@ -191,32 +191,15 @@ export const About: FC<{}> = function About({ }) { }}> {/* Header with logo and title */} - - - - {toolName} v0.5 - + + {toolName} - - Turn (almost) any data into insights with AI agents, with the exploration paths you choose. + + Explore data with visualizations, powered by AI agents. + {actionButtons} {/* Interactive Features Carousel */} @@ -224,11 +207,6 @@ export const About: FC<{}> = function About({ }) { mx: 'auto', maxWidth: 1200, borderRadius: 3, - background: ` - linear-gradient(90deg, ${alpha(theme.palette.text.secondary, 0.02)} 1px, transparent 1px), - linear-gradient(0deg, ${alpha(theme.palette.text.secondary, 0.02)} 1px, transparent 1px) - `, - backgroundSize: '16px 16px', position: 'relative', }}> = function About({ }) { }}> {features[currentFeature].title} {features[currentFeature].description} @@ -302,7 +271,6 @@ export const About: FC<{}> = function About({ }) { ? theme.palette.primary.main : alpha(theme.palette.text.secondary, 0.2), cursor: 'pointer', - transition: 'all 0.3s ease', '&:hover': { bgcolor: index === currentFeature ? theme.palette.primary.main @@ -405,18 +373,7 @@ export const About: FC<{}> = function About({ }) { display: 'flex', justifyContent: 'center', textDecoration: 'none', - transition: 'box-shadow 0.3s ease', - animation: 'fadeSlideIn 0.5s ease-out', - '@keyframes fadeSlideIn': { - '0%': { - opacity: 0, - transform: 'translateX(30px)', - }, - '100%': { - opacity: 1, - transform: 'translateX(0)', - } - }, + animation: 'fadeSlideIn 0.1s ease-out', '&:hover': { boxShadow: '0 8px 24px rgba(0,0,0,0.2)', '& .description-overlay': { @@ -492,20 +449,13 @@ export const About: FC<{}> = function About({ }) { - + How does Data Formulator handle your data?
  • 📦 Data Storage: Uploaded data (csv, xlsx, json, clipboard, messy data etc.) is stored in browser's local storage only
  • @@ -514,7 +464,7 @@ export const About: FC<{}> = function About({ }) {
  • 🤖 LLM Endpoints: Small data samples are sent to LLM endpoints along with the prompt. Use your trusted model provider if working with private data.
- + Research Prototype from Microsoft Research
diff --git a/src/views/ConceptCard.tsx b/src/views/ConceptCard.tsx index c94c54b..8878aa6 100644 --- a/src/views/ConceptCard.tsx +++ b/src/views/ConceptCard.tsx @@ -70,124 +70,8 @@ export interface ConceptCardProps { sx?: SxProps } -const checkConceptIsEmpty = (field: FieldItem) => { - return field.name == "" && - ((field.source == "derived" && !field.transform?.description && (field.transform as ConceptTransformation).code == "") - || (field.source == "custom")) -} - -export const genFreshDerivedConcept = (parentIDs: string[], tableRef: string) => { - return { - id: `concept-${Date.now()}`, name: "", type: "string" as Type, - source: "derived", tableRef: tableRef, - transform: { parentIDs: parentIDs, code: "", description: ""} - } as FieldItem -} - -let ConceptReApplyButton: FC<{field: FieldItem, - focusedTable: DictTable, handleLoading: (loading: boolean) => void}> = function ConceptReApplyButton({ field, focusedTable, handleLoading }) { - - let dispatch = useDispatch(); - - let [codePreview, setCodePreview] = useState(field.transform?.code || ""); - let [tableRowsPreview, setTableRowsPreview] = useState([]); - let [applicationDialogOpen, setApplicationDialogOpen] = useState(false); - - let conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems); - let activeModel = useSelector(dfSelectors.getActiveModel); - - let inputFields = field.transform?.parentIDs.map(pid => { - let parentConcept = conceptShelfItems.find(f => f.id == pid) as FieldItem; - return { - name: parentConcept.name, - } - }) - - let handleGeneratePreview = () => { - handleLoading(true); - - let requestTimeStamp = Date.now(); - - let message = { - method: 'POST', - headers: { 'Content-Type': 'application/json', }, - body: JSON.stringify({ - token: requestTimeStamp, - description: field.transform?.description || "", - input_fields: inputFields, - input_data: {name: focusedTable.id, rows: focusedTable.rows}, - output_name: field.name, - model: activeModel - }), - }; - - // timeout the request after 20 seconds - const controller = new AbortController() - const timeoutId = setTimeout(() => controller.abort(), 20000) - - fetch(getUrls().DERIVE_PY_CONCEPT, {...message, signal: controller.signal }) - .then((response) => response.json()) - .then((data) => { - let candidates = data["results"].filter((r: any) => r["status"] == "ok"); - - if (candidates.length > 0) { - setTableRowsPreview(candidates[0]["content"]['rows']); - setCodePreview(candidates[0]["code"]); - setApplicationDialogOpen(true); - } - handleLoading(false); - }).catch((error) => { - handleLoading(false); - }); - } - let handleApply = () => { - dispatch(dfActions.extendTableWithNewFields({ - tableId: focusedTable.id, - values: tableRowsPreview.map(r => r[field.name]), - columnName: field.name, - previousName: undefined, - parentIDs: field.transform?.parentIDs || [] - })); - } - - let colNames: string[] = tableRowsPreview.length > 0 ? Object.keys(tableRowsPreview[0]) : []; - let colDefs = colNames.map(n => ({ - id: n, - label: n, - dataType: "string" as Type, - source: field.name == n ? "derived" as FieldSource : "original" as FieldSource - })); - return ( - <> - - { handleGeneratePreview() }}> - - - - { setApplicationDialogOpen(false) }}> - Preview: apply concept {field.name} to {focusedTable.displayId} - - - transformation code - - preview of the applied concept - - - - - - - - - ) -} export const ConceptCard: FC = function ConceptCard({ field, sx }) { // concept cards are draggable cards that can be dropped into encoding shelf let theme = useTheme(); @@ -223,90 +107,18 @@ export const ConceptCard: FC = function ConceptCard({ field, s let border = "hidden"; const cursorStyle = isDragging ? "grabbing" : "grab"; - let editOption = field.source == "derived" && ( - - { - setEditMode(!editMode) - dispatch(dfActions.setFocusedTable(field.tableRef)); - }}> - - - ); - - let deriveOption = (field.source == "derived" || field.source == "original") && ( - - t.id == field.tableRef)?.virtual != undefined} - color="primary" aria-label="derive new concept" component="span" onClick={() => { - if (conceptShelfItems.filter(f => f.source == "derived" && f.name == "" - && f.transform?.parentIDs.includes(field.id)).length > 0) { - return - } - handleUpdateConcept(genFreshDerivedConcept([field.id], field.tableRef)); - }} > - - - - ); let deleteOption = !(field.source == "original") && f.source == "derived" && f.transform?.parentIDs.includes(field.id)).length > 0 - - } onClick={() => { handleDeleteConcept(field.id); }}> ; - let reApplyOption = focusedTable && field.source == "derived" - && focusedTable.id != field.tableRef - && !focusedTable.names.includes(field.name) - && field.transform?.parentIDs.every(pid => focusedTable.names.includes((conceptShelfItems.find(f => f.id == pid) as FieldItem).name)) - && ( - - - - ); - - let cleanupOption = focusedTable && field.source == "derived" && focusedTableId != field.tableRef - && field.transform?.parentIDs.every(pid => focusedTable.names.includes((conceptShelfItems.find(f => f.id == pid) as FieldItem).name)) && focusedTable.names.includes(field.name) && ( - remove {field.name} from {focusedTable.displayId}
}> - { - dispatch(dfActions.removeDerivedField({ - tableId: focusedTableId as string, - fieldId: field.id - })); - }}> - - - - ); - - let specialOptions = [ - reApplyOption, - cleanupOption, - ] - let cardHeaderOptions = [ deleteOption, - deriveOption, - editOption, ] - const editModeCard = field.source == "derived" && ( - - { setEditMode(false); }} /> - - ); - const [anchorEl, setAnchorEl] = React.useState(null); const open = Boolean(anchorEl); const handleDTypeClick = (event: React.MouseEvent) => { @@ -373,17 +185,8 @@ export const ConceptCard: FC = function ConceptCard({ field, s {cardHeaderOptions} - {reApplyOption || cleanupOption ? : ""} - {specialOptions}
- - {editModeCard} - ) @@ -397,361 +200,6 @@ export interface ConceptFormProps { turnOffEditMode?: () => void, } -export const DerivedConceptFormV2: FC = function DerivedConceptFormV2({ concept, handleUpdateConcept, handleDeleteConcept, turnOffEditMode }) { - - let theme = useTheme(); - // use tables for infer domains - let tables = useSelector((state: DataFormulatorState) => state.tables); - - let conceptTransform = concept.transform as ConceptTransformation; - - let formattedCode = conceptTransform.code; - - const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems); - - const [name, setName] = useState(concept.name); - const handleNameChange = (event: React.ChangeEvent) => { setName(event.target.value); }; - - // states related to transformation functions, they are only valid when the type is "derived" - const [transformCode, setTransformCode] = useState(formattedCode); - const [transformDesc, setTransformDesc] = useState(conceptTransform.description || ""); - const [transformParentIDs, setTransformParentIDs] = useState(conceptTransform.parentIDs || []); - - const [derivedFieldRef, setDerivedFieldRef] = useState(undefined); - const [tempExtTable, setTempExtTable] = useState<{tableRef: string, rows: any[]} | undefined>(undefined); - - const [codeDialogOpen, setCodeDialogOpen] = useState(false); - - - let dispatch = useDispatch(); - - const [codeGenInProgress, setCodeGenInProgress] = useState(false); - - let nameField = ( - f.name == name && f.id != concept.id) ? "this name already exists" : ""} - size="small" onChange={handleNameChange} required error={name == "" || conceptShelfItems.some(f => f.name == name && f.id != concept.id)} - />) - - let cardTopComponents = undefined; - let cardBottomComponents = undefined; - - let childrenConceptIDs = [concept.id]; - while (true) { - let newChildrens = conceptShelfItems.filter(f => f.source == "derived" - && !childrenConceptIDs.includes(f.id) - && f.transform?.parentIDs.some(pid => childrenConceptIDs.includes(pid))) - .map(f => f.id); - if (newChildrens.length == 0) { - break - } - childrenConceptIDs = [...childrenConceptIDs, ...newChildrens]; - } - - // this might be a hack, but it works - // the first parent is the concept that the user initially clicks to create the derived concept, thus its tableRef is the affiliated table - // since this locks out other tables to be used as parents, the affiliated table is an invariant for tables created here - let affiliatedTableId = conceptShelfItems.find(f => f.id == conceptTransform.parentIDs[0])?.tableRef; - - cardTopComponents = [ - nameField, - - derive from fields: - - , - ] - - let parentConcepts = transformParentIDs.map((parentID) => conceptShelfItems.filter(c => c.id == parentID)[0]); - let viewExamples: any = ""; - - if (transformCode && tempExtTable) { - - let colNames: [string[], string] = [parentConcepts.map(f => f.name), name]; - let colDefs = [...colNames[0], colNames[1]].map(n => ({ - id: n, - label: n, - dataType: "string" as Type, - source: "original" as FieldSource - })); - - viewExamples = ( - - { - let newRow = structuredClone(r); - if (derivedFieldRef && derivedFieldRef != name) { - newRow[name] = r[derivedFieldRef]; - delete newRow[derivedFieldRef]; - } - return newRow; - }).slice(0, 5)} columnDefs={colDefs} rowsPerPageNum={5} compact={true} maxCellWidth={100} /> - - ) - } - - let codeArea = ( - - {codeGenInProgress ? - - : ''} - - {viewExamples ? result on sample data : ""} - {viewExamples} - {transformCode ? transformation code : ""} - {transformCode ? : ""} - - - ) - - let handleProcessResults = (status: string, results: {code: string, content: any}[]) : void => { - setCodeGenInProgress(false); - if (status == "ok") { - - console.log(`[fyi] just received results`); - console.log(results); - - if (results.length > 0) { - let candidate = results[0]; - setTransformCode(candidate.code); - setTempExtTable({ - tableRef: parentConcepts[0].tableRef, - rows: candidate.content.rows, - }); - - dispatch(dfActions.addMessages({ - "timestamp": Date.now(), - "type": "success", - "component": "Field Card", - "value": `Find ${results.length} candidate transformations for concept "${name}".` - })); - } else { - dispatch(dfActions.addMessages({ - "timestamp": Date.now(), - "type": "info", - "component": "Field Card", - "value": `Find ${results.length} candidate transformations for concept "${name}", please try again.` - })); - } - } else { - // TODO: add warnings to show the user - setTransformCode(""); - dispatch(dfActions.addMessages({ - "timestamp": Date.now(), - "type": "error", - "component": "Field Card", - "value": "unable to generate the desired transformation, please try again." - })); - } - } - - let inputFields = parentConcepts.map(c => { - return { - name: c.name, - } - }) - - // pick the dataset with the right parents - let inputTable = tables.find(t => parentConcepts[0].tableRef == t.id) || tables[0]; - - let inputExtTable = { - name: inputTable.id, - rows: inputTable.rows - }; - - let codeDialogBox = { - setTransformDesc(desc); - setDerivedFieldRef(name); - setCodeGenInProgress(true); - }} - size={'small'} - /> - - cardBottomComponents = [ - codeDialogBox, - - {codeArea} - - ] - - const checkDerivedConceptDiff = () => { - let nameTypeNeq = (concept.name != name); - return (nameTypeNeq - || formattedCode != transformCode - || conceptTransform.description != transformDesc - || conceptTransform.parentIDs.toString() != transformParentIDs.toString()); - } - - let saveDisabledMsg = []; - if (name == "" || conceptShelfItems.some(f => f.name == name && f.id != concept.id)) { - saveDisabledMsg.push("concept name is empty") - } - if (concept.source == "derived") { - if (transformCode == "") { - saveDisabledMsg.push("transformation is not specified") - } - } - - return ( - - :not(style)': { margin: "4px", /*width: '25ch'*/ }, }} - noValidate - autoComplete="off"> - {cardTopComponents} - {cardBottomComponents} - - { setCodeDialogOpen(true); }}> - - - { setCodeDialogOpen(false); }}> - - Transformations from {parentConcepts.map(c => c.name).join(", ")} - to {name} - - {codeDialogBox} - - {codeGenInProgress ? - - : ''} - - - - transformation result on sample data - - - - {viewExamples} - - - - - transformation code - - - - - - - - - - - - f.source == "derived" && f.transform?.parentIDs.includes(concept.id)).length > 0} - onClick={() => { handleDeleteConcept(concept.id); }}> - - - - - - - - - - ); -} - - export interface CodexDialogBoxProps { inputData: {name: string, rows: any[]}, outputName: string, diff --git a/src/views/ConceptShelf.tsx b/src/views/ConceptShelf.tsx index 532b9db..6272d98 100644 --- a/src/views/ConceptShelf.tsx +++ b/src/views/ConceptShelf.tsx @@ -15,7 +15,6 @@ import { Button, Divider, IconButton, - Collapse, } from '@mui/material'; import CleaningServicesIcon from '@mui/icons-material/CleaningServices'; @@ -239,11 +238,11 @@ export const ConceptShelf: FC = function ConceptShelf() { display: 'flex', flexDirection: 'row', flexShrink: 0, // Prevent panel from shrinking - width: conceptPanelOpen ? 296 : 32, + width: conceptPanelOpen ? 240 : 32, borderLeft: conceptPanelOpen ? 'none' : '1px solid', borderLeftColor: conceptPanelOpen ? 'transparent' : theme.palette.divider, pl: conceptPanelOpen ? 0 : 1, - transition: 'width 0.3s ease', // Smooth transition + transition: 'width 0.1s linear', // Smooth transition overflow: 'hidden', position: 'relative', }}> @@ -296,7 +295,7 @@ export const ConceptShelf: FC = function ConceptShelf() { right: 0, width: '100%', height: '100%', - background: 'rgba(255,255,255,0.8)', + background: 'rgba(255,255,255,0.95)', pointerEvents: 'none', zIndex: 1 }, diff --git a/src/views/DataFormulator.tsx b/src/views/DataFormulator.tsx index d348b03..282c0c3 100644 --- a/src/views/DataFormulator.tsx +++ b/src/views/DataFormulator.tsx @@ -210,7 +210,7 @@ export const DataFormulatorFC = ({ }) => { let borderBoxStyle = { border: '1px solid rgba(0,0,0,0.1)', borderRadius: '16px', - boxShadow: '0 0 5px rgba(0,0,0,0.1)', + //boxShadow: '0 0 5px rgba(0,0,0,0.1)', } const fixedSplitPane = ( @@ -246,17 +246,6 @@ export const DataFormulatorFC = ({ }) => {
); - let exampleMessyText=`Rank NOC Gold Silver Bronze Total -1 South Korea 5 1 1 7 -2 France* 0 1 1 2 - United States 0 1 1 2 -4 China 0 1 0 1 - Germany 0 1 0 1 -6 Mexico 0 0 1 1 - Turkey 0 0 1 1 -Totals (7 entries) 5 5 5 15 -` - let footer = @@ -280,88 +269,51 @@ Totals (7 entries) 5 5 5 15 let dataUploadRequestBox = + }}> - - - {toolName} + + {toolName} - Turn data into insights with AI agents, with the exploration paths you choose. - - - - load some data - - - - Messy data}/> - - Examples} /> - - files} disabled={false} /> - - clipboard} disabled={false} /> - - Database} /> - {/*
*/} - {/* (csv, tsv, xlsx, json or database) */} - - Load structured data from CSV, Excel, JSON, database, or extract data from{' '} - Example of a screenshot of data: }> - screenshots - {' '} - and{' '} - Example of a messy text block: {exampleMessyText}
}> - text blocks - {' '} - using AI. - + + Explore data with visualizations, powered by AI agents. + + + + To begin, + extract}/>{' '} + data from images or text documents, load {' '} + examples}/>, + upload data from{' '} + clipboard} disabled={false}/> or {' '} + files} disabled={false}/>, + + or connect to a{' '} + database}/>. - - - - or, explore examples + + + + demos - - - {exampleSessions.map((session) => ( - handleLoadExampleSession(session)} - /> - ))} - + + {exampleSessions.map((session) => ( + handleLoadExampleSession(session)} + /> + ))} diff --git a/src/views/DataLoadingChat.tsx b/src/views/DataLoadingChat.tsx index 932540a..7158ee2 100644 --- a/src/views/DataLoadingChat.tsx +++ b/src/views/DataLoadingChat.tsx @@ -294,7 +294,7 @@ export const DataLoadingChatDialog: React.FC = ({ bu sx={{ '& .MuiDialog-paper': { maxWidth: '100%', maxHeight: 840, minWidth: 800 } }} > - Vibe Data Loader + Extract Data {dataCleanBlocks.length > 0 && @@ -1225,7 +1225,7 @@ export const DataThread: FC<{sx?: SxProps}> = function ({ sx }) { flexWrap: drawerOpen ? 'wrap' : 'nowrap', gap: 1, p: 1, - transition: 'max-width 0.3s ease-in-out', // Smooth width transition + transition: 'max-width 0.1s linear', // Smooth width transition }}> {Object.entries(leafTableGroups).map(([groupId, leafTables], i) => { diff --git a/src/views/DataView.tsx b/src/views/DataView.tsx index 54c1128..cf7e3af 100644 --- a/src/views/DataView.tsx +++ b/src/views/DataView.tsx @@ -10,7 +10,7 @@ import { Typography, Box, Link, Breadcrumbs, useTheme, Fade } from '@mui/materia import '../scss/DataView.scss'; import { DictTable } from '../components/ComponentType'; -import { DataFormulatorState, dfActions } from '../app/dfSlice'; +import { DataFormulatorState, dfActions, dfSelectors } from '../app/dfSlice'; import { useDispatch, useSelector } from 'react-redux'; import { Type } from '../data/types'; import { createTableFromFromObjectArray } from '../data/utils'; @@ -30,22 +30,8 @@ export const FreeDataViewFC: FC = function DataView() { const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems); const focusedTableId = useSelector((state: DataFormulatorState) => state.focusedTableId); - - let derivedFields = conceptShelfItems.filter(f => f.source == "derived" && f.name != ""); - - // we only change extTable when conceptShelfItems and tables changes - let tableToRender = useMemo(()=>{ - if (derivedFields.some(f => f.tableRef == focusedTableId)) { - return tables.map(table => { - // try to let table figure out all fields are derivable from the table - let rows = structuredClone(table.rows); - let extTable = createTableFromFromObjectArray(`${table.id}`, rows, table.anchored, table.derive); - return extTable - }) - } else { - return tables; - } - }, [tables, conceptShelfItems]) + const focusedChartId = useSelector((state: DataFormulatorState) => state.focusedChartId); + const allCharts = useSelector(dfSelectors.getAllCharts); useEffect(() => { if(focusedTableId == undefined && tables.length > 0) { @@ -132,30 +118,60 @@ export const FreeDataViewFC: FC = function DataView() { } - let coreTables = tableToRender.filter(t => t.derive == undefined || t.anchored); - let tempTables = tableToRender.filter(t => t.derive && !t.anchored); + // Get all predecessors of the focused table (including the focused table itself) + const getPredecessors = (tableId: string | undefined): DictTable[] => { + if (!tableId) return []; + const table = tables.find(t => t.id === tableId); + if (!table) return []; + + const predecessors: DictTable[] = []; + const visited = new Set(); + + const traverse = (id: string) => { + if (visited.has(id)) return; + visited.add(id); + + const t = tables.find(tbl => tbl.id === id); + if (!t) return; + + // First traverse sources (to get them in order) + if (t.derive?.source) { + t.derive.source.forEach(sourceId => traverse(sourceId)); + } + + predecessors.push(t); + }; + + traverse(tableId); + return predecessors; + }; + + // Get the table ID from the focused chart + const focusedChart = allCharts.find(c => c.id === focusedChartId); + const chartTableId = focusedChart?.tableRef; + + const predecessorTables = getPredecessors(chartTableId); let genTableLink = (t: DictTable) => { dispatch(dfActions.setFocusedTable(t.id)) }}> - {t.displayId || t.id} + {t.displayId || t.id} ; return ( - - - - {coreTables.map(t => genTableLink(t))} - - {/* */} - - - {tempTables.map(t => genTableLink(t))} + + + {predecessorTables.length > 0 && (predecessorTables[predecessorTables.length - 1].derive ? + : + )} + + + {predecessorTables.map(t => genTableLink(t))} - {renderTableBody(tableToRender.find(t => t.id == focusedTableId))} + {renderTableBody(tables.find(t => t.id == focusedTableId))} ); } \ No newline at end of file diff --git a/src/views/ExampleSessions.tsx b/src/views/ExampleSessions.tsx index 8dd82f4..f87e0e6 100644 --- a/src/views/ExampleSessions.tsx +++ b/src/views/ExampleSessions.tsx @@ -61,18 +61,10 @@ export const ExampleSessionCard: React.FC<{ }> = ({ session, theme, onClick, disabled }) => { return ( = ({ {columnDefs.map((columnDef, index) => { let backgroundColor = "white"; let borderBottomColor = theme.palette.primary.main; - if (columnDef.source == "derived") { - backgroundColor = alpha(theme.palette.derived.main, 0.05); - borderBottomColor = theme.palette.derived.main; - } else if (columnDef.source == "custom") { + if (columnDef.source == "custom") { backgroundColor = alpha(theme.palette.custom.main, 0.05); borderBottomColor = theme.palette.custom.main; } else { @@ -260,9 +257,7 @@ export const SelectableDataGrid: React.FC = ({ <> {columnDefs.map((column, colIndex) => { let backgroundColor = "white"; - if (column.source == "derived") { - backgroundColor = alpha(theme.palette.derived.main, 0.05); - } else if (column.source == "custom") { + if (column.source == "custom") { backgroundColor = alpha(theme.palette.custom.main, 0.05); } else { backgroundColor = "rgba(255,255,255,0.05)"; @@ -284,10 +279,10 @@ export const SelectableDataGrid: React.FC = ({ /> - - + {virtual && } {`${rowCount} rows`} @@ -303,7 +298,6 @@ export const SelectableDataGrid: React.FC = ({ > = ({ )} - {!virtual && - { - // Create CSV content - const csvContent = [ - Object.keys(rows[0]).join(','), // Header row - ...rows.map(row => Object.values(row).map(value => - // Handle values that need quotes (contain commas or quotes) - typeof value === 'string' && (value.includes(',') || value.includes('"')) - ? `"${value.replace(/"/g, '""')}"` - : value - ).join(',')) - ].join('\n'); - - // Create and trigger download - const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' }); - const link = document.createElement('a'); - const url = URL.createObjectURL(blob); - link.setAttribute('href', url); - link.setAttribute('download', `${tableName}.csv`); - link.style.visibility = 'hidden'; - document.body.appendChild(link); - link.click(); - document.body.removeChild(link); - }} - > - - - }
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx index a838b88..e25949d 100644 --- a/src/views/VisualizationView.tsx +++ b/src/views/VisualizationView.tsx @@ -38,7 +38,6 @@ import _ from 'lodash'; import ButtonGroup from '@mui/material/ButtonGroup'; import embed from 'vega-embed'; -import AnimateOnChange from 'react-animate-on-change' import '../scss/VisualizationView.scss'; import { useDispatch, useSelector } from 'react-redux'; @@ -82,11 +81,8 @@ import { MuiMarkdown, getOverrides } from 'mui-markdown'; import { dfSelectors } from '../app/dfSlice'; import { ChartRecBox } from './ChartRecBox'; -import { ConceptShelf } from './ConceptShelf'; import { CodeExplanationCard, ConceptExplCards, extractConceptExplanations } from './ExplComponents'; import CodeIcon from '@mui/icons-material/Code'; -import ToggleButton from '@mui/material/ToggleButton'; -import ToggleButtonGroup from '@mui/material/ToggleButtonGroup'; export interface VisPanelProps { } @@ -317,23 +313,9 @@ const VegaChartRenderer: FC<{ true ); - - embed('#' + elementId, { ...assembledChart }, { actions: true, renderer: "svg" }).then(function (result) { - if (result.view.container()?.getElementsByTagName("svg")) { - let comp = result.view.container()?.getElementsByTagName("svg")[0]; - if (comp) { - const { width, height } = comp.getBoundingClientRect(); - comp?.setAttribute("style", `width: ${width * scaleFactor}px; height: ${height * scaleFactor}px;`); - } - } - - if (result.view.container()?.getElementsByTagName("canvas")) { - let comp = result.view.container()?.getElementsByTagName("canvas")[0]; - if (comp && scaleFactor != 1) { - const { width, height } = comp.getBoundingClientRect(); - comp?.setAttribute("style", `width: ${width * scaleFactor}px; height: ${height * scaleFactor}px;`); - } - } + embed('#' + elementId, { ...assembledChart }, { actions: true, renderer: "canvas" }) + .then(function (result) { + // any post-processing of the canvas can go here }).catch((error) => { //console.error('Chart rendering error:', error); }); @@ -634,32 +616,11 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { transformCode = `${table.derive.code}` } - // Handle explanation mode changes - const handleExplanationModeChange = ( - event: React.MouseEvent, - newMode: 'none' | 'code' | 'explanation' | 'concepts', - ) => { - // If clicking the same mode that's already active, turn it off - if (newMode === explanationMode) { - setExplanationMode('none'); - setCodeViewOpen(false); - setCodeExplViewOpen(false); - setConceptExplanationsOpen(false); - } else if (newMode !== null) { - // Otherwise, switch to the new mode - setExplanationMode(newMode); - setCodeViewOpen(newMode === 'code'); - setCodeExplViewOpen(newMode === 'explanation'); - setConceptExplanationsOpen(newMode === 'concepts'); - } - }; - // Check if concepts are available const availableConcepts = extractConceptExplanations(table); const hasConcepts = availableConcepts.length > 0; let derivedTableItems = (resultTable?.derive || table.derive) ? [ - , = function ChartEditorFC({}) { ] : []; let chartActionButtons = [ - - - data: {table.virtual ? : ""} {table.displayId || table.id} - - , ...derivedTableItems, - , saveButton, duplicateButton, deleteButton, @@ -899,16 +854,16 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { let focusedElement = - + @@ -1032,11 +987,11 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { return {synthesisRunning ? - - : ''} + position: "absolute", height: "calc(100%)", width: "calc(100%)", zIndex: 1001, + backgroundColor: "rgba(243, 243, 243, 0.8)", display: "flex", alignItems: "center" + }}> + + : ''} {chartUnavailable ? "" : chartResizer} {content} From 6f78c1f694d19b6266f308019300dcf509905431 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Wed, 3 Dec 2025 16:35:59 -0800 Subject: [PATCH 05/37] some cleanup --- src/app/dfSlice.tsx | 69 ++------ src/views/ConceptCard.tsx | 144 +--------------- src/views/DataFormulator.tsx | 63 +++---- src/views/EncodingShelfCard.tsx | 2 +- src/views/ModelSelectionDialog.tsx | 262 +++++++++-------------------- src/views/ReportView.tsx | 5 +- src/views/ViewUtils.tsx | 7 +- src/views/VisualizationView.tsx | 2 +- 8 files changed, 132 insertions(+), 422 deletions(-) diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx index af1fd32..393d134 100644 --- a/src/app/dfSlice.tsx +++ b/src/app/dfSlice.tsx @@ -52,12 +52,6 @@ export interface ModelConfig { api_version?: string; } -// Define model slot types -export const MODEL_SLOT_TYPES = ['generation', 'hint'] as const; -export type ModelSlotType = typeof MODEL_SLOT_TYPES[number]; - -// Derive ModelSlots interface from the constant -export type ModelSlots = Partial>; export interface ClientConfig { formulateTimeoutSeconds: number; @@ -83,7 +77,7 @@ export interface DataFormulatorState { sessionId: string | undefined; models: ModelConfig[]; - modelSlots: ModelSlots; + selectedModelId: string | undefined; testedModels: {id: string, status: 'ok' | 'error' | 'testing' | 'unknown', message: string}[]; tables : DictTable[]; @@ -138,7 +132,7 @@ const initialState: DataFormulatorState = { sessionId: undefined, models: [], - modelSlots: {}, + selectedModelId: undefined, testedModels: [], tables: [], @@ -371,7 +365,7 @@ export const dataFormulatorSlice = createSlice({ // models should not be loaded again, especially they may be from others state.agentRules = state.agentRules || initialState.agentRules; state.models = state.models || []; - state.modelSlots = state.modelSlots || {}; + state.selectedModelId = state.selectedModelId || undefined; state.testedModels = state.testedModels || []; state.dataLoaderConnectParams = state.dataLoaderConnectParams || {}; state.serverConfig = initialState.serverConfig; @@ -425,25 +419,16 @@ export const dataFormulatorSlice = createSlice({ state.agentRules = action.payload; }, selectModel: (state, action: PayloadAction) => { - state.modelSlots = { ...state.modelSlots, generation: action.payload }; - }, - setModelSlot: (state, action: PayloadAction<{slotType: ModelSlotType, modelId: string | undefined}>) => { - state.modelSlots = { ...state.modelSlots, [action.payload.slotType]: action.payload.modelId }; - }, - setModelSlots: (state, action: PayloadAction) => { - state.modelSlots = action.payload; + state.selectedModelId = action.payload; }, addModel: (state, action: PayloadAction) => { state.models = [...state.models, action.payload]; }, removeModel: (state, action: PayloadAction) => { state.models = state.models.filter(model => model.id != action.payload); - // Remove the model from all slots if it's assigned - Object.keys(state.modelSlots).forEach(slotType => { - if (state.modelSlots[slotType as ModelSlotType] === action.payload) { - state.modelSlots[slotType as ModelSlotType] = undefined; - } - }); + if (state.selectedModelId == action.payload) { + state.selectedModelId = undefined; + } }, updateModelStatus: (state, action: PayloadAction<{id: string, status: 'ok' | 'error' | 'testing' | 'unknown', message: string}>) => { let id = action.payload.id; @@ -691,12 +676,7 @@ export const dataFormulatorSlice = createSlice({ if (index != -1) { conceptShelfItems[index] = concept; } else { - if (concept.source != "derived") { - conceptShelfItems = [concept, ...conceptShelfItems]; - } else { - // insert the new concept right after the first parent - conceptShelfItems.splice(conceptShelfItems.findIndex(f => f.id == concept.transform?.parentIDs[0]) + 1, 0, concept) - } + conceptShelfItems = [concept, ...conceptShelfItems]; } state.conceptShelfItems = conceptShelfItems; }, @@ -708,20 +688,7 @@ export const dataFormulatorSlice = createSlice({ && Object.entries(chart.encodingMap).some(([channel, encoding]) => encoding.fieldID && conceptID == encoding.fieldID))) { console.log("cannot delete!") } else { - let field = state.conceptShelfItems.find(f => f.id == conceptID); - if (field?.source == "derived") { - // delete generated column from the derived table - let table = state.tables.find(t => t.id == field.tableRef) as DictTable; - let fieldIndex = table.names.indexOf(field.name); - table.names = table.names.slice(0, fieldIndex).concat(table.names.slice(fieldIndex + 1)); - delete table.metadata[field.name]; - table.rows = table.rows.map(row => { - delete row[field.name]; - return row; - }); - } state.conceptShelfItems = state.conceptShelfItems.filter(f => f.id != conceptID); - for (let chart of allCharts) { for (let [channel, encoding] of Object.entries(chart.encodingMap)) { if (encoding.fieldID && conceptID == encoding.fieldID) { @@ -776,9 +743,6 @@ export const dataFormulatorSlice = createSlice({ state.conceptShelfItems = state.conceptShelfItems.filter(field => !(field.source == "custom" && !(fieldNamesFromTables.includes(field.name) || fieldIdsReferredByCharts.includes(field.id)))) - - // consider cleaning up other fields if - }, addMessages: (state, action: PayloadAction) => { state.messages = [...state.messages, action.payload]; @@ -939,12 +903,8 @@ export const dataFormulatorSlice = createSlice({ ...state.testedModels.filter(t => !defaultModels.map((m: ModelConfig) => m.id).includes(t.id)) ] - if (defaultModels.length > 0) { - for (const slotType of MODEL_SLOT_TYPES) { - if (state.modelSlots[slotType] == undefined) { - state.modelSlots[slotType] = defaultModels[0].id; - } - } + if (defaultModels.length > 0 && state.selectedModelId == undefined) { + state.selectedModelId = defaultModels[0].id; } // console.log("load model complete"); @@ -970,14 +930,7 @@ export const dataFormulatorSlice = createSlice({ export const dfSelectors = { getActiveModel: (state: DataFormulatorState) : ModelConfig => { - return state.models.find(m => m.id == state.modelSlots.generation) || state.models[0]; - }, - getModelBySlot: (state: DataFormulatorState, slotType: ModelSlotType) : ModelConfig | undefined => { - const modelId = state.modelSlots[slotType]; - return modelId ? state.models.find(m => m.id === modelId) : undefined; - }, - getAllSlotTypes: () : ModelSlotType[] => { - return [...MODEL_SLOT_TYPES]; + return state.models.find(m => m.id == state.selectedModelId) || state.models[0]; }, getActiveBaseTableIds: (state: DataFormulatorState) => { let focusedTableId = state.focusedTableId; diff --git a/src/views/ConceptCard.tsx b/src/views/ConceptCard.tsx index 8878aa6..5fab7b4 100644 --- a/src/views/ConceptCard.tsx +++ b/src/views/ConceptCard.tsx @@ -12,31 +12,13 @@ import 'prismjs/themes/prism.css'; //Example style, you can use another import { useTheme } from '@mui/material/styles'; import { - Chip, Card, Box, - CardContent, Typography, IconButton, - Button, TextField, - FormControl, - InputLabel, - SelectChangeEvent, - MenuItem, - Checkbox, - Menu, - ButtonGroup, Tooltip, - styled, LinearProgress, - Dialog, - FormControlLabel, - DialogActions, - DialogTitle, - DialogContent, - Divider, - Select, SxProps, } from '@mui/material'; @@ -119,24 +101,10 @@ export const ConceptCard: FC = function ConceptCard({ field, s deleteOption, ] - const [anchorEl, setAnchorEl] = React.useState(null); - const open = Boolean(anchorEl); - const handleDTypeClick = (event: React.MouseEvent) => { - setAnchorEl(event.currentTarget); - }; - const handleDTypeClose = () => { - setAnchorEl(null); - }; - let typeIcon = ( - - {getIconFromType(focusedTable?.metadata[field.name]?.type || Type.Auto)} - + + {getIconFromType(focusedTable?.metadata[field.name]?.type)} + ) let fieldNameEntry = field.name != "" ? = function ConceptCard({ field, s ) return cardComponent; -} - -export interface ConceptFormProps { - concept: FieldItem, - handleUpdateConcept: (conept: FieldItem) => void, - handleDeleteConcept: (conceptID: string) => void, - turnOffEditMode?: () => void, -} - -export interface CodexDialogBoxProps { - inputData: {name: string, rows: any[]}, - outputName: string, - inputFields: {name: string}[], - initialDescription: string, - callWhenSubmit: (desc: string) => void, - handleProcessResults: (status: string, results: {code: string, content: any[]}[]) => void, // return processed cnadidates for the ease of logging - size: "large" | "small", -} - - -export const PyCodexDialogBox: FC = function ({ - initialDescription, inputFields, inputData, outputName, callWhenSubmit, handleProcessResults, size="small" }) { - - let activeModel = useSelector(dfSelectors.getActiveModel); - - let [description, setDescription] = useState(initialDescription); - let [requestTimeStamp, setRequestTimeStamp] = useState(0); - - let defaultInstruction = `Derive ${outputName} from ${inputFields.map(f => f.name).join(", ")}`; - - let formulateButton = - { - - setRequestTimeStamp(Date.now()); - //setTransformCode(""); - - console.log(`[fyi] just sent request "${description}" at ${requestTimeStamp}`); - - let message = { - method: 'POST', - headers: { 'Content-Type': 'application/json', }, - body: JSON.stringify({ - token: requestTimeStamp, - description: description, - input_fields: inputFields, - input_data: inputData, - output_name: outputName, - model: activeModel - }), - }; - - callWhenSubmit(description); - - // timeout the request after 20 seconds - const controller = new AbortController() - const timeoutId = setTimeout(() => controller.abort(), 20000) - - fetch(getUrls().DERIVE_PY_CONCEPT, {...message, signal: controller.signal }) - .then((response) => response.json()) - .then((data) => { - let candidates = data["results"].filter((r: any) => r["status"] == "ok"); - handleProcessResults(data["status"], candidates); - }).catch((error) => { - handleProcessResults("error", []); - }); - }}> - - - - - let textBox = - transformation prompt - { - if (event.key === "Enter" || event.key === "Tab") { - // write your functionality here - let target = event.target as HTMLInputElement; - if (target.value == "" && target.placeholder != "") { - target.value = target.placeholder; - setDescription(defaultInstruction); - event.preventDefault(); - } - } - }} - value={description} - placeholder={defaultInstruction} onChange={(event: any) => { setDescription(event.target.value) }} - variant="standard" - /> - - - return textBox; -} +} \ No newline at end of file diff --git a/src/views/DataFormulator.tsx b/src/views/DataFormulator.tsx index 282c0c3..ead4e94 100644 --- a/src/views/DataFormulator.tsx +++ b/src/views/DataFormulator.tsx @@ -9,6 +9,7 @@ import { DataFormulatorState, dfActions, dfSelectors, + ModelConfig, } from '../app/dfSlice' import _ from 'lodash'; @@ -58,16 +59,10 @@ export const DataFormulatorFC = ({ }) => { const tables = useSelector((state: DataFormulatorState) => state.tables); const models = useSelector((state: DataFormulatorState) => state.models); - const modelSlots = useSelector((state: DataFormulatorState) => state.modelSlots); + const selectedModelId = useSelector((state: DataFormulatorState) => state.selectedModelId); const viewMode = useSelector((state: DataFormulatorState) => state.viewMode); const theme = useTheme(); - const noBrokenModelSlots= useSelector((state: DataFormulatorState) => { - const slotTypes = dfSelectors.getAllSlotTypes(); - return slotTypes.every( - slotType => state.modelSlots[slotType] !== undefined && state.testedModels.find(t => t.id == state.modelSlots[slotType])?.status != 'error'); - }); - const dispatch = useDispatch(); const handleLoadExampleSession = (session: ExampleSession) => { @@ -136,11 +131,12 @@ export const DataFormulatorFC = ({ }) => { useEffect(() => { const findWorkingModel = async () => { - let assignedModels = models.filter(m => Object.values(modelSlots).includes(m.id)); - let unassignedModels = models.filter(m => !Object.values(modelSlots).includes(m.id)); - - // Test assigned models in parallel for faster loading - const assignedPromises = assignedModels.map(async (model) => { + let selectedModel = models.find(m => m.id == selectedModelId); + let otherModels = models.filter(m => m.id != selectedModelId); + + let modelsToTest = [selectedModel, ...otherModels].filter(m => m != undefined); + + let testModel = async (model: ModelConfig) => { const message = { method: 'POST', headers: { 'Content-Type': 'application/json', }, @@ -150,32 +146,24 @@ export const DataFormulatorFC = ({ }) => { const response = await fetch(getUrls().TEST_MODEL, {...message }); const data = await response.json(); const status = data["status"] || 'error'; - dispatch(dfActions.updateModelStatus({id: model.id, status, message: data["message"] || ""})); - return { model, status }; + return {model, status, message: data["message"] || ""}; } catch (error) { - dispatch(dfActions.updateModelStatus({id: model.id, status: 'error', message: (error as Error).message || 'Failed to test model'})); - return { model, status: 'error' }; + return {model, status: 'error', message: (error as Error).message || 'Failed to test model'}; } - }); - - await Promise.all(assignedPromises); - + } + // Then test unassigned models sequentially until one works - for (let model of unassignedModels) { - const message = { - method: 'POST', - headers: { 'Content-Type': 'application/json', }, - body: JSON.stringify({ model }), + for (let model of modelsToTest) { + let testResult = await testModel(model); + dispatch(dfActions.updateModelStatus({ + id: model.id, + status: testResult.status, + message: testResult.message + })); + if (testResult.status == 'ok') { + dispatch(dfActions.selectModel(model.id)); + return; }; - try { - const response = await fetch(getUrls().TEST_MODEL, {...message }); - const data = await response.json(); - const status = data["status"] || 'error'; - dispatch(dfActions.updateModelStatus({id: model.id, status, message: data["message"] || ""})); - if (status == 'ok') break; - } catch (error) { - dispatch(dfActions.updateModelStatus({id: model.id, status: 'error', message: (error as Error).message || 'Failed to test model'})); - } } }; @@ -268,6 +256,11 @@ export const DataFormulatorFC = ({ }) => { let dataUploadRequestBox = @@ -324,7 +317,7 @@ export const DataFormulatorFC = ({ }) => { {tables.length > 0 ? fixedSplitPane : dataUploadRequestBox} - {!noBrokenModelSlots && ( + {selectedModelId == undefined && ( = function ({ chartId const timeoutId = setTimeout(() => controller.abort(), config.formulateTimeoutSeconds * 1000); fetch(engine, {...message, signal: controller.signal }) - .then((response) => response.json()) + .then((response: Response) => response.json()) .then((data) => { dispatch(dfActions.changeChartRunningStatus({chartId, status: false})) diff --git a/src/views/ModelSelectionDialog.tsx b/src/views/ModelSelectionDialog.tsx index 7e7957a..d2ef7f7 100644 --- a/src/views/ModelSelectionDialog.tsx +++ b/src/views/ModelSelectionDialog.tsx @@ -9,8 +9,6 @@ import { DataFormulatorState, dfActions, ModelConfig, - ModelSlots, - ModelSlotType, dfSelectors, } from '../app/dfSlice' @@ -43,6 +41,7 @@ import { Paper, Box, Divider, + Checkbox, } from '@mui/material'; @@ -55,6 +54,7 @@ import VisibilityOffIcon from '@mui/icons-material/VisibilityOff'; import CheckCircleOutlineIcon from '@mui/icons-material/CheckCircleOutline'; import ErrorOutlineIcon from '@mui/icons-material/ErrorOutline'; import HelpOutlineIcon from '@mui/icons-material/HelpOutline'; +import InfoOutlinedIcon from '@mui/icons-material/InfoOutlined'; import { getUrls } from '../app/utils'; @@ -81,12 +81,11 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { const dispatch = useDispatch(); const models = useSelector((state: DataFormulatorState) => state.models); - const modelSlots = useSelector((state: DataFormulatorState) => state.modelSlots); + const selectedModelId = useSelector((state: DataFormulatorState) => state.selectedModelId); const testedModels = useSelector((state: DataFormulatorState) => state.testedModels); const [modelDialogOpen, setModelDialogOpen] = useState(false); const [showKeys, setShowKeys] = useState(false); - const [tempModelSlots, setTempModelSlots] = useState(modelSlots); const [providerModelOptions, setProviderModelOptions] = useState<{[key: string]: string[]}>({ 'openai': [], 'azure': [], @@ -104,19 +103,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { } // Helper functions for slot management - const updateTempSlot = (slotType: ModelSlotType, modelId: string | undefined) => { - setTempModelSlots(prev => ({ ...prev, [slotType]: modelId })); - }; - - const isModelAssignedToSlot = (modelId: string, slotType: ModelSlotType) => { - return tempModelSlots[slotType] === modelId; - }; - - // Ensure tempModelSlots is updated when modelSlots changes - React.useEffect(() => { - setTempModelSlots(modelSlots); - }, [modelSlots]); - + const [tempSelectedModelId, setTempSelectedModelId] = useState(selectedModelId); const [newEndpoint, setNewEndpoint] = useState(""); // openai, azure, ollama etc const [newModel, setNewModel] = useState(""); const [newApiKey, setNewApiKey] = useState(""); @@ -188,126 +175,13 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { let readyToTest = newModel && (newApiKey || newApiBase); - // Create enhanced slot assignment summary component - const SlotAssignmentSummary: React.FC = () => { - const slotTypes = dfSelectors.getAllSlotTypes(); - - return ( - - Model Assignments - - {slotTypes.map(slotType => { - const assignedModelId = tempModelSlots[slotType]; - const assignedModel = assignedModelId ? models.find(m => m.id === assignedModelId) : undefined; - - let modelExplanation = ""; - if (slotType == 'generation') { - modelExplanation = "exploration planning, code generation"; - } else if (slotType == 'hint') { - modelExplanation = "background data type inference, code explanation"; - } - - return ( - - - Model for {slotType} - - - - {modelExplanation} - - - - - - ); - })} - - - 💡 Tip: Use powerful models for generation tasks and faster models for hints. - - - ); - }; - let newModelEntry = - + + = ({ }) => { updateModelStatus(model, status, data["message"] || ""); // Only assign to slot if test is successful if (status === 'ok') { - for (let slotType of dfSelectors.getAllSlotTypes()) { - if (!tempModelSlots[slotType]) { - updateTempSlot(slotType, id); - } - } + setTempSelectedModelId(id); } }).catch((error) => { updateModelStatus(model, 'error', error.message) @@ -491,8 +361,9 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { let modelTable = - + + Provider API Key Model @@ -519,10 +390,8 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { const borderStyle = ['error'].includes(status) ? '1px dashed lightgray' : undefined; const noBorderStyle = ['error'].includes(status) ? 'none' : undefined; + const disabledStyle = status != 'ok' ? { cursor: 'default', opacity: 0.5 } : undefined; - // Check if model is assigned to any slot - const isAssignedToAnySlot = dfSelectors.getAllSlotTypes().some(slotType => isModelAssignedToSlot(model.id, slotType)); - return ( = ({ }) => { sx={{ '& .MuiTableCell-root': { fontSize: '0.75rem' }, '&:hover': { backgroundColor: '#f8f9fa' }, - backgroundColor: isAssignedToAnySlot ? alpha(theme.palette.success.main, 0.07) : '#fff' + border: tempSelectedModelId == model.id ? `2px solid ${theme.palette.primary.main}` : 'none', + cursor: status == 'ok' ? 'pointer' : 'default', }} + onClick={() => status == 'ok' && setTempSelectedModelId(tempSelectedModelId == model.id ? undefined : model.id)} > - + + setTempSelectedModelId(tempSelectedModelId == model.id ? undefined : model.id)} /> + + {model.endpoint} - + {model.api_key ? (showKeys ? = ({ }) => { : None } - + {model.model} - + {model.api_base ? ( = ({ }) => { )} - + {model.api_version ? ( {model.api_version} @@ -609,11 +485,9 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { onClick={()=>{ dispatch(dfActions.removeModel(model.id)); // Remove from all slots if assigned - dfSelectors.getAllSlotTypes().forEach(slotType => { - if (isModelAssignedToSlot(model.id, slotType)) { - updateTempSlot(slotType, undefined); - } - }); + if (tempSelectedModelId == model.id) { + setTempSelectedModelId(undefined); + } }} sx={{ p: 0.75 }} > @@ -636,30 +510,20 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { ) })} {newModelEntry} - - - - Configuration: Based on LiteLLM. See supported providers. - Use 'openai' provider for OpenAI-compatible APIs. - - - -
- let notAllSlotsReady = Object.values(tempModelSlots).filter(id => id).length !== dfSelectors.getAllSlotTypes().length - || Object.values(tempModelSlots).filter(id => id).some(id => getStatus(id) !== 'ok'); + let modelNotReady = tempSelectedModelId == undefined || getStatus(tempSelectedModelId) !== 'ok'; - let genModelName = models.find(m => m.id == modelSlots['generation'])?.model || 'Unknown'; - let hintModelName = models.find(m => m.id == modelSlots['hint'])?.model || 'Unknown'; - let modelNames = (genModelName == hintModelName) ? genModelName : `${genModelName} / ${hintModelName}`; + let tempModel = models.find(m => m.id == tempSelectedModelId); + let tempModelName = tempModel ? `${tempModel.endpoint}/${tempModel.model}` : 'Please select a model'; + let selectedModelName = models.find(m => m.id == selectedModelId)?.model || 'Unselected'; return <> - - = ({ }) => { } }} > - Configure Models for Different Tasks + Select a model - - - - available models - - + + + + • Models with strong code generation capabilities (e.g., gpt-5.1, claude-sonnet-4-5) provide best experience. + + + • Model configuration based on LiteLLM. See supported providers. + Use openai provider for OpenAI-compatible APIs. + + + • Example configuration: {`{ "provider": "openai", "api_key": "sk-...", "model": "gpt-5.1", "api_base": "", "api_version": "" }`} + + + {modelTable} @@ -688,13 +588,13 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { {showKeys ? 'hide' : 'show'} keys )} - + dispatch(dfActions.selectModel(tempSelectedModelId)); + setModelDialogOpen(false);}}>Use {tempModelName} diff --git a/src/views/ReportView.tsx b/src/views/ReportView.tsx index 244a9e1..22562c2 100644 --- a/src/views/ReportView.tsx +++ b/src/views/ReportView.tsx @@ -218,7 +218,7 @@ export const ReportView: FC = () => { const charts = useSelector((state: DataFormulatorState) => state.charts); const tables = useSelector((state: DataFormulatorState) => state.tables); - const modelSlot = useSelector((state: DataFormulatorState) => state.modelSlots); + const selectedModelId = useSelector((state: DataFormulatorState) => state.selectedModelId); const models = useSelector((state: DataFormulatorState) => state.models); const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems); const config = useSelector((state: DataFormulatorState) => state.config); @@ -606,7 +606,8 @@ export const ReportView: FC = () => { const reportId = `report-${Date.now()}-${Math.floor(Math.random() * 10000)}`; try { - const model = models.find(m => m.id === modelSlot.generation); + let model = models.find(m => m.id == selectedModelId); + if (!model) { throw new Error('No model selected'); } diff --git a/src/views/ViewUtils.tsx b/src/views/ViewUtils.tsx index 91d3f27..3d654d3 100644 --- a/src/views/ViewUtils.tsx +++ b/src/views/ViewUtils.tsx @@ -10,9 +10,8 @@ import { BooleanIcon, NumericalIcon, StringIcon, DateIcon, UnknownIcon } from '. import AutoFixHighIcon from '@mui/icons-material/AutoFixHigh'; import BarChartIcon from '@mui/icons-material/BarChart'; +import CommitIcon from '@mui/icons-material/Commit'; -import prettier from "prettier"; -import parserBabel from 'prettier/parser-babel'; import { DictTable } from '../components/ComponentType'; export const groupConceptItems = (conceptShelfItems: FieldItem[], tables: DictTable[]) => { @@ -45,7 +44,7 @@ export const getIconFromType = (t: Type | undefined): JSX.Element => { case Type.Auto: return ; } - return ; + return ; }; export const getIconFromDtype = (t: "quantitative" | "nominal" | "ordinal" | "temporal" | "auto"): JSX.Element => { @@ -61,5 +60,5 @@ export const getIconFromDtype = (t: "quantitative" | "nominal" | "ordinal" | "te case "auto": return ; } - return ; + return ; }; \ No newline at end of file diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx index e25949d..a9371a1 100644 --- a/src/views/VisualizationView.tsx +++ b/src/views/VisualizationView.tsx @@ -774,7 +774,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { let chartActionItems = isDataStale ? [] : ( - {(table.virtual || table.rows.length > 1000) && !(chartUnavailable || encodingShelfEmpty) ? ( + {(table.virtual || table.rows.length > 5000) && !(chartUnavailable || encodingShelfEmpty) ? ( visualizing From 13b6877636c014c5ca1799969efdea4dc1d6577f Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Wed, 3 Dec 2025 17:37:51 -0800 Subject: [PATCH 06/37] fix some db connection issue --- .../data_loader/mysql_data_loader.py | 218 +++++++++++++----- pyproject.toml | 1 + requirements.txt | 1 + src/views/DBTableManager.tsx | 24 +- 4 files changed, 183 insertions(+), 61 deletions(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index 184eb0f..d397223 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -1,18 +1,22 @@ import json +import logging import pandas as pd import duckdb +import pymysql from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name from data_formulator.security import validate_sql_query -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, List + +logger = logging.getLogger(__name__) class MySQLDataLoader(ExternalDataLoader): @staticmethod - def list_params() -> bool: + def list_params() -> List[Dict[str, Any]]: params_list = [ {"name": "user", "type": "string", "required": True, "default": "root", "description": ""}, {"name": "password", "type": "string", "required": False, "default": "", "description": "leave blank for no password"}, @@ -53,93 +57,187 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti self.params = params self.duck_db_conn = duck_db_conn - # Install and load the MySQL extension - self.duck_db_conn.install_extension("mysql") - self.duck_db_conn.load_extension("mysql") + # Establish native MySQL connection using pymysql + port = self.params.get('port', 3306) + if isinstance(port, str): + port = int(port) if port.strip() else 3306 + if port is None: + port = 3306 - attach_string = "" - for key, value in self.params.items(): - if value is not None and value != "": - attach_string += f"{key}={value} " - - # Detach existing mysqldb connection if it exists try: - self.duck_db_conn.execute("DETACH mysqldb;") - except: - pass # Ignore if mysqldb doesn't exist - - # Register MySQL connection - self.duck_db_conn.execute(f"ATTACH '{attach_string}' AS mysqldb (TYPE mysql);") - - def list_tables(self, table_filter: str = None): - tables_df = self.duck_db_conn.execute(f""" - SELECT TABLE_SCHEMA, TABLE_NAME FROM mysqldb.information_schema.tables + self.mysql_conn = pymysql.connect( + host=self.params.get('host', 'localhost'), + user=self.params.get('user', 'root'), + password=self.params.get('password', ''), + database=self.params.get('database', 'mysql'), + port=port, + cursorclass=pymysql.cursors.DictCursor, + charset='utf8mb4' + ) + self.database = self.params.get('database', 'mysql') + logger.info(f"Successfully connected to MySQL database: {self.database}") + except Exception as e: + logger.error(f"Failed to connect to MySQL: {e}") + raise + + def _execute_query(self, query: str) -> pd.DataFrame: + """Execute a query using native MySQL connection and return a DataFrame.""" + try: + with self.mysql_conn.cursor() as cursor: + cursor.execute(query) + rows = cursor.fetchall() + if rows: + return pd.DataFrame(rows) + else: + # Return empty DataFrame with column names + return pd.DataFrame() + except Exception as e: + logger.error(f"Error executing MySQL query: {e}") + # Try to reconnect if connection was lost + self._reconnect_if_needed() + raise + + def _reconnect_if_needed(self): + """Attempt to reconnect to MySQL if the connection was lost.""" + try: + self.mysql_conn.ping(reconnect=True) + except Exception as e: + logger.warning(f"Reconnection attempt failed: {e}") + # Try to create a new connection + port = self.params.get('port', 3306) + if isinstance(port, str): + port = int(port) if port.strip() else 3306 + if port is None: + port = 3306 + + self.mysql_conn = pymysql.connect( + host=self.params.get('host', 'localhost'), + user=self.params.get('user', 'root'), + password=self.params.get('password', ''), + database=self.params.get('database', 'mysql'), + port=port, + cursorclass=pymysql.cursors.DictCursor, + charset='utf8mb4' + ) + + def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: + # Get list of tables from MySQL directly + tables_query = """ + SELECT TABLE_SCHEMA, TABLE_NAME + FROM information_schema.tables WHERE table_schema NOT IN ('information_schema', 'mysql', 'performance_schema', 'sys') - """).fetch_df() + AND TABLE_TYPE = 'BASE TABLE' + """ + tables_df = self._execute_query(tables_query) + + if tables_df.empty: + return [] results = [] - for schema, table_name in tables_df.values: - - full_table_name = f"mysqldb.{schema}.{table_name}" + for _, row in tables_df.iterrows(): + schema = row['TABLE_SCHEMA'] + table_name = row['TABLE_NAME'] # Apply table filter if provided if table_filter and table_filter.lower() not in table_name.lower(): continue - # Get column information using DuckDB's information schema - columns_df = self.duck_db_conn.execute(f"DESCRIBE {full_table_name}").df() - columns = [{ - 'name': row['column_name'], - 'type': row['column_type'] - } for _, row in columns_df.iterrows()] - - # Get sample data - sample_df = self.duck_db_conn.execute(f"SELECT * FROM {full_table_name} LIMIT 10").df() - sample_rows = json.loads(sample_df.to_json(orient="records")) - - # get row count - row_count = self.duck_db_conn.execute(f"SELECT COUNT(*) FROM {full_table_name}").fetchone()[0] - - table_metadata = { - "row_count": row_count, - "columns": columns, - "sample_rows": sample_rows - } - - results.append({ - "name": full_table_name, - "metadata": table_metadata - }) + full_table_name = f"{schema}.{table_name}" + + try: + # Get column information from MySQL + columns_query = f""" + SELECT COLUMN_NAME, DATA_TYPE + FROM information_schema.columns + WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table_name}' + ORDER BY ORDINAL_POSITION + """ + columns_df = self._execute_query(columns_query) + columns = [{ + 'name': col_row['COLUMN_NAME'], + 'type': col_row['DATA_TYPE'] + } for _, col_row in columns_df.iterrows()] + + # Get sample data + sample_query = f"SELECT * FROM `{schema}`.`{table_name}` LIMIT 10" + sample_df = self._execute_query(sample_query) + sample_rows = json.loads(sample_df.to_json(orient="records", date_format='iso')) + + # Get row count + count_query = f"SELECT COUNT(*) as cnt FROM `{schema}`.`{table_name}`" + count_df = self._execute_query(count_query) + row_count = int(count_df['cnt'].iloc[0]) if not count_df.empty else 0 + + table_metadata = { + "row_count": row_count, + "columns": columns, + "sample_rows": sample_rows + } + + results.append({ + "name": full_table_name, + "metadata": table_metadata + }) + except Exception as e: + logger.warning(f"Error processing table {full_table_name}: {e}") + continue return results def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int = 1000000): - # Create table in the main DuckDB database from MySQL data + """Fetch data from MySQL and ingest into DuckDB.""" if name_as is None: name_as = table_name.split('.')[-1] name_as = sanitize_table_name(name_as) - self.duck_db_conn.execute(f""" - CREATE OR REPLACE TABLE main.{name_as} AS - SELECT * FROM {table_name} - LIMIT {size} - """) + # Parse table name to handle schema.table format + if '.' in table_name: + parts = table_name.split('.') + schema = parts[0] + tbl = parts[1] + query = f"SELECT * FROM `{schema}`.`{tbl}` LIMIT {size}" + else: + query = f"SELECT * FROM `{table_name}` LIMIT {size}" + + # Fetch data from MySQL + df = self._execute_query(query) + + if df.empty: + logger.warning(f"No data fetched from table {table_name}") + return + + # Ingest into DuckDB using the base class method + self.ingest_df_to_duckdb(df, name_as) + logger.info(f"Successfully ingested {len(df)} rows from {table_name} into DuckDB table {name_as}") def view_query_sample(self, query: str) -> str: result, error_message = validate_sql_query(query) if not result: raise ValueError(error_message) - return json.loads(self.duck_db_conn.execute(query).df().head(10).to_json(orient="records")) + # Execute query via native MySQL connection + df = self._execute_query(query) + return json.loads(df.head(10).to_json(orient="records", date_format='iso')) def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: - # Execute the query and get results as a DataFrame + """Execute custom query and ingest results into DuckDB.""" result, error_message = validate_sql_query(query) if not result: raise ValueError(error_message) - df = self.duck_db_conn.execute(query).df() - # Use the base class's method to ingest the DataFrame - self.ingest_df_to_duckdb(df, sanitize_table_name(name_as)) \ No newline at end of file + # Execute query via native MySQL connection + df = self._execute_query(query) + + # Ingest into DuckDB using the base class method + self.ingest_df_to_duckdb(df, sanitize_table_name(name_as)) + return df + + def __del__(self): + """Clean up MySQL connection when the loader is destroyed.""" + try: + if hasattr(self, 'mysql_conn') and self.mysql_conn: + self.mysql_conn.close() + except Exception: + pass \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 1454f73..9a520ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ "vega_datasets", "litellm", "duckdb", + "pymysql", "pyodbc", "numpy", "vl-convert-python", diff --git a/requirements.txt b/requirements.txt index 17adffe..5357205 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ python-dotenv vega_datasets litellm duckdb +pymysql boto3 pyodbc vl-convert-python diff --git a/src/views/DBTableManager.tsx b/src/views/DBTableManager.tsx index de26bb9..ec6dbf4 100644 --- a/src/views/DBTableManager.tsx +++ b/src/views/DBTableManager.tsx @@ -1027,6 +1027,28 @@ export const DataLoaderForm: React.FC<{ let [isConnecting, setIsConnecting] = useState(false); let [mode, setMode] = useState<"view tables" | "query">("view tables"); + + // Initialize params with default values if not already set + React.useEffect(() => { + const defaultParams: Record = {}; + let needsUpdate = false; + + for (const paramDef of paramDefs) { + if (params[paramDef.name] === undefined && paramDef.default !== undefined) { + defaultParams[paramDef.name] = String(paramDef.default); + needsUpdate = true; + } else if (params[paramDef.name] !== undefined) { + defaultParams[paramDef.name] = params[paramDef.name]; + } + } + + if (needsUpdate) { + dispatch(dfActions.updateDataLoaderConnectParams({ + dataLoaderType, + params: { ...params, ...defaultParams } + })); + } + }, [dataLoaderType, paramDefs]); const toggleDisplaySamples = (tableName: string) => { setDisplaySamples({...displaySamples, [tableName]: !displaySamples[tableName]}); } @@ -1170,7 +1192,7 @@ export const DataLoaderForm: React.FC<{ required={paramDef.required} key={paramDef.name} label={paramDef.name} - value={params[paramDef.name]} + value={params[paramDef.name] ?? paramDef.default ?? ''} placeholder={paramDef.description} onChange={(event: any) => { dispatch(dfActions.updateDataLoaderConnectParam({ From 900ea2dd85104d91e06f8c94bd4be3d41c0a44d8 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Wed, 3 Dec 2025 18:12:13 -0800 Subject: [PATCH 07/37] fix and test --- .../data_loader/mysql_data_loader.py | 52 +++++--- .../data_loader/postgresql_data_loader.py | 22 +++- src/app/App.tsx | 74 +++++++---- src/views/DBTableManager.tsx | 98 +++++++-------- src/views/DataLoadingChat.tsx | 46 +++++-- src/views/ModelSelectionDialog.tsx | 17 ++- src/views/TableSelectionView.tsx | 116 +++++++++++------- 7 files changed, 262 insertions(+), 163 deletions(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index d397223..9d68015 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -57,24 +57,38 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti self.params = params self.duck_db_conn = duck_db_conn - # Establish native MySQL connection using pymysql - port = self.params.get('port', 3306) + # Get params as-is from frontend + host = self.params.get('host', '') + user = self.params.get('user', '') + password = self.params.get('password', '') + database = self.params.get('database', '') + + # Validate required params + if not host: + raise ValueError("MySQL host is required") + if not user: + raise ValueError("MySQL user is required") + if not database: + raise ValueError("MySQL database is required") + + # Handle port (only field with sensible default) + port = self.params.get('port', '') if isinstance(port, str): - port = int(port) if port.strip() else 3306 - if port is None: + port = int(port) if port else 3306 + elif not port: port = 3306 try: self.mysql_conn = pymysql.connect( - host=self.params.get('host', 'localhost'), - user=self.params.get('user', 'root'), - password=self.params.get('password', ''), - database=self.params.get('database', 'mysql'), + host=host, + user=user, + password=password, + database=database, port=port, cursorclass=pymysql.cursors.DictCursor, charset='utf8mb4' ) - self.database = self.params.get('database', 'mysql') + self.database = database logger.info(f"Successfully connected to MySQL database: {self.database}") except Exception as e: logger.error(f"Failed to connect to MySQL: {e}") @@ -103,18 +117,22 @@ def _reconnect_if_needed(self): self.mysql_conn.ping(reconnect=True) except Exception as e: logger.warning(f"Reconnection attempt failed: {e}") - # Try to create a new connection - port = self.params.get('port', 3306) + # Try to create a new connection using stored params + host = self.params.get('host', '') + user = self.params.get('user', '') + password = self.params.get('password', '') + + port = self.params.get('port', '') if isinstance(port, str): - port = int(port) if port.strip() else 3306 - if port is None: + port = int(port) if port else 3306 + elif not port: port = 3306 self.mysql_conn = pymysql.connect( - host=self.params.get('host', 'localhost'), - user=self.params.get('user', 'root'), - password=self.params.get('password', ''), - database=self.params.get('database', 'mysql'), + host=host, + user=user, + password=password, + database=self.database, port=port, cursorclass=pymysql.cursors.DictCursor, charset='utf8mb4' diff --git a/py-src/data_formulator/data_loader/postgresql_data_loader.py b/py-src/data_formulator/data_loader/postgresql_data_loader.py index e35671e..ba95d1d 100644 --- a/py-src/data_formulator/data_loader/postgresql_data_loader.py +++ b/py-src/data_formulator/data_loader/postgresql_data_loader.py @@ -29,15 +29,29 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti self.params = params self.duck_db_conn = duck_db_conn + # Get params as-is from frontend + host = self.params.get('host', '') + port = self.params.get('port', '') or '5432' # Only port has a sensible default + user = self.params.get('user', '') + database = self.params.get('database', '') + password = self.params.get('password', '') + + # Validate required params + if not host: + raise ValueError("PostgreSQL host is required") + if not user: + raise ValueError("PostgreSQL user is required") + if not database: + raise ValueError("PostgreSQL database is required") + try: # Install and load the Postgres extension self.duck_db_conn.install_extension("postgres") self.duck_db_conn.load_extension("postgres") # Prepare the connection string for Postgres - port = self.params.get('port', '5432') - password_part = f" password={self.params.get('password', '')}" if self.params.get('password') else "" - attach_string = f"host={self.params['host']} port={port} user={self.params['user']}{password_part} dbname={self.params['database']}" + password_part = f" password={password}" if password else "" + attach_string = f"host={host} port={port} user={user}{password_part} dbname={database}" # Detach existing postgres connection if it exists try: @@ -47,7 +61,7 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti # Register Postgres connection self.duck_db_conn.execute(f"ATTACH '{attach_string}' AS mypostgresdb (TYPE postgres);") - print(f"Successfully connected to PostgreSQL database: {self.params['database']}") + print(f"Successfully connected to PostgreSQL database: {database}") except Exception as e: print(f"Failed to connect to PostgreSQL: {e}") diff --git a/src/app/App.tsx b/src/app/App.tsx index 14e3432..2b5dbd5 100644 --- a/src/app/App.tsx +++ b/src/app/App.tsx @@ -169,7 +169,7 @@ export const ExportStateButton: React.FC<{}> = ({ }) => { // Fields to exclude from serialization const excludedFields = new Set([ 'models', - 'modelSlots', + 'selectedModelId', 'testedModels', 'dataLoaderConnectParams', 'sessionId', @@ -221,7 +221,19 @@ export interface AppFCProps { // Extract menu components into separate components to prevent full app re-renders const TableMenu: React.FC = () => { const [anchorEl, setAnchorEl] = useState(null); + const [openDialog, setOpenDialog] = useState<'database' | 'extract' | 'paste' | 'upload' | null>(null); + const fileInputRef = React.useRef(null); const open = Boolean(anchorEl); + + const handleOpenDialog = (dialog: 'database' | 'extract' | 'paste' | 'upload') => { + setAnchorEl(null); + if (dialog === 'upload') { + // For file upload, trigger the hidden file input + fileInputRef.current?.click(); + } else { + setOpenDialog(dialog); + } + }; return ( <> @@ -246,40 +258,48 @@ const TableMenu: React.FC = () => { }} aria-labelledby="add-table-button" sx={{ - '& .MuiMenuItem-root': { padding: 0, margin: 0 } , - '& .MuiTypography-root': { fontSize: 14, display: 'flex', alignItems: 'center', textTransform: 'none',gap: 1 } + '& .MuiMenuItem-root': { padding: '4px 8px' }, + '& .MuiTypography-root': { fontSize: 14, display: 'flex', alignItems: 'center', textTransform: 'none', gap: 1 } }} > - {}}> - - connect to database - - } /> + handleOpenDialog('database')}> + + connect to database + - {}}> - + handleOpenDialog('extract')}> + extract data (image/messy text) - }/> +
- { - e.preventDefault(); - e.stopPropagation(); - }}> - - paste data (csv/tsv) - - } disabled={false} /> + handleOpenDialog('paste')}> + + paste data (csv/tsv) + - {}} > - - upload data file (csv/tsv/json) - - } disabled={false} /> + handleOpenDialog('upload')}> + + upload data file (csv/tsv/json) + + + {/* Dialogs rendered outside the Menu to avoid keyboard event issues */} + setOpenDialog(null)} + /> + setOpenDialog(null)} + /> + setOpenDialog(null)} + /> + ); }; diff --git a/src/views/DBTableManager.tsx b/src/views/DBTableManager.tsx index ec6dbf4..9bb97eb 100644 --- a/src/views/DBTableManager.tsx +++ b/src/views/DBTableManager.tsx @@ -222,11 +222,18 @@ export class TableStatisticsView extends React.Component void, + // Controlled mode props + open?: boolean, + onClose?: () => void }> = function DBTableSelectionDialog({ buttonElement, sx, + onOpen, + open: controlledOpen, + onClose, }) { const theme = useTheme(); @@ -236,7 +243,14 @@ export const DBTableSelectionDialog: React.FC<{ const tables = useSelector((state: DataFormulatorState) => state.tables); const serverConfig = useSelector((state: DataFormulatorState) => state.serverConfig); - const [tableDialogOpen, setTableDialogOpen] = useState(false); + const [internalOpen, setInternalOpen] = useState(false); + + // Support both controlled and uncontrolled modes + const isControlled = controlledOpen !== undefined; + const tableDialogOpen = isControlled ? controlledOpen : internalOpen; + const setTableDialogOpen = isControlled + ? (open: boolean) => { if (!open && onClose) onClose(); } + : setInternalOpen; const [tableAnalysisMap, setTableAnalysisMap] = useState>({}); // maps data loader type to list of param defs @@ -939,31 +953,34 @@ export const DBTableSelectionDialog: React.FC<{ return ( <> - - Install Data Formulator locally to use database.
- Link: e.stopPropagation()} - > - https://github.com/microsoft/data-formulator - - - ) : ""} - placement="top" - > - - - -
+ {buttonElement && ( + + Install Data Formulator locally to use database.
+ Link: e.stopPropagation()} + > + https://github.com/microsoft/data-formulator + + + ) : ""} + placement="top" + > + + + +
+ )} {setTableDialogOpen(false)}} @@ -1028,27 +1045,6 @@ export const DataLoaderForm: React.FC<{ let [isConnecting, setIsConnecting] = useState(false); let [mode, setMode] = useState<"view tables" | "query">("view tables"); - // Initialize params with default values if not already set - React.useEffect(() => { - const defaultParams: Record = {}; - let needsUpdate = false; - - for (const paramDef of paramDefs) { - if (params[paramDef.name] === undefined && paramDef.default !== undefined) { - defaultParams[paramDef.name] = String(paramDef.default); - needsUpdate = true; - } else if (params[paramDef.name] !== undefined) { - defaultParams[paramDef.name] = params[paramDef.name]; - } - } - - if (needsUpdate) { - dispatch(dfActions.updateDataLoaderConnectParams({ - dataLoaderType, - params: { ...params, ...defaultParams } - })); - } - }, [dataLoaderType, paramDefs]); const toggleDisplaySamples = (tableName: string) => { setDisplaySamples({...displaySamples, [tableName]: !displaySamples[tableName]}); } @@ -1192,8 +1188,8 @@ export const DataLoaderForm: React.FC<{ required={paramDef.required} key={paramDef.name} label={paramDef.name} - value={params[paramDef.name] ?? paramDef.default ?? ''} - placeholder={paramDef.description} + value={params[paramDef.name] ?? ''} + placeholder={paramDef.default ? `e.g. ${paramDef.default}` : paramDef.description} onChange={(event: any) => { dispatch(dfActions.updateDataLoaderConnectParam({ dataLoaderType, paramName: paramDef.name, diff --git a/src/views/DataLoadingChat.tsx b/src/views/DataLoadingChat.tsx index 7158ee2..a3273a5 100644 --- a/src/views/DataLoadingChat.tsx +++ b/src/views/DataLoadingChat.tsx @@ -267,26 +267,48 @@ export const DataLoadingChat: React.FC = () => { }; export interface DataLoadingChatDialogProps { - buttonElement: any; + buttonElement?: any; disabled?: boolean; + onOpen?: () => void; + // Controlled mode props + open?: boolean; + onClose?: () => void; } -export const DataLoadingChatDialog: React.FC = ({ buttonElement, disabled = false }) => { - const [dialogOpen, setDialogOpen] = useState(false); +export const DataLoadingChatDialog: React.FC = ({ + buttonElement, + disabled = false, + onOpen, + open: controlledOpen, + onClose, +}) => { + const [internalOpen, setInternalOpen] = useState(false); const dispatch = useDispatch(); const dataCleanBlocks = useSelector((state: DataFormulatorState) => state.dataCleanBlocks); + // Support both controlled and uncontrolled modes + const isControlled = controlledOpen !== undefined; + const dialogOpen = isControlled ? controlledOpen : internalOpen; + const setDialogOpen = isControlled + ? (open: boolean) => { if (!open && onClose) onClose(); } + : setInternalOpen; + return ( <> - + {buttonElement && ( + + )} setDialogOpen(false)} diff --git a/src/views/ModelSelectionDialog.tsx b/src/views/ModelSelectionDialog.tsx index d2ef7f7..c6f559b 100644 --- a/src/views/ModelSelectionDialog.tsx +++ b/src/views/ModelSelectionDialog.tsx @@ -168,6 +168,10 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { .then((data) => { let status = data["status"] || 'error'; updateModelStatus(model, status, data["message"] || ""); + // Auto-select the first good model if none is currently selected + if (status === 'ok' && !tempSelectedModelId) { + setTempSelectedModelId(model.id); + } }).catch((error) => { updateModelStatus(model, 'error', error.message) }); @@ -180,7 +184,6 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { sx={{ '&:last-child td, &:last-child th': { border: 0 }, padding: "6px 6px"}} > - = ({ }) => { - Provider API Key Model @@ -404,11 +406,6 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { }} onClick={() => status == 'ok' && setTempSelectedModelId(tempSelectedModelId == model.id ? undefined : model.id)} > - - setTempSelectedModelId(tempSelectedModelId == model.id ? undefined : model.id)} /> - {model.endpoint} @@ -537,14 +534,13 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { > Select a model - @@ -580,6 +576,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { {modelTable} + {!serverConfig.DISABLE_DISPLAY_KEYS && ( diff --git a/src/views/TableSelectionView.tsx b/src/views/TableSelectionView.tsx index 80b8fce..4245025 100644 --- a/src/views/TableSelectionView.tsx +++ b/src/views/TableSelectionView.tsx @@ -312,8 +312,11 @@ export const DatasetSelectionDialog: React.FC<{ buttonElement: any }> = function } export interface TableUploadDialogProps { - buttonElement: any; - disabled: boolean; + buttonElement?: any; + disabled?: boolean; + onOpen?: () => void; + // For external control of file input + fileInputRef?: React.RefObject; } const getUniqueTableName = (baseName: string, existingNames: Set): string => { @@ -326,9 +329,10 @@ const getUniqueTableName = (baseName: string, existingNames: Set): strin return uniqueName; }; -export const TableUploadDialog: React.FC = ({ buttonElement, disabled }) => { +export const TableUploadDialog: React.FC = ({ buttonElement, disabled, onOpen, fileInputRef }) => { const dispatch = useDispatch(); - const inputRef = React.useRef(null); + const internalRef = React.useRef(null); + const inputRef = fileInputRef || internalRef; const existingTables = useSelector((state: DataFormulatorState) => state.tables); const existingNames = new Set(existingTables.map(t => t.id)); const serverConfig = useSelector((state: DataFormulatorState) => state.serverConfig); @@ -432,43 +436,52 @@ export const TableUploadDialog: React.FC = ({ buttonElem inputRef={inputRef} onChange={handleFileUpload} /> - - Install Data Formulator locally to enable file upload.
- Link: e.stopPropagation()} + {buttonElement && ( + + Install Data Formulator locally to enable file upload.
+ Link: e.stopPropagation()} + > + https://github.com/microsoft/data-formulator + +
+ ) : ""} + placement="top" + > + + - - + {buttonElement} + + + + )} ); } export interface TableCopyDialogProps { - buttonElement: any; - disabled: boolean; + buttonElement?: any; + disabled?: boolean; + onOpen?: () => void; + // Controlled mode props + open?: boolean; + onClose?: () => void; } export interface TableURLDialogProps { @@ -549,9 +562,19 @@ export const TableURLDialog: React.FC = ({ buttonElement, d } -export const TableCopyDialogV2: React.FC = ({ buttonElement, disabled }) => { +export const TableCopyDialogV2: React.FC = ({ + buttonElement, + disabled, + onOpen, + open: controlledOpen, + onClose, +}) => { - const [dialogOpen, setDialogOpen] = useState(false); + const [internalOpen, setInternalOpen] = useState(false); + + // Support both controlled and uncontrolled modes + const isControlled = controlledOpen !== undefined; + const dialogOpen = isControlled ? controlledOpen : internalOpen; const [tableContent, setTableContent] = useState(""); const [tableContentType, setTableContentType] = useState<'text' | 'image'>('text'); @@ -641,14 +664,18 @@ export const TableCopyDialogV2: React.FC = ({ buttonElemen const handleCloseDialog = useCallback(() => { - setDialogOpen(false); + if (isControlled) { + onClose?.(); + } else { + setInternalOpen(false); + } // Reset state when closing setTableContent(""); setDisplayContent(""); setIsLargeContent(false); setIsOverSizeLimit(false); setShowFullContent(false); - }, []); + }, [isControlled, onClose]); let dialog = = ({ buttonElemen ; return <> - + {buttonElement && ( + + )} {dialog} ; } From f21698ac2b24e6b553c431eb6f76c3149485931e Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Wed, 3 Dec 2025 18:13:46 -0800 Subject: [PATCH 08/37] version bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9a520ab..f06aa27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "data_formulator" -version = "0.5.0.2" +version = "0.5.0.3" requires-python = ">=3.9" authors = [ From 8b0a52335d92290dbe5f832ebbb1092262d2fb61 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Wed, 3 Dec 2025 18:38:17 -0800 Subject: [PATCH 09/37] fix accessibility issues --- src/app/App.tsx | 80 ++++++++++++++++++++++-------------------- src/views/About.tsx | 11 +++--- src/views/DataView.tsx | 7 +--- 3 files changed, 47 insertions(+), 51 deletions(-) diff --git a/src/app/App.tsx b/src/app/App.tsx index 2b5dbd5..5106a52 100644 --- a/src/app/App.tsx +++ b/src/app/App.tsx @@ -687,14 +687,19 @@ export const AppFC: FC = function AppFC(appProps) { {toolName} - + + {!isAboutPage && ( {focusedTableId !== undefined && = function AppFC(appProps) { href="https://youtu.be/3ndlwt0Wi3c" target="_blank" rel="noopener noreferrer" + aria-label="Watch Video" sx={{ color: 'inherit', '&:hover': { @@ -821,6 +822,7 @@ export const AppFC: FC = function AppFC(appProps) { href="https://github.com/microsoft/data-formulator" target="_blank" rel="noopener noreferrer" + aria-label="View on GitHub" sx={{ color: 'inherit', '&:hover': { @@ -837,6 +839,7 @@ export const AppFC: FC = function AppFC(appProps) { href="https://pypi.org/project/data-formulator/" target="_blank" rel="noopener noreferrer" + aria-label="Pip Install" sx={{ color: 'inherit', '&:hover': { @@ -844,7 +847,7 @@ export const AppFC: FC = function AppFC(appProps) { } }} > - + @@ -853,6 +856,7 @@ export const AppFC: FC = function AppFC(appProps) { href="https://discord.gg/mYCZMQKYZb" target="_blank" rel="noopener noreferrer" + aria-label="Join Discord" sx={{ color: 'inherit', '&:hover': { diff --git a/src/views/About.tsx b/src/views/About.tsx index d5000a9..3b9bae3 100644 --- a/src/views/About.tsx +++ b/src/views/About.tsx @@ -453,16 +453,13 @@ export const About: FC<{}> = function About({ }) { How does Data Formulator handle your data? -
    -
  • 📦 Data Storage: Uploaded data (csv, xlsx, json, clipboard, messy data etc.) is stored in browser's local storage only
  • -
  • ⚙️ Data Processing: Local installation runs Python on your machine; online demo sends the data to server for data transformations (but not stored)
  • -
  • 🗄️ Database: Only available for locally installed Data Formulator (a DuckDB database file is created in temp directory to store data); not available in online demo
  • -
  • 🤖 LLM Endpoints: Small data samples are sent to LLM endpoints along with the prompt. Use your trusted model provider if working with private data.
  • -
+
  • Data Storage: Uploaded data (csv, xlsx, json, clipboard, messy data etc.) is stored in browser's local storage only
  • +
  • Data Processing: Local installation runs Python on your machine; online demo sends the data to server for data transformations (but not stored)
  • +
  • Database: Only available for locally installed Data Formulator (a DuckDB database file is created in temp directory to store data); not available in online demo
  • +
  • LLM Endpoints: Small data samples are sent to LLM endpoints along with the prompt. Use your trusted model provider if working with private data.
  • Research Prototype from Microsoft Research diff --git a/src/views/DataView.tsx b/src/views/DataView.tsx index cf7e3af..3d42e45 100644 --- a/src/views/DataView.tsx +++ b/src/views/DataView.tsx @@ -148,7 +148,7 @@ export const FreeDataViewFC: FC = function DataView() { // Get the table ID from the focused chart const focusedChart = allCharts.find(c => c.id === focusedChartId); - const chartTableId = focusedChart?.tableRef; + const chartTableId = focusedChart?.tableRef || focusedTableId; const predecessorTables = getPredecessors(chartTableId); @@ -162,11 +162,6 @@ export const FreeDataViewFC: FC = function DataView() { - - {predecessorTables.length > 0 && (predecessorTables[predecessorTables.length - 1].derive ? - : - )} - {predecessorTables.map(t => genTableLink(t))} From 26824a48a164f8b1622e99589fce0e01b5243c55 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Thu, 4 Dec 2025 17:41:02 -0800 Subject: [PATCH 10/37] small fixes --- .../data_formulator/workflows/exploration_flow.py | 1 - src/views/About.tsx | 5 +++++ src/views/ModelSelectionDialog.tsx | 15 ++++----------- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/py-src/data_formulator/workflows/exploration_flow.py b/py-src/data_formulator/workflows/exploration_flow.py index de3554b..dc241a8 100644 --- a/py-src/data_formulator/workflows/exploration_flow.py +++ b/py-src/data_formulator/workflows/exploration_flow.py @@ -8,7 +8,6 @@ from typing import Dict, List, Any, Optional, Tuple, Generator from data_formulator.agents.agent_exploration import ExplorationAgent -from data_formulator.agents.agent_interactive_explore import InteractiveExploreAgent from data_formulator.agents.agent_py_data_rec import PythonDataRecAgent from data_formulator.agents.agent_sql_data_rec import SQLDataRecAgent from data_formulator.agents.client_utils import Client diff --git a/src/views/About.tsx b/src/views/About.tsx index 3b9bae3..8277733 100644 --- a/src/views/About.tsx +++ b/src/views/About.tsx @@ -188,6 +188,11 @@ export const About: FC<{}> = function About({ }) { overflowY: "auto", width: '100%', height: '100%', + background: ` + linear-gradient(90deg, ${alpha(theme.palette.text.secondary, 0.01)} 1px, transparent 1px), + linear-gradient(0deg, ${alpha(theme.palette.text.secondary, 0.01)} 1px, transparent 1px) + `, + backgroundSize: '16px 16px', }}> {/* Header with logo and title */} diff --git a/src/views/ModelSelectionDialog.tsx b/src/views/ModelSelectionDialog.tsx index c6f559b..2c97d27 100644 --- a/src/views/ModelSelectionDialog.tsx +++ b/src/views/ModelSelectionDialog.tsx @@ -253,7 +253,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { fullWidth value={newModel} onChange={(event) => { setNewModel(event.target.value); }} - placeholder="model name" + placeholder="e.g., gpt-5.1" error={newEndpoint != "" && !newModel} slotProps={{ input: { @@ -265,7 +265,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => {
    = ({ }) => { }} value={newApiVersion} onChange={(event: any) => { setNewApiVersion(event.target.value); }} autoComplete='off' - placeholder="api_version" + placeholder="optional" /> @@ -565,14 +565,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { fontSize: '0.7rem' }}>openai provider for OpenAI-compatible APIs. - - • Example configuration: {`{ "provider": "openai", "api_key": "sk-...", "model": "gpt-5.1", "api_base": "", "api_version": "" }`} - + {modelTable} From 0b0931a9a52215c7f8af1f888c85f67f1df65166 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Thu, 4 Dec 2025 17:45:01 -0800 Subject: [PATCH 11/37] Update py-src/data_formulator/data_loader/mysql_data_loader.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../data_loader/mysql_data_loader.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index 9d68015..32c3a45 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -210,14 +210,23 @@ def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int name_as = sanitize_table_name(name_as) - # Parse table name to handle schema.table format + # Validate and sanitize table name components + sanitized_size = None + try: + sanitized_size = int(size) + if sanitized_size <= 0: + raise ValueError("Size must be a positive integer.") + except Exception: + raise ValueError("Size parameter must be a positive integer.") + if '.' in table_name: parts = table_name.split('.') - schema = parts[0] - tbl = parts[1] - query = f"SELECT * FROM `{schema}`.`{tbl}` LIMIT {size}" + schema = sanitize_table_name(parts[0]) + tbl = sanitize_table_name(parts[1]) + query = f"SELECT * FROM `{schema}`.`{tbl}` LIMIT {sanitized_size}" else: - query = f"SELECT * FROM `{table_name}` LIMIT {size}" + sanitized_table_name = sanitize_table_name(table_name) + query = f"SELECT * FROM `{sanitized_table_name}` LIMIT {sanitized_size}" # Fetch data from MySQL df = self._execute_query(query) From c345c704b117ec529044f173e59e6071d3505e40 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Thu, 4 Dec 2025 17:45:55 -0800 Subject: [PATCH 12/37] Update src/views/VisualizationView.tsx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/views/VisualizationView.tsx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx index a9371a1..efad3a0 100644 --- a/src/views/VisualizationView.tsx +++ b/src/views/VisualizationView.tsx @@ -313,6 +313,10 @@ const VegaChartRenderer: FC<{ true ); + // Use "canvas" renderer for Vega charts instead of "svg". + // Reason: Canvas provides better performance for large datasets and complex charts, + // and avoids some SVG rendering issues in certain browsers. Note that this may affect + // accessibility and text selection. If SVG features are needed, consider reverting. embed('#' + elementId, { ...assembledChart }, { actions: true, renderer: "canvas" }) .then(function (result) { // any post-processing of the canvas can go here From f280be6965c3a5f9b0958a9237ac62ceb46e321d Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Thu, 4 Dec 2025 17:46:49 -0800 Subject: [PATCH 13/37] Update py-src/data_formulator/data_loader/mysql_data_loader.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../data_loader/mysql_data_loader.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index 32c3a45..a64d0c0 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -261,10 +261,18 @@ def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: self.ingest_df_to_duckdb(df, sanitize_table_name(name_as)) return df - def __del__(self): - """Clean up MySQL connection when the loader is destroyed.""" - try: - if hasattr(self, 'mysql_conn') and self.mysql_conn: + def close(self): + """Explicitly close the MySQL connection.""" + if hasattr(self, 'mysql_conn') and self.mysql_conn: + try: self.mysql_conn.close() - except Exception: - pass \ No newline at end of file + except Exception as e: + logger.warning(f"Error closing MySQL connection: {e}") + + def __enter__(self): + """Support context manager entry.""" + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Support context manager exit and cleanup.""" + self.close() \ No newline at end of file From 72b65e71adb75ef2b42de5faef726c28414d6f2d Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Thu, 4 Dec 2025 18:53:22 -0800 Subject: [PATCH 14/37] Update py-src/data_formulator/data_loader/mysql_data_loader.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../data_loader/mysql_data_loader.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index a64d0c0..6188a7a 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -165,25 +165,25 @@ def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: try: # Get column information from MySQL - columns_query = f""" - SELECT COLUMN_NAME, DATA_TYPE - FROM information_schema.columns - WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table_name}' - ORDER BY ORDINAL_POSITION - """ - columns_df = self._execute_query(columns_query) + columns_query = ( + "SELECT COLUMN_NAME, DATA_TYPE " + "FROM information_schema.columns " + "WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s " + "ORDER BY ORDINAL_POSITION" + ) + columns_df = self._execute_query(columns_query, (schema, table_name)) columns = [{ 'name': col_row['COLUMN_NAME'], 'type': col_row['DATA_TYPE'] } for _, col_row in columns_df.iterrows()] # Get sample data - sample_query = f"SELECT * FROM `{schema}`.`{table_name}` LIMIT 10" + sample_query = "SELECT * FROM `{}`.`{}` LIMIT 10".format(schema, table_name) sample_df = self._execute_query(sample_query) sample_rows = json.loads(sample_df.to_json(orient="records", date_format='iso')) # Get row count - count_query = f"SELECT COUNT(*) as cnt FROM `{schema}`.`{table_name}`" + count_query = "SELECT COUNT(*) as cnt FROM `{}`.`{}`".format(schema, table_name) count_df = self._execute_query(count_query) row_count = int(count_df['cnt'].iloc[0]) if not count_df.empty else 0 From 350c138dc68c2baf58265404ea33ada3e5d0e148 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Dec 2025 02:54:20 +0000 Subject: [PATCH 15/37] Initial plan From f4b2d7e4f8c3742220977b499eb79d4a76e2179d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Dec 2025 02:54:38 +0000 Subject: [PATCH 16/37] Initial plan From 95dc5dac87144ff20207d49e9b65a7b1a58fcf80 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Dec 2025 02:57:32 +0000 Subject: [PATCH 17/37] Fix password sanitization in PostgreSQL and add __del__ to MySQL data loader Co-authored-by: Chenglong-MS <93549116+Chenglong-MS@users.noreply.github.com> --- py-src/data_formulator/data_loader/mysql_data_loader.py | 4 ++++ .../data_loader/postgresql_data_loader.py | 9 +++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index 6188a7a..8f7b3a9 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -275,4 +275,8 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): """Support context manager exit and cleanup.""" + self.close() + + def __del__(self): + """Clean up MySQL connection when the loader is destroyed.""" self.close() \ No newline at end of file diff --git a/py-src/data_formulator/data_loader/postgresql_data_loader.py b/py-src/data_formulator/data_loader/postgresql_data_loader.py index ba95d1d..655616f 100644 --- a/py-src/data_formulator/data_loader/postgresql_data_loader.py +++ b/py-src/data_formulator/data_loader/postgresql_data_loader.py @@ -44,12 +44,16 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti if not database: raise ValueError("PostgreSQL database is required") + # Create a sanitized version for logging (excludes password) + sanitized_attach_string = f"host={host} port={port} user={user} dbname={database}" + try: # Install and load the Postgres extension self.duck_db_conn.install_extension("postgres") self.duck_db_conn.load_extension("postgres") # Prepare the connection string for Postgres + # Note: attach_string contains sensitive credentials - do not log it password_part = f" password={password}" if password else "" attach_string = f"host={host} port={port} user={user}{password_part} dbname={database}" @@ -64,8 +68,9 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti print(f"Successfully connected to PostgreSQL database: {database}") except Exception as e: - print(f"Failed to connect to PostgreSQL: {e}") - raise + # Log error with sanitized connection string to avoid exposing password + print(f"Failed to connect to PostgreSQL ({sanitized_attach_string}): Connection failed") + raise ValueError(f"Failed to connect to PostgreSQL database '{database}' on host '{host}': {type(e).__name__}") def list_tables(self): try: From 0436063b2b65ef45997255671e0b00c27f40d85d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Dec 2025 02:57:39 +0000 Subject: [PATCH 18/37] Add __del__ method to MySQLDataLoader for connection cleanup Co-authored-by: Chenglong-MS <93549116+Chenglong-MS@users.noreply.github.com> --- .../data_formulator/data_loader/mysql_data_loader.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index 6188a7a..5dd8e10 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -275,4 +275,12 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): """Support context manager exit and cleanup.""" - self.close() \ No newline at end of file + self.close() + + def __del__(self): + """Clean up MySQL connection when the loader is destroyed.""" + try: + self.close() + except Exception: + # Silently ignore exceptions during garbage collection/interpreter shutdown + pass \ No newline at end of file From 3017e363fb6a5a746feab25bfc65f7bd184ca06d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 5 Dec 2025 02:58:32 +0000 Subject: [PATCH 19/37] Improve error messages and add exception handling to __del__ method Co-authored-by: Chenglong-MS <93549116+Chenglong-MS@users.noreply.github.com> --- py-src/data_formulator/data_loader/mysql_data_loader.py | 6 +++++- .../data_formulator/data_loader/postgresql_data_loader.py | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index 8f7b3a9..e9a202b 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -279,4 +279,8 @@ def __exit__(self, exc_type, exc_val, exc_tb): def __del__(self): """Clean up MySQL connection when the loader is destroyed.""" - self.close() \ No newline at end of file + try: + self.close() + except Exception: + # Ignore errors during destruction to prevent exceptions in garbage collection + pass \ No newline at end of file diff --git a/py-src/data_formulator/data_loader/postgresql_data_loader.py b/py-src/data_formulator/data_loader/postgresql_data_loader.py index 655616f..3aa33e1 100644 --- a/py-src/data_formulator/data_loader/postgresql_data_loader.py +++ b/py-src/data_formulator/data_loader/postgresql_data_loader.py @@ -69,8 +69,9 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti except Exception as e: # Log error with sanitized connection string to avoid exposing password - print(f"Failed to connect to PostgreSQL ({sanitized_attach_string}): Connection failed") - raise ValueError(f"Failed to connect to PostgreSQL database '{database}' on host '{host}': {type(e).__name__}") + error_type = type(e).__name__ + print(f"Failed to connect to PostgreSQL ({sanitized_attach_string}): {error_type}") + raise ValueError(f"Failed to connect to PostgreSQL database '{database}' on host '{host}': {error_type}") def list_tables(self): try: From 5912ff90a192f64331d3fbd7ab7ab1b5e48c206e Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Thu, 4 Dec 2025 21:43:02 -0800 Subject: [PATCH 20/37] fix issues that copilot introduced, bruh --- .../data_loader/mysql_data_loader.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index e9a202b..7910484 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -94,11 +94,16 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti logger.error(f"Failed to connect to MySQL: {e}") raise - def _execute_query(self, query: str) -> pd.DataFrame: - """Execute a query using native MySQL connection and return a DataFrame.""" + def _execute_query(self, query: str, params: tuple = None) -> pd.DataFrame: + """Execute a query using native MySQL connection and return a DataFrame. + + Args: + query: SQL query string. Use %s for parameterized queries. + params: Optional tuple of parameters for parameterized queries. + """ try: with self.mysql_conn.cursor() as cursor: - cursor.execute(query) + cursor.execute(query, params) rows = cursor.fetchall() if rows: return pd.DataFrame(rows) @@ -139,14 +144,15 @@ def _reconnect_if_needed(self): ) def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: - # Get list of tables from MySQL directly + # Get list of tables from the connected database + # Filter by the specific database we're connected to for better performance tables_query = """ SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.tables - WHERE table_schema NOT IN ('information_schema', 'mysql', 'performance_schema', 'sys') + WHERE TABLE_SCHEMA = %s AND TABLE_TYPE = 'BASE TABLE' """ - tables_df = self._execute_query(tables_query) + tables_df = self._execute_query(tables_query, (self.database,)) if tables_df.empty: return [] From 64d69e605273e95243df1eb31de8b72f83051d8d Mon Sep 17 00:00:00 2001 From: KaranPradhan266 Date: Fri, 5 Dec 2025 01:38:42 -0800 Subject: [PATCH 21/37] Expand chart type supports: Pie Chart --- src/components/ChartTemplates.tsx | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/components/ChartTemplates.tsx b/src/components/ChartTemplates.tsx index 0f94d6a..e5e2420 100644 --- a/src/components/ChartTemplates.tsx +++ b/src/components/ChartTemplates.tsx @@ -3,6 +3,8 @@ import { ChartTemplate } from "./ComponentType"; import InsightsIcon from '@mui/icons-material/Insights'; +import PublicIcon from '@mui/icons-material/Public'; +import PieChartOutlineIcon from '@mui/icons-material/PieChartOutline'; import React from "react"; // Import all chart icons statically so they are included in the build @@ -322,6 +324,24 @@ const barCharts: ChartTemplate[] = [ } ] +const pieCharts: ChartTemplate[] = [ + { + "chart": "Pie Chart", + "icon": , + "template": { + "mark": "arc", + "encoding": { } + }, + "channels": ["theta", "color", "column", "row"], + "paths": { + "theta": ["encoding", "theta"], + "color": ["encoding", "color"], + "column": ["encoding", "column"], + "row": ["encoding", "row"] + } + } +] + let lineCharts = [ { "chart": "Line Chart", @@ -422,6 +442,7 @@ export const CHART_TEMPLATES : {[key: string] : ChartTemplate[]} = { "table": tablePlots, "scatter": scatterPlots, "bar": barCharts, + "pie": pieCharts, "line": lineCharts, "custom": customCharts, } \ No newline at end of file From bf477f45008c4bac95a39036d3e3ee261f2f5512 Mon Sep 17 00:00:00 2001 From: KaranPradhan266 Date: Fri, 5 Dec 2025 03:04:38 -0800 Subject: [PATCH 22/37] Expand chart type supports: US Map --- src/components/ChartTemplates.tsx | 50 +++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/components/ChartTemplates.tsx b/src/components/ChartTemplates.tsx index e5e2420..1d00bbb 100644 --- a/src/components/ChartTemplates.tsx +++ b/src/components/ChartTemplates.tsx @@ -324,6 +324,55 @@ const barCharts: ChartTemplate[] = [ } ] +const mapCharts: ChartTemplate[] = [ + { + "chart": "US Map with Points", + "icon": , + "template": { + "width": 500, + "height": 300, + "layer": [ + { + "data": { + "url": "https://vega.github.io/vega-lite/data/us-10m.json", + "format": { + "type": "topojson", + "feature": "states" + } + }, + "projection": { + "type": "albersUsa" + }, + "mark": { + "type": "geoshape", + "fill": "lightgray", + "stroke": "white" + } + }, + { + "projection": { + "type": "albersUsa" + }, + "mark": "circle", + "encoding": { + "longitude": { }, + "latitude": { }, + "size": {}, + "color": {} + } + } + ] + }, + "channels": ["longitude", "latitude", "color", "size"], + "paths": { + "longitude": ["layer", 1, "encoding", "longitude"], + "latitude": ["layer", 1, "encoding", "latitude"], + "color": ["layer", 1, "encoding", "color"], + "size": ["layer", 1, "encoding", "size"] + } + } +] + const pieCharts: ChartTemplate[] = [ { "chart": "Pie Chart", @@ -442,6 +491,7 @@ export const CHART_TEMPLATES : {[key: string] : ChartTemplate[]} = { "table": tablePlots, "scatter": scatterPlots, "bar": barCharts, + "map": mapCharts, "pie": pieCharts, "line": lineCharts, "custom": customCharts, From 5c3e801d6b8bebb775bfe27c92df5dd349096810 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Fri, 5 Dec 2025 16:54:12 -0800 Subject: [PATCH 23/37] simplify --- .../agents/agent_interactive_explore.py | 34 +-- src/app/App.tsx | 19 ++ src/views/ChartRecBox.tsx | 206 +++++++----------- src/views/VisualizationView.tsx | 4 +- 4 files changed, 113 insertions(+), 150 deletions(-) diff --git a/py-src/data_formulator/agents/agent_interactive_explore.py b/py-src/data_formulator/agents/agent_interactive_explore.py index 28fc0de..e1c4f2b 100644 --- a/py-src/data_formulator/agents/agent_interactive_explore.py +++ b/py-src/data_formulator/agents/agent_interactive_explore.py @@ -67,54 +67,42 @@ * when the exploration context is provided, make your suggestion based on the context as well as the original dataset; otherwise leverage the original dataset to suggest questions. Guidelines for question suggestions: -1. Suggest a list of question_groups of interesting analytical questions that are not obvious that can uncover nontrivial insights, including both breadth and depth questions. - +1. Suggest a list of question_groups of interesting analytical questions that are not obvious that can uncover nontrivial insights. 2. Use a diverse language style to display the questions (can be questions, statements etc) 3. If there are multiple datasets in a thread, consider relationships between them 4. CONCISENESS: the questions should be concise and to the point 5. QUESTION GROUP GENERATION: - different questions groups should cover different aspects of the data analysis for user to choose from. - - each question_group should include both 'breadth_questions' and 'depth_questions': - - breadth_questions: a group of questions that are all relatively simple that helps the user understand the data in a broad sense. - - depth_questions: a sequence of questions that build on top of each other to answer a specific aspect of the user's goal. - - you have a budget of generating 4 questions in total (or as directed by the user). - - allocate 2-3 questions to 'breadth_questions' and 2-3 questions to 'depth_questions' based on the user's goal and the data. - - each question group should slightly lean towards 'breadth' or 'depth' exploration, but not too much. - - the more focused area can have more questions than the other area. + - each question_group is a sequence of 'questions' that builds on top of each other to answer the user's goal. - each question group should have a difficulty level (easy / medium / hard), - simple questions should be short -- single sentence exploratory questions - medium questions can be 1-2 sentences exploratory questions - hard questions should introduce some new analysis concept but still make it concise - if suitable, include a group of questions that are related to statistical analysis: forecasting, regression, or clustering. 6. QUESTIONS WITHIN A QUESTION GROUP: - - all questions should be a new question based on the thread of exploration the user provided, do not repeat questions that have already been explored in the thread + - raise new questions that are related to the user's goal, do not repeat questions that have already been explored in the context provided to you. - if the user provides a start question, suggested questions should be related to the start question. - - when suggesting 'breadth_questions' in a question_group, they should be a group of questions: - - they are related to the user's goal, they should each explore a different aspect of the user's goal in parallel. - - questions should consider different fields, metrics and statistical methods. - - each question within the group should be distinct from each other that they will lead to different insights and visualizations - - when suggesting 'depth_questions' in a question_group, they should be a sequence of questions: - - start of the question should provide an overview of the data in the direction going to be explored, and it will be refined in the subsequent questions. - - they progressively dive deeper into the data, building on top of the previous question. - - each question should be related to the previous question, introducing refined analysis (e.g., updated computation, filtering, different grouping, etc.) + - the questions should progressively dive deeper into the data, building on top of the previous question. + - start of the question should provide an overview of the data in the direction going to be explored. + - followup questions should refine the previous question, introducing refined analysis to deep dive into the data (e.g., updated computation, filtering, different grouping, etc.) + - don't jump too far from the previous question so that readers can understand the flow of the questions. - every question should be answerable with a visualization. 7. FORMATTING: - - include "breadth_questions" and "depth_questions" in the question group: - - each question group should have 2-3 questions (or as directed by the user). + - include "questions" in the question group: + - each question group should have 2-4 questions (or as directed by the user). - For each question group, include a 'goal' that summarizes the goal of the question group. - The goal should all be a short single sentence (<12 words). - Meaning of the 'goal' should be clear that the user won't misunderstand the actual question descibed in 'text'. - It should capture the key computation and exploration direction of the question (do not omit any information that may lead to ambiguity), but also keep it concise. - include the **bold** keywords for the attributes / metrics that are important to the question, especially when the goal mentions fields / metrics in the original dataset (don't have to be exact match) - include 'difficulty' to indicate the difficulty of the question, it should be one of 'easy', 'medium', 'hard' - - a 'focus' field to indicate whether the overall question group leans more on 'breadth' or 'depth' exploration. Output should be a list of json objects in the following format, each line should be a json object representing a question group, starting with 'data: ': Format: -data: {"breadth_questions": [...], "depth_questions": [...], "goal": ..., "difficulty": ..., "focus": "..."} -data: {"breadth_questions": [...], "depth_questions": [...], "goal": ..., "difficulty": ..., "focus": "..."} +data: {"questions": [...], "goal": ..., "difficulty": ...} +data: {"questions": [...], "goal": ..., "difficulty": ...} ... // more question groups ''' diff --git a/src/app/App.tsx b/src/app/App.tsx index 5106a52..4effca9 100644 --- a/src/app/App.tsx +++ b/src/app/App.tsx @@ -648,6 +648,7 @@ export const AppFC: FC = function AppFC(appProps) { "sans-serif" ].join(",") }, + // Default Material UI palette palette: { primary: { main: blue[700] @@ -665,6 +666,24 @@ export const AppFC: FC = function AppFC(appProps) { main: '#bf5600', // New accessible color, original (#ed6c02) has insufficient color contrast of 3.11 }, }, + // Microsoft Fluent UI palette (alternative option) + // palette: { + // primary: { + // main: '#0078d4' // Fluent UI themePrimary (Microsoft Blue) + // }, + // secondary: { + // main: '#8764b8' // Fluent UI purple + // }, + // derived: { + // main: '#ffb900', // Fluent UI yellow/gold + // }, + // custom: { + // main: '#d83b01', // Fluent UI orange (Office orange) + // }, + // warning: { + // main: '#a4262c', // Fluent UI red (accessible) + // }, + // }, }); // Check if we're on the about page diff --git a/src/views/ChartRecBox.tsx b/src/views/ChartRecBox.tsx index e36db7b..347d437 100644 --- a/src/views/ChartRecBox.tsx +++ b/src/views/ChartRecBox.tsx @@ -220,13 +220,11 @@ export const IdeaChip: FC<{ height: 'auto', borderRadius: 2, border: `1px solid ${alpha(styleColor, 0.2)}`, - boxShadow: '0 1px 2px rgba(0,0,0,0.05)', - transition: 'all 0.2s ease-in-out', + transition: 'all 0.1s ease-in-out', backgroundColor: alpha(theme.palette.background.paper, 0.9), cursor: disabled ? 'default' : 'pointer', opacity: disabled ? 0.6 : 1, '&:hover': disabled ? 'none' : { - boxShadow: '0 2px 6px rgba(0,0,0,0.1)', borderColor: alpha(styleColor, 0.7), transform: 'translateY(-1px)', }, @@ -243,7 +241,7 @@ export const IdeaChip: FC<{ export const AgentIdeaChip: FC<{ mini?: boolean, - idea: {breadth_questions: string[], depth_questions: string[], goal: string, difficulty: 'easy' | 'medium' | 'hard', focus: 'breadth' | 'depth'} + idea: {questions: string[], goal: string, difficulty: 'easy' | 'medium' | 'hard'} theme: Theme, onClick: () => void, sx?: SxProps, @@ -288,13 +286,11 @@ export const AgentIdeaChip: FC<{ height: 'auto', borderRadius: 2, border: `1px solid ${alpha(styleColor, 0.2)}`, - boxShadow: '0 1px 2px rgba(0,0,0,0.05)', - transition: 'all 0.2s ease-in-out', + transition: 'all 0.1s ease-in-out', backgroundColor: alpha(theme.palette.background.paper, 0.9), cursor: disabled ? 'default' : 'pointer', opacity: disabled ? 0.6 : 1, '&:hover': disabled ? 'none' : { - boxShadow: '0 2px 6px rgba(0,0,0,0.1)', borderColor: alpha(styleColor, 0.7), transform: 'translateY(-1px)', }, @@ -302,8 +298,7 @@ export const AgentIdeaChip: FC<{ }} onClick={disabled ? undefined : onClick} > - {idea.focus === 'breadth' && } - {idea.focus === 'depth' && } + {ideaTextComponent} @@ -348,9 +343,9 @@ export const ChartRecBox: FC = function ({ tableId, placeHolde const [ideas, setIdeas] = useState<{text: string, goal: string, difficulty: 'easy' | 'medium' | 'hard'}[]>([]); const [agentIdeas, setAgentIdeas] = useState<{ - breadth_questions: string[], depth_questions: string[], goal: string, - difficulty: 'easy' | 'medium' | 'hard', - focus: 'breadth' | 'depth' }[]>([]); + questions: string[], goal: string, + difficulty: 'easy' | 'medium' | 'hard' }[]>([]); + const [thinkingBuffer, setThinkingBuffer] = useState(""); let thinkingBufferEffect = ; @@ -465,29 +460,18 @@ export const ChartRecBox: FC = function ({ tableId, placeHolde if (mode === "agent") { let questions = dataBlocks.map(block => ({ - breadth_questions: block.breadth_questions, - depth_questions: block.depth_questions, + questions: block.questions, goal: block.goal, - difficulty: block.difficulty, - focus: block.focus + difficulty: block.difficulty })); const newIdeas = questions.map((question: any) => ({ - breadth_questions: question.breadth_questions, - depth_questions: question.depth_questions, + questions: question.questions, goal: question.goal, - difficulty: question.difficulty, - focus: question.focus + difficulty: question.difficulty })); if (runNextIdea) { runNextIdea = false; - for (let i = 1; i < newIdeas[0].breadth_questions.length; i++) { - setTimeout(() => { - deriveDataFromNL(newIdeas[0].breadth_questions[i]); - }, i + 1 * 1000); - } - setTimeout(() => { - exploreDataFromNL(newIdeas[0].depth_questions); - }, newIdeas[0].breadth_questions.length + 1 * 1000); + exploreDataFromNL(newIdeas[0].questions); } setAgentIdeas(newIdeas); } else { @@ -1189,8 +1173,9 @@ export const ChartRecBox: FC = function ({ tableId, placeHolde const showTableSelector = availableTables.length > 1 && currentTable; + return ( - + = function ({ tableId, placeHolde {isFormulating && ( = function ({ tableId, placeHolde = function ({ tableId, placeHolde }, "& .MuiInput-input": { fontSize: '14px' }, "& .MuiInput-underline:before": { + borderBottom: 'none', borderBottomColor: alpha(modeColor, 0.42) }, "& .MuiInput-underline:hover:not(.Mui-disabled):before": { + borderBottom: 'none', + borderBottomColor: modeColor + }, + "& .MuiInput-underline:hover:(.Mui-disabled):before": { + borderBottom: 'none', borderBottomColor: modeColor }, "& .MuiInput-underline:after": { + borderBottom: 'none', borderBottomColor: modeColor } }} @@ -1335,13 +1311,12 @@ export const ChartRecBox: FC = function ({ tableId, placeHolde } }} value={prompt} - label={mode === "agent" ? "Where should the agent go?" : "What do you want to explore?"} + // label={mode === "agent" ? "Where should the agent go?" : "What do you want to explore?"} placeholder={`${getQuestion()}`} fullWidth multiline - variant="standard" maxRows={4} - minRows={1} + minRows={2} /> {} { @@ -1383,80 +1358,61 @@ export const ChartRecBox: FC = function ({ tableId, placeHolde } - {/* Ideas Chips Section */} - {mode === 'interactive' && (ideas.length > 0 || thinkingBuffer) && ( - - {ideas.length > 0 && ( - - ideas - - )} - - {ideas.map((idea, index) => ( - { - focusNextChartRef.current = true; - setPrompt(idea.text); - deriveDataFromNL(idea.text); - }} - disabled={isFormulating} - sx={{ - width: '46%', - }} - /> - ))} - {isLoadingIdeas && thinkingBuffer && thinkingBufferEffect} - - - )} - {mode === 'agent' && (agentIdeas.length > 0 || thinkingBuffer) && ( - - {agentIdeas.length > 0 && - - directions - - } - - {agentIdeas.map((idea, index) => ( - { - focusNextChartRef.current = true; - exploreDataFromNL(idea.depth_questions); - idea.breadth_questions.forEach((question, index) => { - setTimeout(() => { - setPrompt(question); - deriveDataFromNL(question); - }, (index + 1) * 1000); // 1000ms delay between each call - }); - }} - disabled={isFormulating} - sx={{ - width: '46%', - }} - /> - ))} - {isLoadingIdeas && thinkingBuffer && thinkingBufferEffect} - - - )} + {mode === 'interactive' && (ideas.length > 0 || thinkingBuffer) && ( + + {ideas.map((idea, index) => ( + { + focusNextChartRef.current = true; + setPrompt(idea.text); + deriveDataFromNL(idea.text); + }} + disabled={isFormulating} + sx={{ + width: 'calc(50% - 16px)', + }} + /> + ))} + {isLoadingIdeas && thinkingBuffer && thinkingBufferEffect} + + )} + {mode === 'agent' && (agentIdeas.length > 0 || thinkingBuffer) && ( + + {agentIdeas.map((idea, index) => ( + { + focusNextChartRef.current = true; + exploreDataFromNL(idea.questions); + }} + disabled={isFormulating} + sx={{ + width: 'calc(50% - 16px)', + }} + /> + ))} + {isLoadingIdeas && thinkingBuffer && thinkingBufferEffect} + + )} ); }; \ No newline at end of file diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx index efad3a0..07f9819 100644 --- a/src/views/VisualizationView.tsx +++ b/src/views/VisualizationView.tsx @@ -1047,8 +1047,8 @@ export const VisualizationViewFC: FC = function VisualizationView {focusedTableId ? : null} - - or, select a chart type + + or, start with a chart type {chartSelectionBox} From 042bf2ca2db3d7162344ab5c0354d8bf6698aa2e Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Fri, 5 Dec 2025 17:02:16 -0800 Subject: [PATCH 24/37] fixes --- src/app/App.tsx | 46 +++++++++++++++++++-------------------- src/views/ChartRecBox.tsx | 6 +---- src/views/DataThread.tsx | 17 ++++----------- 3 files changed, 28 insertions(+), 41 deletions(-) diff --git a/src/app/App.tsx b/src/app/App.tsx index 4effca9..fdf0396 100644 --- a/src/app/App.tsx +++ b/src/app/App.tsx @@ -649,41 +649,41 @@ export const AppFC: FC = function AppFC(appProps) { ].join(",") }, // Default Material UI palette - palette: { - primary: { - main: blue[700] - }, - secondary: { - main: purple[700] - }, - derived: { - main: yellow[700], - }, - custom: { - main: orange[700], //lightsalmon - }, - warning: { - main: '#bf5600', // New accessible color, original (#ed6c02) has insufficient color contrast of 3.11 - }, - }, - // Microsoft Fluent UI palette (alternative option) // palette: { // primary: { - // main: '#0078d4' // Fluent UI themePrimary (Microsoft Blue) + // main: blue[700] // }, // secondary: { - // main: '#8764b8' // Fluent UI purple + // main: purple[700] // }, // derived: { - // main: '#ffb900', // Fluent UI yellow/gold + // main: yellow[700], // }, // custom: { - // main: '#d83b01', // Fluent UI orange (Office orange) + // main: orange[700], //lightsalmon // }, // warning: { - // main: '#a4262c', // Fluent UI red (accessible) + // main: '#bf5600', // New accessible color, original (#ed6c02) has insufficient color contrast of 3.11 // }, // }, + // Microsoft Fluent UI palette (alternative option) + palette: { + primary: { + main: '#0078d4' // Fluent UI themePrimary (Microsoft Blue) + }, + secondary: { + main: '#8764b8' // Fluent UI purple + }, + derived: { + main: '#ffb900', // Fluent UI yellow/gold + }, + custom: { + main: '#d83b01', // Fluent UI orange (Office orange) + }, + warning: { + main: '#a4262c', // Fluent UI red (accessible) + }, + }, }); // Check if we're on the about page diff --git a/src/views/ChartRecBox.tsx b/src/views/ChartRecBox.tsx index 347d437..72a2ba4 100644 --- a/src/views/ChartRecBox.tsx +++ b/src/views/ChartRecBox.tsx @@ -1261,19 +1261,15 @@ export const ChartRecBox: FC = function ({ tableId, placeHolde "& .MuiInput-input": { fontSize: '14px' }, "& .MuiInput-underline:before": { borderBottom: 'none', - borderBottomColor: alpha(modeColor, 0.42) }, "& .MuiInput-underline:hover:not(.Mui-disabled):before": { borderBottom: 'none', - borderBottomColor: modeColor }, - "& .MuiInput-underline:hover:(.Mui-disabled):before": { + "& .MuiInput-underline:(.Mui-disabled):before": { borderBottom: 'none', - borderBottomColor: modeColor }, "& .MuiInput-underline:after": { borderBottom: 'none', - borderBottomColor: modeColor } }} disabled={isFormulating || isLoadingIdeas} diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx index 520c1ab..d873edb 100644 --- a/src/views/DataThread.tsx +++ b/src/views/DataThread.tsx @@ -333,15 +333,6 @@ let buildChartCard = ( position: 'relative', ...(unread && { boxShadow: '0 0 6px rgba(255, 152, 0, 0.15), 0 0 12px rgba(255, 152, 0, 0.15)', - animation: 'glow 2s ease-in-out infinite alternate', - '@keyframes glow': { - '0%': { - boxShadow: '0 0 6px rgba(255, 152, 0, 0.15), 0 0 12px rgba(255, 152, 0, 0.15)', - }, - '100%': { - boxShadow: '0 0 8px rgba(255, 152, 0, 0.2), 0 0 16px rgba(255, 152, 0, 0.2)', - }, - }, }) }}> {chartElement.element} @@ -651,7 +642,7 @@ let SingleThreadGroupView: FC<{ padding: 0.25, '&:hover': { transform: 'scale(1.3)', - transition: 'all 0.2s ease' + transition: 'all 0.1s linear' }, '&.Mui-disabled': { color: 'rgba(0, 0, 0, 0.5)' @@ -687,7 +678,7 @@ let SingleThreadGroupView: FC<{ padding: 0.25, '&:hover': { transform: 'scale(1.2)', - transition: 'all 0.2s ease' + transition: 'all 0.1s linear' } }} onClick={(event) => { @@ -706,7 +697,7 @@ let SingleThreadGroupView: FC<{ {tableDeleteEnabled && { event.stopPropagation(); @@ -720,7 +711,7 @@ let SingleThreadGroupView: FC<{ { event.stopPropagation(); From 4b04973deaae6c063b69a0277d9d6d713dc2053d Mon Sep 17 00:00:00 2001 From: Hurairah Mateen Date: Mon, 8 Dec 2025 15:35:59 +0500 Subject: [PATCH 25/37] Update py-src/data_formulator/data_loader/bigquery_data_loader.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- py-src/data_formulator/data_loader/bigquery_data_loader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/py-src/data_formulator/data_loader/bigquery_data_loader.py b/py-src/data_formulator/data_loader/bigquery_data_loader.py index ea60874..effed85 100644 --- a/py-src/data_formulator/data_loader/bigquery_data_loader.py +++ b/py-src/data_formulator/data_loader/bigquery_data_loader.py @@ -167,7 +167,6 @@ def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: def _convert_bigquery_dtypes(self, df: pd.DataFrame) -> pd.DataFrame: """Convert BigQuery-specific dtypes to standard pandas dtypes""" - import json def safe_convert(x): try: From 532fecb15a203d0a9d8d632afeec37f955c94045 Mon Sep 17 00:00:00 2001 From: Hurairah Mateen Date: Mon, 8 Dec 2025 15:52:04 +0500 Subject: [PATCH 26/37] changes --- .../data_formulator/data_loader/__init__.py | 2 +- .../data_loader/bigquery_data_loader.py | 32 ++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/py-src/data_formulator/data_loader/__init__.py b/py-src/data_formulator/data_loader/__init__.py index dadfaa1..bb9c247 100644 --- a/py-src/data_formulator/data_loader/__init__.py +++ b/py-src/data_formulator/data_loader/__init__.py @@ -17,5 +17,5 @@ "bigquery": BigQueryDataLoader } -__all__ = ["ExternalDataLoader", "MySQLDataLoader", "MSSQLDataLoader", "KustoDataLoader", "S3DataLoader", "AzureBlobDataLoader","PostgreSQLDataLoader","DATA_LOADERS"] +__all__ = ["ExternalDataLoader", "MySQLDataLoader", "MSSQLDataLoader", "KustoDataLoader", "S3DataLoader", "AzureBlobDataLoader","PostgreSQLDataLoader", "BigQueryDataLoader", "DATA_LOADERS"] diff --git a/py-src/data_formulator/data_loader/bigquery_data_loader.py b/py-src/data_formulator/data_loader/bigquery_data_loader.py index effed85..dec3930 100644 --- a/py-src/data_formulator/data_loader/bigquery_data_loader.py +++ b/py-src/data_formulator/data_loader/bigquery_data_loader.py @@ -1,10 +1,12 @@ import json import logging -from typing import Dict, Any, List +import re +from typing import Dict, Any, List, Optional import pandas as pd import duckdb from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name +from data_formulator.security import validate_sql_query try: from google.cloud import bigquery @@ -191,15 +193,15 @@ def safe_convert(x): elif str(df[col].dtype).startswith("db_dtypes"): try: df[col] = df[col].astype(str) - except Exception: - pass + except Exception as e: + logging.error(f"Failed to convert column '{col}' to string: {e}") # Handle nested objects/JSON columns elif df[col].dtype == "object": df[col] = df[col].apply(safe_convert) return df - def ingest_data(self, table_name: str, name_as: str | None = None, size: int = 1000000): + def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int = 1000000): """Ingest data from BigQuery table into DuckDB with stable, de-duplicated column aliases.""" if name_as is None: name_as = table_name.split('.')[-1] @@ -221,14 +223,12 @@ def build_alias(field_path: str) -> str: 'device.category' -> 'device_category' 'event_params.value' -> 'event_params_value' """ - import re as _re - # path "a.b.c" -> "a_b_c" alias = field_path.replace('.', '_') # remove weird characters - alias = _re.sub(r'[^0-9a-zA-Z_]', '_', alias) - alias = _re.sub(r'_+', '_', alias).strip('_') or "col" + alias = re.sub(r'[^0-9a-zA-Z_]', '_', alias) + alias = re.sub(r'_+', '_', alias).strip('_') or "col" # must start with letter or underscore if not alias[0].isalpha() and alias[0] != '_': @@ -285,23 +285,31 @@ def process_field(field, parent_path: str = ""): self.ingest_df_to_duckdb(df, name_as) - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - """Execute query and return sample results""" + def view_query_sample(self, query: str) -> str: + """Execute query and return sample results as a JSON string""" try: + result, error_message = validate_sql_query(query) + if not result: + raise ValueError(error_message) + # Add LIMIT if not present if "LIMIT" not in query.upper(): query += " LIMIT 10" df = self.client.query(query).to_dataframe() - return df.to_dict(orient="records") + return df.to_json(orient="records") except Exception as e: log.error(f"Error executing query sample: {e}") - return [] + return "[]" def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: """Execute custom query and ingest results into DuckDB""" name_as = sanitize_table_name(name_as) + result, error_message = validate_sql_query(query) + if not result: + raise ValueError(error_message) + # Execute query and get DataFrame df = self.client.query(query).to_dataframe() From 6a53da8ec44921f6827e6efb2f03a3235ac46c23 Mon Sep 17 00:00:00 2001 From: BAIGUANGMEI <2868653801@qq.com> Date: Tue, 9 Dec 2025 00:28:56 +0800 Subject: [PATCH 27/37] Add MongoDB Support --- .../agents/agent_query_completion.py | 4 + .../data_formulator/data_loader/__init__.py | 7 +- .../data_loader/mongodb_data_loader.py | 370 ++++++++++++++++++ pyproject.toml | 3 +- requirements.txt | 1 + 5 files changed, 381 insertions(+), 4 deletions(-) create mode 100644 py-src/data_formulator/data_loader/mongodb_data_loader.py diff --git a/py-src/data_formulator/agents/agent_query_completion.py b/py-src/data_formulator/agents/agent_query_completion.py index f60a2fa..5e82f38 100644 --- a/py-src/data_formulator/agents/agent_query_completion.py +++ b/py-src/data_formulator/agents/agent_query_completion.py @@ -54,6 +54,10 @@ def __init__(self, client): def run(self, data_source_metadata, query): + # For MongoDB, treat it as a SQL-like data source for query generation + if data_source_metadata['data_loader_type'] == "mongodb": + data_source_metadata['data_loader_type'] = "SQL" + user_query = f"[DATA SOURCE]\n\n{json.dumps(data_source_metadata, indent=2)}\n\n[USER INPUTS]\n\n{query}\n\n" logger.info(user_query) diff --git a/py-src/data_formulator/data_loader/__init__.py b/py-src/data_formulator/data_loader/__init__.py index 1c964f8..03c0e99 100644 --- a/py-src/data_formulator/data_loader/__init__.py +++ b/py-src/data_formulator/data_loader/__init__.py @@ -5,6 +5,7 @@ from data_formulator.data_loader.s3_data_loader import S3DataLoader from data_formulator.data_loader.azure_blob_data_loader import AzureBlobDataLoader from data_formulator.data_loader.postgresql_data_loader import PostgreSQLDataLoader +from data_formulator.data_loader.mongodb_data_loader import MongoDBDataLoader DATA_LOADERS = { "mysql": MySQLDataLoader, @@ -12,8 +13,8 @@ "kusto": KustoDataLoader, "s3": S3DataLoader, "azure_blob": AzureBlobDataLoader, - "postgresql": PostgreSQLDataLoader + "postgresql": PostgreSQLDataLoader, + "mongodb": MongoDBDataLoader, } -__all__ = ["ExternalDataLoader", "MySQLDataLoader", "MSSQLDataLoader", "KustoDataLoader", "S3DataLoader", "AzureBlobDataLoader","PostgreSQLDataLoader","DATA_LOADERS"] - +__all__ = ["ExternalDataLoader", "MySQLDataLoader", "MSSQLDataLoader", "KustoDataLoader", "S3DataLoader", "AzureBlobDataLoader","PostgreSQLDataLoader","MongoDBDataLoader","DATA_LOADERS"] diff --git a/py-src/data_formulator/data_loader/mongodb_data_loader.py b/py-src/data_formulator/data_loader/mongodb_data_loader.py new file mode 100644 index 0000000..2f3bbce --- /dev/null +++ b/py-src/data_formulator/data_loader/mongodb_data_loader.py @@ -0,0 +1,370 @@ +import json +import string +import random as rand + +import pandas as pd +import duckdb +import pymongo +from bson import ObjectId +from datetime import datetime + +from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name + +from data_formulator.security import validate_sql_query +from typing import Dict, Any, Optional, List + + +class MongoDBDataLoader(ExternalDataLoader): + + @staticmethod + def list_params() -> bool: + params_list = [ + {"name": "host", "type": "string", "required": True, "default": "localhost", "description": ""}, + {"name": "port", "type": "int", "required": False, "default": 27017, "description": "MongoDB server port (default 27017)"}, + {"name": "username", "type": "string", "required": False, "default": "", "description": ""}, + {"name": "password", "type": "string", "required": False, "default": "", "description": ""}, + {"name": "database", "type": "string", "required": True, "default": "", "description": ""}, + {"name": "collection", "type": "string", "required": False, "default": "", "description": "If specified, only this collection will be accessed"} + ] + return params_list + + @staticmethod + def auth_instructions() -> str: + return """ +MongoDB Connection Instructions: + +1. Local MongoDB Setup: + - Ensure MongoDB server is running on your machine + - Default connection: host='localhost', port=27017 + - If authentication is not enabled, leave username and password empty + +2. Remote MongoDB Connection: + - Obtain host address, port, username, and password from your database administrator + - Ensure the MongoDB server allows remote connections + +3. Common Connection Parameters: + - host: MongoDB server address (default: 'localhost') + - port: MongoDB server port (default: 27017) + - username: Your MongoDB username (leave empty if no auth) + - password: Your MongoDB password (leave empty if no auth) + - database: Target database name to connect to + - collection: (Optional) Specific collection to access, leave empty to list all collections + +4. Troubleshooting: + - Verify MongoDB service is running: `mongod --version` + - Test connection: `mongosh --host [host] --port [port]` +""" + + def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): + self.params = params + self.duck_db_conn = duck_db_conn + + try: + # Create MongoDB client + host = self.params.get("host", "localhost") + port = int(self.params.get("port", 27017)) + username = self.params.get("username", "") + password = self.params.get("password", "") + database = self.params.get("database", "") + collection = self.params.get("collection", "") + + if username and password: + self.mongo_client = pymongo.MongoClient(host=host, port=port, username=username, password=password) + else: + self.mongo_client = pymongo.MongoClient(host=host, port=port) + + self.db = self.mongo_client[database] + self.database_name = database + + self.collection = self.db[collection] if collection else None + + except Exception as e: + raise Exception(f"Failed to connect to MongoDB: {e}") + + @staticmethod + def _flatten_document(doc: Dict[str, Any], parent_key: str = '', sep: str = '_') -> Dict[str, Any]: + """ + Use recursion to flatten nested MongoDB documents + """ + items = [] + for key, value in doc.items(): + new_key = f"{parent_key}{sep}{key}" if parent_key else key + + if isinstance(value, dict): + items.extend(MongoDBDataLoader._flatten_document(value, new_key, sep).items()) + elif isinstance(value, list): + if len(value) == 0: + items.append((new_key, None)) + else: + for idx, item in enumerate(value, start=1): + item_key = f"{new_key}{sep}{idx}" + if isinstance(item, dict): + items.extend(MongoDBDataLoader._flatten_document(item, item_key, sep).items()) + else: + items.append((item_key, item)) + else: + items.append((new_key, value)) + + return dict(items) + + @staticmethod + def _convert_special_types(doc: Dict[str, Any]) -> Dict[str, Any]: + """ + Convert MongoDB special types (ObjectId, datetime, etc.) to serializable types + """ + result = {} + for key, value in doc.items(): + if isinstance(value, ObjectId): + result[key] = str(value) + elif isinstance(value, datetime): + result[key] = value.isoformat() + elif isinstance(value, bytes): + result[key] = value.decode('utf-8', errors='ignore') + elif isinstance(value, dict): + result[key] = MongoDBDataLoader._convert_special_types(value) + elif isinstance(value, list): + result[key] = [ + MongoDBDataLoader._convert_special_types(item) if isinstance(item, dict) + else str(item) if isinstance(item, ObjectId) + else item.isoformat() if isinstance(item, datetime) + else item + for item in value + ] + else: + result[key] = value + return result + + def _process_documents(self, documents: List[Dict[str, Any]]) -> pd.DataFrame: + """ + Process MongoDB documents list, flatten and convert to DataFrame + """ + if not documents: + return pd.DataFrame() + + processed_docs = [] + for doc in documents: + converted = self._convert_special_types(doc) + flattened = self._flatten_document(converted) + processed_docs.append(flattened) + + df = pd.DataFrame(processed_docs) + return df + + def list_tables(self, table_filter: str = None): + """ + List all collections + """ + results = [] + + # Get specified collection or all collections + collection_param = self.params.get("collection", "") + + if collection_param: + collection_names = [collection_param] + else: + collection_names = self.db.list_collection_names() + + for collection_name in collection_names: + # Apply filter + if table_filter and table_filter.lower() not in collection_name.lower(): + continue + + try: + full_table_name = f"{collection_name}" + collection = self.db[collection_name] + + # Get row count + row_count = collection.count_documents({}) + + # Get sample data + sample_data = list(collection.find().limit(10)) + + if sample_data: + df = self._process_documents(sample_data) + + # Construct column information + columns = [{ + 'name': col, + 'type': str(df[col].dtype) + } for col in df.columns] + + # Convert sample_data for return + sample_rows = json.loads(df.to_json(orient="records")) + else: + columns = [] + sample_rows = [] + + table_metadata = { + "row_count": row_count, + "columns": columns, + "sample_rows": sample_rows + } + + results.append({ + "name": full_table_name, + "metadata": table_metadata + }) + except Exception as e: + continue + + return results + + def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int = 100000): + """ + Import MongoDB collection data into DuckDB + """ + # Extract collection name from full table name + parts = table_name.split('.') + if len(parts) >= 3: + collection_name = parts[-1] + else: + collection_name = table_name + + if name_as is None: + name_as = collection_name + + # Get and process data from MongoDB (limit rows) + collection = self.db[collection_name] + data_cursor = collection.find().limit(size) + data_list = list(data_cursor) + if not data_list: + raise Exception(f"No data found in MongoDB collection '{collection_name}'.") + df = self._process_documents(data_list) + + name_as = sanitize_table_name(name_as) + + self._load_dataframe_to_duckdb(df, name_as, size) + return + + + def view_query_sample(self, query: str) -> str: + + self._existed_collections_in_duckdb() + self._difference_collections() + self._preload_all_collections(self.collection.name if self.collection else "") + + result, error_message = validate_sql_query(query) + if not result: + print(error_message) + raise ValueError(error_message) + + result_query = json.loads(self.duck_db_conn.execute(query).df().head(10).to_json(orient="records")) + + self._drop_all_loaded_tables() + + for collection_name, df in self.existed_collections.items(): + self._load_dataframe_to_duckdb(df, collection_name) + + return result_query + + def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: + """ + Create a new table from query results + """ + result, error_message = validate_sql_query(query) + if not result: + raise ValueError(error_message) + + name_as = sanitize_table_name(name_as) + + self._existed_collections_in_duckdb() + self._difference_collections() + self._preload_all_collections(self.collection.name if self.collection else "") + + df = self.duck_db_conn.execute(query).df() + + self._drop_all_loaded_tables() + + for collection_name, df in self.existed_collections.items(): + self._load_dataframe_to_duckdb(df, collection_name) + + self._load_dataframe_to_duckdb(df, name_as) + + return df + + def _existed_collections_in_duckdb(self): + """ + Return the names and contents of tables already loaded into DuckDB + """ + self.existed_collections = {} + duckdb_tables = self.duck_db_conn.execute("SHOW TABLES").df() + for _, row in duckdb_tables.iterrows(): + collection_name = row['name'] + df = self.duck_db_conn.execute(f"SELECT * FROM {collection_name}").df() + self.existed_collections[collection_name] = df + + + def _difference_collections(self): + """ + Return the difference between all collections and loaded collections + """ + self.diff_collections = [] + all_collections = set(self.db.list_collection_names()) + loaded_collections = set(self.existed_collections) + diff_collections = all_collections - loaded_collections + self.diff_collections = list(diff_collections) + print(f'Difference collections: {self.diff_collections}') + + def _drop_all_loaded_tables(self): + """ + Drop all tables loaded into DuckDB + """ + for table_name in self.loaded_tables.values(): + try: + self.duck_db_conn.execute(f"DROP TABLE IF EXISTS main.{table_name}") + print(f"Dropped loaded table: {table_name}") + except Exception as e: + print(f"Warning: Failed to drop table '{table_name}': {e}") + + def _preload_all_collections(self, specified_collection: str = "", size: int = 100000): + """ + Preload all MongoDB collections into DuckDB memory + """ + # Get the list of collections to load + if specified_collection: + collection_names = [specified_collection] + else: + collection_names = self.db.list_collection_names() + + # Record loaded tables + self.loaded_tables = {} + + for collection_name in collection_names: + try: + collection = self.db[collection_name] + + # Get data + data_cursor = collection.find().limit(size) + data_list = list(data_cursor) + + if not data_list: + print(f"Skipping empty collection: {collection_name}") + continue + + df = self._process_documents(data_list) + + # Generate table name + table_name = sanitize_table_name(collection_name) + + # Load into DuckDB + self._load_dataframe_to_duckdb(df, table_name) + + # Record mapping + self.loaded_tables[collection_name] = table_name + print(f"Preloaded collection '{collection_name}' as table '{table_name}' ({len(data_list)} rows)") + + except Exception as e: + print(f"Warning: Failed to preload collection '{collection_name}': {e}") + + def _load_dataframe_to_duckdb(self, df: pd.DataFrame, table_name: str, size: int = 1000000): + """ + Load DataFrame into DuckDB + """ + # Create table using a temporary view + random_suffix = ''.join(rand.choices(string.ascii_letters + string.digits, k=6)) + temp_view_name = f'df_temp_{random_suffix}' + + self.duck_db_conn.register(temp_view_name, df) + # Use CREATE OR REPLACE to directly replace existing table + self.duck_db_conn.execute(f"CREATE OR REPLACE TABLE main.{table_name} AS SELECT * FROM {temp_view_name} LIMIT {size}") + self.duck_db_conn.execute(f"DROP VIEW {temp_view_name}") \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 1454f73..66b4a49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,8 @@ dependencies = [ "boto3", "backoff", "beautifulsoup4", - "scikit-learn" + "scikit-learn", + "pymongo" ] [project.urls] diff --git a/requirements.txt b/requirements.txt index 17adffe..18c456d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,5 @@ vl-convert-python backoff beautifulsoup4 scikit-learn +pymongo -e . #also need to install data formulator itself \ No newline at end of file From 5f7e2274990b52ad75bcbaf46c1914816370303e Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Mon, 8 Dec 2025 11:34:13 -0800 Subject: [PATCH 28/37] add icon for chart templates --- .../data_loader/azure_blob_data_loader.py | 2 +- .../data_loader/external_data_loader.py | 2 +- .../data_loader/kusto_data_loader.py | 2 +- .../data_loader/mssql_data_loader.py | 4 ++-- .../data_loader/mysql_data_loader.py | 2 +- .../data_loader/postgresql_data_loader.py | 2 +- .../data_loader/s3_data_loader.py | 2 +- src/assets/chart-icon-pie-min.png | Bin 0 -> 4206 bytes src/assets/chart-icon-pyramid-min.png | Bin 0 -> 1538 bytes src/assets/chart-icon-us-map-min.png | Bin 0 -> 5841 bytes src/components/ChartTemplates.tsx | 9 ++++++--- 11 files changed, 14 insertions(+), 11 deletions(-) create mode 100644 src/assets/chart-icon-pie-min.png create mode 100644 src/assets/chart-icon-pyramid-min.png create mode 100644 src/assets/chart-icon-us-map-min.png diff --git a/py-src/data_formulator/data_loader/azure_blob_data_loader.py b/py-src/data_formulator/data_loader/azure_blob_data_loader.py index 1959125..094ed6b 100644 --- a/py-src/data_formulator/data_loader/azure_blob_data_loader.py +++ b/py-src/data_formulator/data_loader/azure_blob_data_loader.py @@ -368,7 +368,7 @@ def view_query_sample(self, query: str) -> List[Dict[str, Any]]: if not result: raise ValueError(error_message) - return self.duck_db_conn.execute(query).df().head(10).to_dict(orient="records") + return json.loads(self.duck_db_conn.execute(query).df().head(10).to_json(orient="records")) def ingest_data_from_query(self, query: str, name_as: str): # Execute the query and get results as a DataFrame diff --git a/py-src/data_formulator/data_loader/external_data_loader.py b/py-src/data_formulator/data_loader/external_data_loader.py index 204b180..6bc4425 100644 --- a/py-src/data_formulator/data_loader/external_data_loader.py +++ b/py-src/data_formulator/data_loader/external_data_loader.py @@ -108,7 +108,7 @@ def ingest_data(self, table_name: str, name_as: str = None, size: int = 1000000) pass @abstractmethod - def view_query_sample(self, query: str) -> str: + def view_query_sample(self, query: str) -> List[Dict[str, Any]]: pass @abstractmethod diff --git a/py-src/data_formulator/data_loader/kusto_data_loader.py b/py-src/data_formulator/data_loader/kusto_data_loader.py index b216637..dc7ec64 100644 --- a/py-src/data_formulator/data_loader/kusto_data_loader.py +++ b/py-src/data_formulator/data_loader/kusto_data_loader.py @@ -236,7 +236,7 @@ def ingest_data(self, table_name: str, name_as: str = None, size: int = 5000000) total_rows_ingested += len(chunk_df) - def view_query_sample(self, query: str) -> str: + def view_query_sample(self, query: str) -> List[Dict[str, Any]]: df = self.query(query).head(10) return json.loads(df.to_json(orient="records", date_format='iso')) diff --git a/py-src/data_formulator/data_loader/mssql_data_loader.py b/py-src/data_formulator/data_loader/mssql_data_loader.py index 130f78c..9464fa7 100644 --- a/py-src/data_formulator/data_loader/mssql_data_loader.py +++ b/py-src/data_formulator/data_loader/mssql_data_loader.py @@ -1,6 +1,6 @@ import json import logging -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, List import duckdb import pandas as pd @@ -418,7 +418,7 @@ def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int log.error(f"Failed to ingest data from {table_name}: {e}") raise - def view_query_sample(self, query: str) -> str: + def view_query_sample(self, query: str) -> List[Dict[str, Any]]: """Execute a custom query and return sample results""" try: # Add TOP 10 if not already present for SELECT queries diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index 7910484..ec84f04 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -245,7 +245,7 @@ def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int self.ingest_df_to_duckdb(df, name_as) logger.info(f"Successfully ingested {len(df)} rows from {table_name} into DuckDB table {name_as}") - def view_query_sample(self, query: str) -> str: + def view_query_sample(self, query: str) -> List[Dict[str, Any]]: result, error_message = validate_sql_query(query) if not result: raise ValueError(error_message) diff --git a/py-src/data_formulator/data_loader/postgresql_data_loader.py b/py-src/data_formulator/data_loader/postgresql_data_loader.py index 3aa33e1..b045376 100644 --- a/py-src/data_formulator/data_loader/postgresql_data_loader.py +++ b/py-src/data_formulator/data_loader/postgresql_data_loader.py @@ -143,7 +143,7 @@ def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int LIMIT {size} """) - def view_query_sample(self, query: str) -> str: + def view_query_sample(self, query: str) -> List[Dict[str, Any]]: result, error_message = validate_sql_query(query) if not result: raise ValueError(error_message) diff --git a/py-src/data_formulator/data_loader/s3_data_loader.py b/py-src/data_formulator/data_loader/s3_data_loader.py index d00eb71..51059ba 100644 --- a/py-src/data_formulator/data_loader/s3_data_loader.py +++ b/py-src/data_formulator/data_loader/s3_data_loader.py @@ -202,7 +202,7 @@ def view_query_sample(self, query: str) -> List[Dict[str, Any]]: if not result: raise ValueError(error_message) - return self.duck_db_conn.execute(query).df().head(10).to_dict(orient="records") + return json.loads(self.duck_db_conn.execute(query).df().head(10).to_json(orient="records")) def ingest_data_from_query(self, query: str, name_as: str): # Execute the query and get results as a DataFrame diff --git a/src/assets/chart-icon-pie-min.png b/src/assets/chart-icon-pie-min.png new file mode 100644 index 0000000000000000000000000000000000000000..ae26ceecb0ae57000b980c755e2c14d7ef66499f GIT binary patch literal 4206 zcmZ`+c{r3^8=o12WUnkKB5NgUsSqP%$!jlBiK1-9m@z9e4>MyL`@V)LjD5*EWEq6A zFK>woNoA5Pgg47K^?rYR|9sbVp8Nit`~2?vcc16F&UKwbv$Zx8+%L5s1Of?Kn48#x zKwu68gZFW99DO-*h2st*O|K&z{O=*r?tz{lG#ahu{m>V2*FC^f%|Gyd=Ayn7hr}0P z?u-C|ggSnI;BNmy4-kk8WNYPUY8GIHL?VNbfym$EH$Vjia;&SXD+&eYfFML5ieou( z1RROvMBr!y9DzWf1AoU+h~G<`!Abwk@%R6C%E{u8kw^pw{22&NB4-H>|84(h!GnSj zNCe`K{Uh{8_kX7tjNgB&Ap_CqKk7jUPFI|C)bHZICH^G@1O)sBL4R7}h$0bx`uVr) ze>wkV{iY!hoV~QSwQg-~p}O`sk;;lPTykMS0Rt6;Xk)byLhyJT27?a5VbPDM6f_En z#h_YSnldxeDdaF3jT(%{Mny)jTANY%-Jt-H8cq%g#)VTzjSbAmh({DsD3L^ojfr|f z&%psC&i`y~dXB*a#Y9Kq0a65wN(jci=;)Z5nnG2MH$88_g;B|*Ff9E!jzmLs?oz`k z0GZO?--jw0%FoY@i;FHRElG%vO-@P(3ndZ>!L>EjsJ4yG&CTFY5)mK*p`mZyyw1+b zOixRx;B15#nv?x_eSN*SsDN2t%buHCSy@?JTqK17sOR4S3N@&97R6deb!>%@!mFw( z#>dCQNF+>FT^NOeda;eos3e4uGcwW|n03`v&lrrn#Kbr(23=IhsIIOoEiDQrh0o8= zKdY>G|Lz?wIz9I(y`sD{Kko@Pgiyf9+t}DBDK5l@l5vzMJRUnbI*Mvqn)&zC?4~q1rtKV5CBy3a$RlB+UhDgw>>EOMKI3uwI@if3AdoJt z_x~KA`yoV+V)Jw3h~Z@Kpg)}$JM^qYde`RoF|i5Xo+!|{;2gQoPgLQ>H?`8;lR-0N zi(x0~;~Z%4HvG=h^>0LEw4_G0w9DM+T_)O>%%91B?Q3~tiJImsIO_(Hi0z&lAULMf z-|KByxJoO_Iefj?l6?7Qm~%nZ=OtOlqP@!r;>yS`y#Y1XYWOej?)!qz zLbBeOu4VZP20)XZ{a8G(MP$0bPCYju?p1hFj%Z7_9@OY^`Z|nW-gw5FaDCaME?BT! zj%Cz;@p*5ZAG%3+bZM@9YMB}%q$HwX?hFMg!akHTiu&{V6wJe_Jq0al}&Qv{I_>Y z?!tzju2-qn+kx29yKq8wv&DDm;+#!9PlDn3dAmea2N4)%p?3oF!_Zo)IMLPc(e|@C zv(NL>y4)%~r2E1Lxx*_Bbmul1Cg9|GE3K>Gh#y(O*>yz&q)iI=gvGz66t$@%^?sOW9^!``o1)BKLIcXPwj4v?b^&#J&

    Sh?=Du!Q)73XHz`g}9!xr-Y`=4LvOYquK8pzc7lEB&{g! zhs`S&ew96(@^pa(8-FtHl&>1>grN(sv9KT%6aTTIFBFu|P(9dou1JPx$Q5CAbD!meybsLhTsON6$d)Sny?D(>YPlcE( zSzn^98bLF?#D|6?`SZWV@$qnNyxQioMK$dwNjY@i`e3WDs^aG43_0#-P7Uv9jJ*Qx zQM8xKe5t=XV0ZoW>Iv9Zl~8rPCgBnhphnH>*gv##xO>#rRZ zh^lk>09d9I!9An2s2?AN+RjwFp&+i4LjFpS?f6z3VTg_x#OhuTqvQCz5fHd0`|zH~ zo(k7%caSl-V#!2-kQF zlDUVukGfn!_%J&Uig(<2-LhJsO?4C>3%YiI$!&VckD!EhN<~5QI6o4Yks(*$#zu`b|1mZ=AXvI0Ozpb=@l`-QQL}%>n zg;jbM_2_#9WsHcYjPHG{9-O>6GYx^k+00|s)dy_re$As}T7q`Bx2mc-$@~&(s;=9A z^Q#}ND7%H&-dt&KZ*MwFp$Z?Hth(QeAY+d#sN(VCU075~S#ik?&w}z|=6M5ymh){! z#n_LR0w+eU->Lx%ux+Z3#^r?q+Y}4_g)|2Tn|G~a7`@KUWQ6pY)QOY{3wYc$jaJ!i zYpE@otsSgZbj|GeDR6&v`O}sbgAD^-;hgImdc0;a-v?2)_Epp8oS5~=6*A1eI@8oG zj~!2)GiP@~ea>NcOG}~ecYJm7K!*mM()Z=fTft7i;7cZksJr0H5GhMhD|qrF?RBxR zQtr}vHl{Lx_;%$3eFa^D0+p&D+8hvifL_sXWV^r7bR=8Gd;h zaD^V#sy=VezNe73CO1tuvYtPJ($$m{suYFkwt4Y5tuSNras;bIEA?d@6nQYmNAq~W z$`(>DXVX3q)~3Kpkn9ufB1&Jsb()OmUXCjRaN6tA4rQ1&m-bdbFyBgNU_$PGVd>}F zuunIk=282Ml4)qLa&^@Afnm8{ms3{NrY8@XXgK zi?u9M7bBxGeS?KBLMofKwz>{5lkbDy*-J{(+vh~-YHp@VVG6Vr`Q?4B5OeV;f**IB znQZh8u<~eM-+<`zVtINRzpeOkD7W1}DK`@%FP~)t{!6Dssvt6PLECrhP^CV9xzEkk zb6j8iWVls(?Acw4Q*J&%;0%a0Ls1r%b)&&Qzb-m~0esKbC`1lMT#-Fcg*~`j+W&G^ zputz`Z<@Up%m!xcQk*f?hp}t)2+Ib}^9y`(jJzF4Gab32&xZ+L134&&!BNHtEklTm z{0UosiL>117h$>Tc?$F=PxlYEBs5~`b{b86Ad5=9$MMBSTW4pue2*T3SRnwugm1`o zU`tU3s_)ioMi!SCj0rLhuKNkeK3#hpmdVfT5cG1T*8<&DLhT(+BjKSi>&7~WPF`O7 zf|A84o|<7;Cbyq?T1fzHnedb6OOlt|bj}qA_cT@S7(I&!ZK>B1mHjDs{aK%;3R1yEH(rY`vu~by z->zto_MJ?jI*JAS*yk)Lb-{6l@~~W^#n;}36`t&&`1k2sii7|mkm|S(-)7e}fxbDl z6_br4K%cUs=uoA8@BWT7Nt*h5UcEM^ZdNm3dLeK3>+oMQvG91tf0Oa3W|yZzlIC#9EFsV90_R}NpS~v>Q2Ijs)}@zxwyPVOqC@ssSNWhD zLGet+S6SI-2I-9e&jsHY(X>U?6>Br~b0hL!7!G0qdj;Z=Vv_77H8xex;ka^C4Dh zL6sdv_#`-M&I)s2KM)!E(Gz;%>WjR0Np=ZBD_Jx5hyp?{(h2cEN=4w6)Pxja8P{`` zy7{D>`W>&AQO#ynhBmC7$SS9|s=Tk3q{47aa~vbFfcnAcQYS58{?O1e^F)13q1M!{ zFL<=kvawq+*r(>ATQi}x z1+H2<^#Egi@F6~5x1N5cWbRJW@Jd6WMRRce&(6YE+09ulgOO($*IZ6c-n!_k7?&}8 z(>wES!`sK{n9pa%=jfVGKIEe;x(Nh#gJHwqxN1)(fM=JdPoBzMB0;NTTeBEx?nnDg`jS(sXzlwWcS`wymY B;uQb@ literal 0 HcmV?d00001 diff --git a/src/assets/chart-icon-pyramid-min.png b/src/assets/chart-icon-pyramid-min.png new file mode 100644 index 0000000000000000000000000000000000000000..d7d8343d7af5fd1a02f0052dc295fa1c263aaadc GIT binary patch literal 1538 zcmeAS@N?(olHy`uVBq!ia0y~yU<5K5893O0R7}x|G$6%U;1OBOz`%DHgc*+N0Njwz)w z2R`2a{N~oNExV*$IGj3`Ox1T4xaeqA@@~{qh zu+(AFh316#?9B!(E|<+gV$qzUaqhnLjW1dTOdhSJJ)X>nk#x@9Zz~eR{q0;hB{?sve3< z-&xBjA+yI^E@tnsymJqa{P-Be7*TfrQ^dxq+Nak;Tb*SDM2eAcZV5E1qV-!55Jgo^G+V$0hhgU!e4jYjY)X!C)Zo7 z-Y?a{5b{-3*IqpG*A=sA#d|gK&V8$xcJ!>sNB7&hI;WSt$vZi5>Rufdt9DE`y^z`K z_F;SO?RymqG~T^>{loYDT$%fOtMA{)Ii8|lfA4XRhI;L{Aa90S5f@)s%3Q2ka{2nr ztS@i7I?92e{8rm~UesOmu2;(2@2GLjsrhKCeniGk=9k4W zy}m6^-@kKqc#*_JdfMCY;*LbiOi|5Qy_v-`oVB+^}*gGB1BguUW8pF{=&eY<(8*|#rWO8)o+9a&Rb7>eNrF>*xPW`Tc$>kHe`=%QuTi=LVaKR{koIYu}p?6ngJSl6UHQhP>#VMWI*T zt-QIWOG0+9gz(p@mCtQH-cf5_J>l~4eO=L$%~$;ud$6tXV55YD#FlJPLp!suUzVrC zZF34li@x1f=Kl4$hJz#4hm=$YN}>(cse$jg4Qx$iCr_TdEJpBb*ybqP6#@f6D%BapqrWl-XVdd)1{W(=M#l zXTw%_yj}k#ym5Qp-CZWvr|65u49Rf}6`>l;gKN64_`M<(t&#niI z9UZSVyE%p1y!LOOfBpW7dbXx_e5~k!PI2)>vH=I?eh@#+!*i7Bc%l!m{ATcU^>bP0 Hl+XkKwRtJ~ literal 0 HcmV?d00001 diff --git a/src/assets/chart-icon-us-map-min.png b/src/assets/chart-icon-us-map-min.png new file mode 100644 index 0000000000000000000000000000000000000000..26d895ed8e57342ec4e30894d72ea5f96652c457 GIT binary patch literal 5841 zcma)gXFObA)U_xPqC|)?x(E?9gd|Gz8ZBB15z$N3nYkE78=b)z#6%sUNACpDJ0of& zYLwBV3nI_R|HJ!!d!G9{_msWYS!?gJ@BMNj^mWy#DM6G(L`2k@8Y%`vL{|vYD@4~w z2!qq77e<7E1)}-_V(4lM@qFuHL*(h{DdOPb1h#tXW+US2VVANk3nG9h+%#T-iHK<0 zFW)QOuKCtPL?lG|+D5ACZrV_Y2NddY+5cY3YzqF%rJPEN8ED;HpgaB}e0rGl*-0n--zz712KnRph0o9ApU?em&j*uV( z;&$nRf-%YOfz&!6x%^THfC8VsLW3fqz5$h$m6s9`?*VTZ;OPaFj9&(jQhj*{MI(fC zfb#Fqm<%8?8T!72kPaaLLIRfoc>7+ee5fQ;0Z94;AryjO_w)n^Pkw(@RTW`-MMcFw zq|fC!5L_R-pr6Jr6}@55!eJ;fa_ssGbY1`*=Ro@@w0Re3SOdNs0&OQi z zVzS!L)D;s|pq49rvFRZ=9(|@ybc^s({w~*r#Cztm>+ie^vSd_Om(X8VjlS6=fZD}|9JRSSaWn`ZE9S5{%iQkp6%Z0 z@y6^otFw!RC5YIxLc*^dMBl|?X-(rw&$lzBosQ$4^Hu!TPn-9DCpWf5*FAua>Z9S) z*13k?KaPKY8oF4|*;v_)`&HKFpIklUwu8Ymw1;jZ20r$$W#_7<@A#YMeA>;FE?)30 z`|Q?$&CLx@+qaDI%^!8DdRG!_axVHFs!v38^OdHGq7gI8Rx6nTn&L0{uN`UT=x5oD zGC4Vq85ETm6csm#8yih!8;2|P`NJLvU^Oo7o7?q)!UX;)Cs9ohOuMLU7 zT4sg4TRhPH%m=k~gN&_@mCp5lCAqw;LO;|FYIQh>D_lM=Ne-ZsofB6-zwzt+&2jZ~ zO_3_?Y<|!rsoIH=P9nJ|x^+Mx=_aa>>#5@zkKC3T_-j{k9bHYcjjEVc($j*OKM#ju zc&~a+kgD-^An6!rz`@=XW?Ea6C#$;gjO#woc6A$Fme$HQvh^C#f!4hybsX0bA_d_5 znG+Tl$FHhTFdnt$;%;oh#uQfJ#d4Lj+eo);&n}5t+}^G{Zd$ct9L?b}C)^V=5I);M zuK{oBPKXn8+((_G^`2)M{Jae}6?kexc6PC$$huj>W zFFYeHCLQfnSWi0B{DyLQ;!Yl)N2#=br2Ax(3hqt0>+1I6e!}!j%n(A#GL~|k)!1nt zck5gBBp<1^Y}`8=!C2{)mT|qSX`e+sy0ZS(yZS58=Imx7Jz?xD5i7R)In#-yF)`u2 z1F30`ZAjHE7R09}MNXy$FcS5IUs`v;nXkKR=qY4sf9r?rfxLNDBuAm?7KN}9pJUI- zSgotnT!T^Bmd1Pu(^DGV*zky#tmw6sB+UHJpY-!T>AmY6Vi^9o4jaIhjyf#t-?C~9 ztIh}i5Mw=%SXNpuLy_Btg;n9e?%9^xyK=oXl>OodoT?z`3$f0btJ-@%j1E%mx0%@b z%wIUNI3h3FXg}&Aq;yC~SiiqM@Rf9V%8X`O5nc_}PO0%?iH<_6#5$)Kyoly#lPbop z77MJ+Z$6$ZB_|buc z?Ls0Hd1(@b4?511GkR?BBKOo(bGgkB66UoOG#>;%8H`_}calw$xNfoLE*)4BA zK(=@v3Bk0DQ8`nBik=m8WY&}`&+mi5>(JgHj<=jVUdx^J(}A9y@Pdh(=YU(@)w6K3rsavpkUI^9O$U7UT}`Z zQ#ciVuMxCjA8xdtx}cc7c`$^@FxI0odAH&n6sWS6%SWFiH+6f$v6QOk%yFBgC|mrX zP0d_7BRQHfF(9ISL%BD&V0CDUshU)w+7flTgxAt75rpi?ON1xqlhj#s#G0Tfie%b5 zmA7R!BJi`*O_X?pPtp2xXdmUm*1MAN+TUIksW?^9-FuopGe~WYQ#~DIw1hZBGe>`9 z?F}Axof>8(=knXzBtO8hJ%PAYr+(y7)ASX)&aTJz?kA~?jNO-+pFdy0c#)!kG}Pn~ zFI5?mzuBSJ8qjfmNwhEeSGO~1e!lL*BL3EP2q9^0xdW{mMBuw6dK~jDO&DpLhepbc zS#)F_?_zEa2tAKmbDV{*WeHlWs>KTT#byYRNW5JPM}RCD9cU9mvh#i(_a-1$uef1! z(_U%4{%l^ju;@_H3nV2Otck+aX1RpbVBe5VzdY1DV=y|_ed0?*LX6MyP$mT%CTd!p zc&&GmHU`Tdb9T~Qd5Rz0aTRQ8vdfZrKVsbnB1t8BA@Wi8mc%`@{UF92@A%FIbf_h{ zvChd)o}E%ot{X4V=)*-8&~30jR%dwyb+4W5H8M&o+g!3q+8xnGr$@g(V`SYIs+)Ml zt_-iHDty=#A}(JS&EgTV);jbS<0=hL6c`p$Pr4}mEJjw%5*iGY^{HbXjRz@!uG*y zEYlr!g#}SFk}>$DH}r1tRX&}oo1|ej#P;`{OFn2%H*xRU(=)mu(s3tK*@%v9OU>I3 zwOlS;8|btgG`6#X`?%+00sMR9-!WF^Cmp}8LVP47U+&j?f4{DX5UKRlZL|aH za)1>XQ(W~br%=Yy7<7iOP%bKVVTwn7Z4W>rf$~xekim+UH<~=%Rv+fBa8UvJW zweqJ;Gc4kO5>rS1k|w}4H#Geb#>M8o#B8H&Un6Zklgw;Qm~Ks=^}2Z#Wo=3YqCtO7 z8nQgEr;aSmGcWkDiH-6aRuhMM^6>F(aw+G0Q|4derN&5)A3KUq@%Z44j?lc~PT)-} z8ri0!WBcj8AZ3FyyGQH*s?Y`ZOD)S;RvvF#%%)|lVSL}`WUX$$*!>6}(s3_tK)-WE zwU|O;0)AIR(N$>@>2e1=YS62fO>f9|Hw`C<@$FI*f9RC36Uz@vl%R!`$+If1tXy1+ zddD+p7DL)}lk(p4_>1$CM}8j>~_T6-rF2a6`HRLdBCH4gVFez zS^=?f`lkIqz24#v@9%d_>4ZDH>M2*;dACOb>K#ZhOD&J?_o;orbXwNz6*UmOu2QA6 z_$#FQ1wNZwLD@F%2yt@U{?0j*{P~5KxIC@az7RcoX6#Aa195pBPMGTW3uV#YEIQV} zE2&`0Vl3xEva>a?oi}`~H!z)z_<^;H%Nq{Kh#iVQk2v&rB*ySo?bn?*zd>^cuEyemM^&K`@U9F7T-I9ZY3j*R4|)M&%VX3E8ba{6RTR?}GOh z1{*ZqiHkibwiSQ4udZ_{MherS>YbiYS-GS6xK;NWJf!~P*{3?il_fHVIOgHZN3JRZ z$JS2(?NscqZi(`mS4G^FsDq@UH2QQTz5iD?Y7F^dlsB;}tL*LJA0Mt!V@SrAb$}8v zg{(7sNlc((N}oks+1R9@{k+eOc^YLh+Jd|*rNOsDTd#KKI|P{SVq~cJWsZrd?XOEU z59%p$K2pIc)L4ddO|ihAtz>5fI(5gX96X!qAb(&n<91#3&C9&!_V$_6Tg&0iHyf!M zo9~v*Z?l8Ewi83Mc2f724W76fnyutVVnHB{F;t`na|G`GfWX~#Go|>*t5n9SSd!?c z$)~U-9njqxc=nC9o{p%;iPiT&Hz;dN8`@6#ghy{P;)u#@v`U!&@R(pE4U}`Yhc-L$ zqT+8#qlK*(o7toN7`ncD*Q2yw!gD1~(Y#ZDWpSi-lA{sHDJJ`Y+#(025KZ1UveTFF z^0uV)2eqB{mcSyp;VoWh&bMSnujYDGdTke}{JH6pTcM?zT`BYQkHTEQNeUc~E+V!x zH&Ylkp0Q;fotn2&w|d>GvbZBQ1vu)Ae+-DOMxjuXi2<8sXwQpoLE{59_GfMO?@~#d zPAd|`iuH9^6Xk#aETQD zM#7WS^o#}&D>K4vccNobG@Rr2xu?8oPO|OR8fgm9shzX61`gzw-@I)z~KA z1+@I1ZoWoiT$s)UE7`&4e9IsUD)XIinAh(%Q*|t@yT*hs%-nqW3-ggV{rwY_2CCCI z4fv}9|NH_bPuis~*Lcqlv(k0R+_6k7JDHKE?GY`qT;okxGHC7g4GCklYtf3khy7XU ze*GbSk^UH!t_hf+fHT5!>AH)t58uE^7G!AK1cqOkVZ0?*-gX_%+lyJni3J{~=jT1F z$Dv0zWvqpeNEKuy4}VFLpfh^1iFY+uBNa@U7xTL=DZGZw*BQK$4})p%zmh%ea+*t- zYij>~?DR~J(;ds7a$m4l!=&R5sfxdsuQuk*XU<@ocAVng#rJCAsM&eb_Q%#eTDPDe zh;HbU-9IP|-qTW+7P>as`*!stY0ha4&=gtzY24T|= z_i*GTK`nj)r{`cggp(OwIE=FvG5w>bj2gtmuzWbJCAs#|4?2l*4s>vu5r^}c*b1dK z3)B}b(Nr7C*$*ydF?9wpQ_E$6oFCJ`<0@iz{5Jhns7l zm}&S+R|=E9KBpXWY(h?nGY>lumdgVNkafG?E9L+Kyo>@Iw^#M_pqNF(*s zIHT#=baehA!&^N>(lx6)pKhE8;BS~3X7Loi(sJ$}NfYaPlAK*N=+42$HsBfJvNtnU z;1bLn8qeT&2#)>R%w<`T8f9lCF)+jud&m5T(zH;x;iDkMLvySOaX2Ea1K&BOJXYWU z+@Lb*@CP^HZRjeO%??6vj<@d{mi8>}sLoxx6?ib^HuRR6@1AdyM$xjF*uEJtoI!04 zn+_VL^^g%cR^g;r_W!J%_9}b-ZF1&A)oJ0?pa@*M_?&L^?BhLk1KPvN-;7CCBc2@G zrEtBpP(eeB8+n{jRi;B^Xg8TmwQF*9)^eFHKtl6_%4=O*OK`Ug>FO@kR<7<@28Y_z z>wIs|?|R>zrT}-~ax{MN$Q4_ht*P9PwsLpll9P&mPOTjL+mGj~<71|1S#XjDuInY; zI(M*0gp8j4b3|yW`Bf_v1)t=zyj)EW=8h*z*@isA7Xu->x*@-6!$hRMMytUV6Klx? z#M0Z%?|{~;`X91g-Hkp)B-=!IpQ^bpQ#|pexWtpE> z8+TE|mbUG@#Rkf){NZ|I3$%7E^l7IbelkBfl%J!I(EW2eqn1iqgGLm^@X|=;YDSIe zCWS4H%e{5C@Exk#-`Anx9QB9BH>l8yH%=>$o6pY#Q26CJvi>>mRpxG5H|m+=@AAb= z#eeIIt&+#sr8SPys-@Yt89H`JvN0L6A8GE$GS7?@KF4FRAw?Etch|;tRI-NU7jH+> zi))7hRtV7Nl0Wc2ld1WZp8WXv52@m|YSo-`bkVX+w$H*sM8uw$ulYK9;_bPq_>L~- z6Y%?`%L`tSQ<}{?t2Y&djuA9T*dVu@D9$R4G+lO{EDPW5uMwsCDM$||<^lJ0QI(CB znrrH`PtEp-7_M96O71=jmqPcq_BgCkz^o6>@Z>SnhPHGgsYc9N_~uiltmC$(^DiA0 z%mHXQSY^kA!3~Ygh%-gQzw=b;lKd3AL*TjU!l-_rloU`lo5F_NnaCCrMZ) zpuCKxoIX2G?6+OiN>R9de<#lYc}o6x4yP9V?q=}gRu1m{*_VIQ*fRt}84D(ef`gir z@H|TzM2|iZBNB|yC0s-w#7_$|{ki>Sek0tjrb8)iBY}tYi1wn;(Z06BfLI~connl!0;UC|8~Rk?-Gdq;4fAAWY?h#aUS7z PjYv~fSEWSBBIthrF=%^} literal 0 HcmV?d00001 diff --git a/src/components/ChartTemplates.tsx b/src/components/ChartTemplates.tsx index 1d00bbb..347d125 100644 --- a/src/components/ChartTemplates.tsx +++ b/src/components/ChartTemplates.tsx @@ -25,6 +25,9 @@ import chartIconCustomLine from '../assets/chart-icon-custom-line-min.png'; import chartIconCustomBar from '../assets/chart-icon-custom-bar-min.png'; import chartIconCustomRect from '../assets/chart-icon-custom-rect-min.png'; import chartIconCustomArea from '../assets/chart-icon-custom-area-min.png'; +import chartIconPie from '../assets/chart-icon-pie-min.png'; +import chartIconUSMap from '../assets/chart-icon-us-map-min.png'; +import chartIconPyramid from '../assets/chart-icon-pyramid-min.png'; // Chart Icon Component using static imports const ChartIcon: React.FC<{ src: string; alt?: string }> = ({ src, alt = "" }) => { @@ -199,7 +202,7 @@ const barCharts: ChartTemplate[] = [ }, { "chart": "Pyramid Chart", - "icon": , + "icon": , "template": { "spacing": 0, @@ -327,7 +330,7 @@ const barCharts: ChartTemplate[] = [ const mapCharts: ChartTemplate[] = [ { "chart": "US Map with Points", - "icon": , + "icon": , "template": { "width": 500, "height": 300, @@ -376,7 +379,7 @@ const mapCharts: ChartTemplate[] = [ const pieCharts: ChartTemplate[] = [ { "chart": "Pie Chart", - "icon": , + "icon": , "template": { "mark": "arc", "encoding": { } From f80a2bdff4159bfbe2ad5728c7fb3259fa88f701 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Mon, 8 Dec 2025 13:54:04 -0800 Subject: [PATCH 29/37] some fixes with data loaders --- README.md | 14 +++++--- .../data_loader/azure_blob_data_loader.py | 13 ++++++++ .../data_loader/bigquery_data_loader.py | 33 +++++++++---------- .../data_loader/kusto_data_loader.py | 15 +++++++-- .../data_loader/mssql_data_loader.py | 17 ++++------ .../data_loader/mysql_data_loader.py | 13 +++++++- .../data_loader/s3_data_loader.py | 12 +++++++ pyproject.toml | 19 ++++++----- requirements.txt | 24 +++++++++----- 9 files changed, 108 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 1435a33..ba567c4 100644 --- a/README.md +++ b/README.md @@ -12,11 +12,11 @@ -🪄 Turn data into insights with AI Agents, with the exploration paths you choose. Try Data Formulator now! +🪄 Explore data with visualizations, powered by AI agents. Try Data Formulator now! -- 🤖 New in v0.5: agent model + interative control [(video)](https://www.youtube.com/watch?v=GfTE2FLyMrs) -- 🔥🔥🔥 Try our online demo at [https://data-formulator.ai](https://data-formulator.ai) -- Any questions, thoughts? Discuss in the Discord channel! [![Discord](https://img.shields.io/badge/discord-chat-green?logo=discord)](https://discord.gg/mYCZMQKYZb) +- Interative exploration with AI agents [(video)](https://www.youtube.com/watch?v=GfTE2FLyMrs) +- Online demo available at [https://data-formulator.ai](https://data-formulator.ai) +- Discuss in the Discord channel! [![Discord](https://img.shields.io/badge/discord-chat-green?logo=discord)](https://discord.gg/mYCZMQKYZb) @@ -28,6 +28,12 @@ https://github.com/user-attachments/assets/8ca57b68-4d7a-42cb-bcce-43f8b1681ce2 ## News 🔥🔥🔥 +[12-08-2025] Data Formulator 0.5.1: More dataloaders and improved performance +- Data loaders: Google Big Query, MySQL (updated), Postgres (updated), MangoDB. +- New chart types: US Map, Pie Chart. +- Edit generated reports with [ChartArtifact](https://github.com/microsoft/chartifact). +- UI interaction gets much faster. +- Concept-level derivation is deprecated. [11-07-2025] Data Formulator 0.5: Vibe with your data, in control diff --git a/py-src/data_formulator/data_loader/azure_blob_data_loader.py b/py-src/data_formulator/data_loader/azure_blob_data_loader.py index 094ed6b..da97a3b 100644 --- a/py-src/data_formulator/data_loader/azure_blob_data_loader.py +++ b/py-src/data_formulator/data_loader/azure_blob_data_loader.py @@ -7,6 +7,13 @@ from typing import Dict, Any, List from data_formulator.security import validate_sql_query +try: + from azure.storage.blob import BlobServiceClient, ContainerClient + from azure.identity import DefaultAzureCredential, AzureCliCredential, ManagedIdentityCredential, EnvironmentCredential, ChainedTokenCredential + AZURE_BLOB_AVAILABLE = True +except ImportError: + AZURE_BLOB_AVAILABLE = False + class AzureBlobDataLoader(ExternalDataLoader): @staticmethod @@ -59,6 +66,12 @@ def auth_instructions() -> str: """ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): + if not AZURE_BLOB_AVAILABLE: + raise ImportError( + "Azure storage libraries are required for Azure Blob connections. " + "Install with: pip install azure-storage-blob azure-identity" + ) + self.params = params self.duck_db_conn = duck_db_conn diff --git a/py-src/data_formulator/data_loader/bigquery_data_loader.py b/py-src/data_formulator/data_loader/bigquery_data_loader.py index dec3930..c0383e8 100644 --- a/py-src/data_formulator/data_loader/bigquery_data_loader.py +++ b/py-src/data_formulator/data_loader/bigquery_data_loader.py @@ -70,7 +70,10 @@ def auth_instructions() -> str: def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): if not BIGQUERY_AVAILABLE: - raise ImportError("google-cloud-bigquery is required for BigQuery connections. Install with: pip install google-cloud-bigquery") + raise ImportError( + "google-cloud-bigquery is required for BigQuery connections. " + "Install with: pip install google-cloud-bigquery google-auth" + ) self.params = params self.duck_db_conn = duck_db_conn @@ -285,22 +288,18 @@ def process_field(field, parent_path: str = ""): self.ingest_df_to_duckdb(df, name_as) - def view_query_sample(self, query: str) -> str: - """Execute query and return sample results as a JSON string""" - try: - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - # Add LIMIT if not present - if "LIMIT" not in query.upper(): - query += " LIMIT 10" - - df = self.client.query(query).to_dataframe() - return df.to_json(orient="records") - except Exception as e: - log.error(f"Error executing query sample: {e}") - return "[]" + def view_query_sample(self, query: str) -> List[Dict[str, Any]]: + """Execute query and return sample results as a list of dictionaries""" + result, error_message = validate_sql_query(query) + if not result: + raise ValueError(error_message) + + # Add LIMIT if not present + if "LIMIT" not in query.upper(): + query += " LIMIT 10" + + df = self.client.query(query).to_dataframe() + return json.loads(df.to_json(orient="records")) def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: """Execute custom query and ingest results into DuckDB""" diff --git a/py-src/data_formulator/data_loader/kusto_data_loader.py b/py-src/data_formulator/data_loader/kusto_data_loader.py index dc7ec64..8801621 100644 --- a/py-src/data_formulator/data_loader/kusto_data_loader.py +++ b/py-src/data_formulator/data_loader/kusto_data_loader.py @@ -8,11 +8,15 @@ import string from datetime import datetime -from azure.kusto.data import KustoClient, KustoConnectionStringBuilder -from azure.kusto.data.helpers import dataframe_from_result_table - from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name +try: + from azure.kusto.data import KustoClient, KustoConnectionStringBuilder + from azure.kusto.data.helpers import dataframe_from_result_table + KUSTO_AVAILABLE = True +except ImportError: + KUSTO_AVAILABLE = False + # Get logger for this module (logging config done in app.py) logger = logging.getLogger(__name__) @@ -57,6 +61,11 @@ def auth_instructions() -> str: """ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): + if not KUSTO_AVAILABLE: + raise ImportError( + "azure-kusto-data is required for Kusto/Azure Data Explorer connections. " + "Install with: pip install azure-kusto-data" + ) self.kusto_cluster = params.get("kusto_cluster", None) self.kusto_database = params.get("kusto_database", None) diff --git a/py-src/data_formulator/data_loader/mssql_data_loader.py b/py-src/data_formulator/data_loader/mssql_data_loader.py index 9464fa7..eeb8c4f 100644 --- a/py-src/data_formulator/data_loader/mssql_data_loader.py +++ b/py-src/data_formulator/data_loader/mssql_data_loader.py @@ -148,17 +148,12 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti log.info("Initializing MSSQL DataLoader with parameters: %s", params) if not PYODBC_AVAILABLE: - error_msg = """ -pyodbc is required for MSSQL connections but is not properly installed. - -Installation steps for macOS: -1. Install unixodbc: brew install unixodbc -2. Install pyodbc: pip install pyodbc -3. Install Microsoft ODBC Driver for SQL Server - -For other platforms, see: https://github.com/mkleehammer/pyodbc/wiki -""" - raise ImportError(error_msg.strip()) + raise ImportError( + "pyodbc is required for MSSQL connections. " + "Install with: pip install pyodbc\n" + "Note for macOS: You may also need to run 'brew install unixodbc' first.\n" + "For other platforms, see: https://github.com/mkleehammer/pyodbc/wiki" + ) self.params = params self.duck_db_conn = duck_db_conn diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index ec84f04..9e29f3b 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -3,13 +3,18 @@ import pandas as pd import duckdb -import pymysql from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name from data_formulator.security import validate_sql_query from typing import Dict, Any, Optional, List +try: + import pymysql + PYMYSQL_AVAILABLE = True +except ImportError: + PYMYSQL_AVAILABLE = False + logger = logging.getLogger(__name__) @@ -54,6 +59,12 @@ def auth_instructions() -> str: """ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): + if not PYMYSQL_AVAILABLE: + raise ImportError( + "pymysql is required for MySQL connections. " + "Install with: pip install pymysql" + ) + self.params = params self.duck_db_conn = duck_db_conn diff --git a/py-src/data_formulator/data_loader/s3_data_loader.py b/py-src/data_formulator/data_loader/s3_data_loader.py index 51059ba..074172a 100644 --- a/py-src/data_formulator/data_loader/s3_data_loader.py +++ b/py-src/data_formulator/data_loader/s3_data_loader.py @@ -7,6 +7,12 @@ from typing import Dict, Any, List from data_formulator.security import validate_sql_query +try: + import boto3 + BOTO3_AVAILABLE = True +except ImportError: + BOTO3_AVAILABLE = False + class S3DataLoader(ExternalDataLoader): @staticmethod @@ -58,6 +64,12 @@ def auth_instructions() -> str: """ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): + if not BOTO3_AVAILABLE: + raise ImportError( + "boto3 is required for S3 connections. " + "Install with: pip install boto3" + ) + self.params = params self.duck_db_conn = duck_db_conn diff --git a/pyproject.toml b/pyproject.toml index f06aa27..2f7ef2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,22 +26,25 @@ dependencies = [ "flask", "flask-cors", "openai", - "azure-identity", - "azure-kusto-data", - "azure-keyvault-secrets", - "azure-storage-blob", "python-dotenv", "vega_datasets", "litellm", "duckdb", - "pymysql", - "pyodbc", "numpy", "vl-convert-python", - "boto3", "backoff", "beautifulsoup4", - "scikit-learn" + "scikit-learn", + "azure-identity", + "azure-kusto-data", + "azure-keyvault-secrets", + "azure-storage-blob", + "google-cloud-bigquery", + "google-auth", + "db-dtypes", + "boto3", + "pymysql", + "pyodbc" ] [project.urls] diff --git a/requirements.txt b/requirements.txt index 5357205..e143134 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,30 @@ +# Core dependencies (always required) jupyter pandas numpy flask flask-cors openai -azure-identity -azure-kusto-data -azure-keyvault-secrets -azure-storage-blob python-dotenv vega_datasets litellm duckdb -pymysql -boto3 -pyodbc vl-convert-python backoff beautifulsoup4 scikit-learn --e . #also need to install data formulator itself \ No newline at end of file + +# External data loaders (Azure, BigQuery, AWS S3, MySQL, MSSQL) +azure-identity +azure-kusto-data +azure-keyvault-secrets +azure-storage-blob +google-cloud-bigquery +google-auth +db-dtypes +boto3 +pymysql +pyodbc + +# Install data_formulator itself in editable mode +-e . From 9dab794318f5526f182917b6a5b9c8f56fa82a4a Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Mon, 8 Dec 2025 14:51:40 -0800 Subject: [PATCH 30/37] fix and tested mongodb data loader --- .../data_loader/mongodb_data_loader.py | 72 ++++++++++--- src/views/DBTableManager.tsx | 102 ++++++++++++------ 2 files changed, 132 insertions(+), 42 deletions(-) diff --git a/py-src/data_formulator/data_loader/mongodb_data_loader.py b/py-src/data_formulator/data_loader/mongodb_data_loader.py index 2f3bbce..32ad8cd 100644 --- a/py-src/data_formulator/data_loader/mongodb_data_loader.py +++ b/py-src/data_formulator/data_loader/mongodb_data_loader.py @@ -24,7 +24,8 @@ def list_params() -> bool: {"name": "username", "type": "string", "required": False, "default": "", "description": ""}, {"name": "password", "type": "string", "required": False, "default": "", "description": ""}, {"name": "database", "type": "string", "required": True, "default": "", "description": ""}, - {"name": "collection", "type": "string", "required": False, "default": "", "description": "If specified, only this collection will be accessed"} + {"name": "collection", "type": "string", "required": False, "default": "", "description": "If specified, only this collection will be accessed"}, + {"name": "authSource", "type": "string", "required": False, "default": "", "description": "Authentication database (defaults to target database if empty)"} ] return params_list @@ -67,9 +68,17 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti password = self.params.get("password", "") database = self.params.get("database", "") collection = self.params.get("collection", "") + auth_source = self.params.get("authSource", "") or database # Default to target database if username and password: - self.mongo_client = pymongo.MongoClient(host=host, port=port, username=username, password=password) + # Use authSource to specify which database contains user credentials + self.mongo_client = pymongo.MongoClient( + host=host, + port=port, + username=username, + password=password, + authSource=auth_source + ) else: self.mongo_client = pymongo.MongoClient(host=host, port=port) @@ -81,6 +90,28 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti except Exception as e: raise Exception(f"Failed to connect to MongoDB: {e}") + def close(self): + """Close the MongoDB connection""" + if hasattr(self, 'mongo_client') and self.mongo_client is not None: + try: + self.mongo_client.close() + self.mongo_client = None + except Exception as e: + print(f"Warning: Failed to close MongoDB connection: {e}") + + def __enter__(self): + """Context manager entry""" + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit - ensures connection is closed""" + self.close() + return False + + def __del__(self): + """Destructor to ensure connection is closed""" + self.close() + @staticmethod def _flatten_document(doc: Dict[str, Any], parent_key: str = '', sep: str = '_') -> Dict[str, Any]: """ @@ -237,7 +268,7 @@ def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int return - def view_query_sample(self, query: str) -> str: + def view_query_sample(self, query: str) -> List[Dict[str, Any]]: self._existed_collections_in_duckdb() self._difference_collections() @@ -271,17 +302,27 @@ def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: self._difference_collections() self._preload_all_collections(self.collection.name if self.collection else "") - df = self.duck_db_conn.execute(query).df() + query_result_df = self.duck_db_conn.execute(query).df() self._drop_all_loaded_tables() - for collection_name, df in self.existed_collections.items(): - self._load_dataframe_to_duckdb(df, collection_name) + for collection_name, existing_df in self.existed_collections.items(): + self._load_dataframe_to_duckdb(existing_df, collection_name) - self._load_dataframe_to_duckdb(df, name_as) + self._load_dataframe_to_duckdb(query_result_df, name_as) - return df + return query_result_df + @staticmethod + def _quote_identifier(name: str) -> str: + """ + Safely quote a SQL identifier to prevent SQL injection. + Double quotes are escaped by doubling them. + """ + # Escape any double quotes in the identifier by doubling them + escaped = name.replace('"', '""') + return f'"{escaped}"' + def _existed_collections_in_duckdb(self): """ Return the names and contents of tables already loaded into DuckDB @@ -290,7 +331,8 @@ def _existed_collections_in_duckdb(self): duckdb_tables = self.duck_db_conn.execute("SHOW TABLES").df() for _, row in duckdb_tables.iterrows(): collection_name = row['name'] - df = self.duck_db_conn.execute(f"SELECT * FROM {collection_name}").df() + quoted_name = self._quote_identifier(collection_name) + df = self.duck_db_conn.execute(f"SELECT * FROM {quoted_name}").df() self.existed_collections[collection_name] = df @@ -311,7 +353,8 @@ def _drop_all_loaded_tables(self): """ for table_name in self.loaded_tables.values(): try: - self.duck_db_conn.execute(f"DROP TABLE IF EXISTS main.{table_name}") + quoted_name = self._quote_identifier(table_name) + self.duck_db_conn.execute(f"DROP TABLE IF EXISTS main.{quoted_name}") print(f"Dropped loaded table: {table_name}") except Exception as e: print(f"Warning: Failed to drop table '{table_name}': {e}") @@ -366,5 +409,10 @@ def _load_dataframe_to_duckdb(self, df: pd.DataFrame, table_name: str, size: int self.duck_db_conn.register(temp_view_name, df) # Use CREATE OR REPLACE to directly replace existing table - self.duck_db_conn.execute(f"CREATE OR REPLACE TABLE main.{table_name} AS SELECT * FROM {temp_view_name} LIMIT {size}") - self.duck_db_conn.execute(f"DROP VIEW {temp_view_name}") \ No newline at end of file + # Quote identifiers to prevent SQL injection + quoted_table_name = self._quote_identifier(table_name) + quoted_temp_view = self._quote_identifier(temp_view_name) + # Ensure size is an integer to prevent injection via size parameter + safe_size = int(size) + self.duck_db_conn.execute(f"CREATE OR REPLACE TABLE main.{quoted_table_name} AS SELECT * FROM {quoted_temp_view} LIMIT {safe_size}") + self.duck_db_conn.execute(f"DROP VIEW {quoted_temp_view}") \ No newline at end of file diff --git a/src/views/DBTableManager.tsx b/src/views/DBTableManager.tsx index 9bb97eb..da5bc8c 100644 --- a/src/views/DBTableManager.tsx +++ b/src/views/DBTableManager.tsx @@ -34,7 +34,8 @@ import { ToggleButtonGroup, ToggleButton, useTheme, - Link + Link, + Checkbox } from '@mui/material'; import DeleteIcon from '@mui/icons-material/Delete'; @@ -820,9 +821,14 @@ export const DBTableSelectionDialog: React.FC<{ onImport={() => { setIsUploading(true); }} - onFinish={(status, message) => { + onFinish={(status, message, importedTables) => { setIsUploading(false); - fetchTables(); + fetchTables().then(() => { + // Navigate to the first imported table after tables are fetched + if (status === "success" && importedTables && importedTables.length > 0) { + setSelectedTabKey(importedTables[0]); + } + }); if (status === "error") { setSystemMessage(message, "error"); } @@ -1030,7 +1036,7 @@ export const DataLoaderForm: React.FC<{ paramDefs: {name: string, default: string, type: string, required: boolean, description: string}[], authInstructions: string, onImport: () => void, - onFinish: (status: "success" | "error", message: string) => void + onFinish: (status: "success" | "error", message: string, importedTables?: string[]) => void }> = ({dataLoaderType, paramDefs, authInstructions, onImport, onFinish}) => { const dispatch = useDispatch(); @@ -1039,6 +1045,7 @@ export const DataLoaderForm: React.FC<{ const [tableMetadata, setTableMetadata] = useState>({}); let [displaySamples, setDisplaySamples] = useState>({}); let [tableFilter, setTableFilter] = useState(""); + const [selectedTables, setSelectedTables] = useState>(new Set()); const [displayAuthInstructions, setDisplayAuthInstructions] = useState(false); @@ -1086,7 +1093,12 @@ export const DataLoaderForm: React.FC<{ return [ toggleDisplaySamples(tableName)}> @@ -1103,33 +1115,20 @@ export const DataLoaderForm: React.FC<{ ))} - - + setSelectedTables(newSelected); + }} + /> , @@ -1155,6 +1154,49 @@ export const DataLoaderForm: React.FC<{

    , + mode === "view tables" && Object.keys(tableMetadata).length > 0 && + + , mode === "query" && ({name: t, fields: tableMetadata[t].columns.map((c: any) => c.name)}))} From 55412f684f1e084472631bab6819dce6c5f27890 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Mon, 8 Dec 2025 15:06:38 -0800 Subject: [PATCH 31/37] fixes --- src/assets/chart-icon-pie-min.png | Bin 4206 -> 8403 bytes src/assets/chart-icon-pyramid-min.png | Bin 1538 -> 1841 bytes src/views/ChartRecBox.tsx | 3 +++ 3 files changed, 3 insertions(+) diff --git a/src/assets/chart-icon-pie-min.png b/src/assets/chart-icon-pie-min.png index ae26ceecb0ae57000b980c755e2c14d7ef66499f..b53957c22b9ef6a2e5032a1dd608ba36362dad4f 100644 GIT binary patch literal 8403 zcmcI~_g7O*xb*~(8k&GasRk7g>Agn?phyk9SLq1SJ3$d5MMQ!~FA9isse&|vfKrv- zq<3io0@ChzzyIJ{>)xN{tjy$`XJ+>7{ha-r=Q>(y)RfmK0RYtZ@1ZdOz@SSQjGPo| zQUrntP($H)&%_%58t9sa0PFO4Zx8)=)knqHN6*8-$KS^5A@KM27jS;$>TPG^`B1>a z%Q1aR?iv7`;6D1Ue!#@$bYOt~(zL?PiFDPCREq0#V0-=(*Krvn7N)A=)ZgK3rjgP(jghDRf>P|rN@3u?9Szz&rw(shL7B$j(vw` zT&cpkC3j35ZXK>Pe3p^U^<0*o@y=ce>?=T%poiq0-H4E+fL<8ja4-}a4v@Bhj-D8c zLjZI;;QYVr0zfpK(@!&9g1@q?Ou%5UTAzN6IHwMC+md&f4*|C%2gvzhvC`Zn@Av}e z2HeoKC|zfXbUyUEyrQC_kj@uu++%r_YR}2duE#udbflGp+m&F}<>;@%tRtZoD68CP zX1geo0XKmHLt;N3!@-wN2`7wg90bvNJrDx#pDryf!hib88}W*p@FrhJrh-@6u04gk z#&-Xfg750o$o?DsUP36JUs4pnWB>Xdn( z<;~mPv1ot;&$VpHoL?D35;v?{ZI5h3uH_zYT#p|nH%xguw}%;e&8GBqPFwfiw?|^) z66b}(g0Ko*-kKlwB29a3WE}X%v0gOT=7{d4Yg|Yp*wO26_TNDc9^=F|)buJzIAF&b zeoc(-DJkujoQS6{@k4K?N7}GaxA@ry=3(9<)-2@v?3ct@S0k1$`c&vBg2eTEg~gJ? zqxxw>BJ(!Wb#?u2?2p2UcZlhaBg!`BePrs0jo;!;7yKFzJs(t&p{!Wf)ST1&o?_Nr zfV0(ksEf<(J{DmjF4c@!dM;H?;U8g!7r!JAKwteE&WcN}IR>w1+aqXq*G9tgs+}&r zov(JP7a0J<`spntLqMD}d0$&wyVkUHl&GE_TkP)t>q#P)n(fqB<-G?i&)MaJsyAAu zbzwWhDDXRoX>nD7b^KU>Qlf<-yn9OdsGsK`0w-6F|JOLVS?2Ii2-un$8X7hfIQz$T z$>r;tI|Z7Xvv$O@#Xer>M7rBwD=~Z!U8?MOsWrvq25?=1;kT!779JN(y{u1gL$Cl# zG<$!4|HYf+F94URYN+o0$vlPZX`>+XB-ZMK;Fpbz&(wdH*Zpb)61>Lxl4YHY~$ks zQXfTrH2QCAS(V=-$Bqyp+0VT)#&)%jMidkNO-zim`~^puqZ^?QjNiktPBQbdtS?+) zasxq~+M1eQFEMMVbS(N9-+3)y)^d*$TiZmWSZzyNTN(sk%FnK0XLlPF+Z=sVcfxPT zOw5`0$-InDW$_SnM82kGk$^MzXGBz2C+evC9?Ph@fXEff9D->Bo9y{HLOpge&fyJR*hG;BhHKhJ>+m7>-|hY&V@0W zBg-&^ngSmw_Nsc3yFiNTGaJNVVAHR+Zv1~igMLs@(8sT~fhb;IS-=4Hw`Vv~%tcu2 zR1;p{xWcLrK=V(om*9@d_QF{*?c0Vwdjmqn--Y5%{_O3g#Nyi2?Sao%-HS=&pBq?x zaPQu|*54+~T-@9&X;(h^o2r?C$aWU$os|o=bVL5=GBV{@c@Va`8{k)olqOka{PgKl zQA63q)z(AoGK4?)}o-azPPX=MDqhAbW5v)O_7<~xx`fHZ8I-rTRB>z>B? zRhgI5t|F-tvYCB}Zr4dsjom#x)zYjl9Fs8@e8^CZTMQ0;84-@sO6m4ZDsVn_k}l6q%8Zv~o{6|i zO!o0HG7?7ZOAu@?nJN2Hp}U|Q>miih`>Us?M;D5>)j3_#BkS9m&$%u-izzX(zFz81 zeV9U7^S|3~dNX9>(ZeIn_fF3Dvb=vmrbfu7^u2dkMfK%G~Ux2Ao~M<89o{aT|399uqpEDGG{ z6Lr;jHM0Haub3;gC~x-`zR-Kf68AgUXf~sAEmuYHLwR{PKQ}jbL@5C{5#~J2$vL&r z|K9XEm@8h$mxF}mZ72mf^;tKvuxm>#b!9zI-R*xp^Bb&mtYCoC#WDE zZ+)6_wbh`z)o$TQRUk;-f2qLi&TIb+Ek{b?ob>N~ea%Fz-JnN4>6PbXsp7IhsBjPV ziG5$^O{xpNYA04~yk4Q!=2x)lU@kCvhu(x~;y65HBVnN2>h|F)1h-h z25@Iw7bKd4=M=43FJEHKuJA7=wxma)JipWlEd={VDYY_@lNkEfZICn=0DA`q8nB}p zP#Zo+E95x~wy3A2NNy~cnjcN?^ggXxUtfPM0&I1(w6valvp3~JDvN2({W~7DS=G@x z^7SVbGxqjSTJGV40reuY46T)Z1Q15^=KENLU+L9OXk+qTvF&;Dqa=)I;FRv?uaq;! z70oUyPj>qx9{|q&9u?(Yel)NRcU*nlc?>b1`8S2!H%^HJdzaft&kmCMYZR`AV_+i-J6lsSCdRAr0_G)KQ}EszOGxW+589VIAdif5qh^ z6xcQAxd=%@Bw?;zNNkHk{5JKVK*LJ|&BB{NRFyvfJePmPiFD8$v`mwFoR!~|``}U3XaPVreNTKy#w(v+y?;h;&#Gvd= zu(bBE?g`DMe;P`CH)kl$j_jfd!p~uJ*P*ycdGo`_#H=<`5`|Nl9&9QwuFBidUk<(z z#V&eoe8-%VkpV^rC*`1{Pak*d@NW(kFHV+0#}{!6Gzf>hoz*WK45TK`iGBF+VLq&$ z0u*R{Qz^G>GAq<=ewX^dA4;%1RyD$evM6GdagD+pUGB@jV7O#qvCKdLP0G|A<=gWnZYQEr&MnN>l zal~APf;Ubb;)OdQ!oKkjh*2Y74Gj(XFcaq_K};@o4 zIB=}zS+-i+Yk4%!>!Y_I68`-Stj6s#eIf&H5-cL3A|}s=e4DUWB~gQn;6`fi^0p|D z8N>7Hk>1WI2xmF2Yhl+oSx}8;~oc5ua6cbsP#XV ztz9OP0N^oPBBG*>&`rAPA1{=EFcPq~mh=*jZ>M}GU^MD4_wO$UIS@+K@D~CyxamhN zaFhq=S_%x0p$mI>ciBMEX^S4$)A#M0bOLMuhR0BW9IF<6IJ+D!S&g0$bk1me=L#5j z(l$#(ijsBxG%)X#bL^~$PJH(rAGbR`7pz<62&(-hySH0Z(IsgGLV;c~d@qoF15-vfCdwUXhTn32)r@)kFUml4=E6$WqmffF}G1FT>iyiEPJrKnOs%9L6aal?wCq!2rc53KpfAi$;_4S;hsuogcnoFD zYfFSsfYWN{RXA33(MLuP#KEwMT+-)-tJbKwjBj&+fSY)>=OK|(*!9VZ+>$#n*P2u# z%MIC*va%pTliC9)2-ujQLk9!hA)y1R8M^oL^Yb4Oz>yw2C8J$F%Fa`_Fwfq3*cN3Y z1%-8s7(~IZ_Qd49&m%;7cSG)sBkL^?yWBgg(~3a)&`b6ec0b|)2(wyyQV;AE|P7mQ8v;LSjI*C%%x zCrI(%;brPRk+p)H)nqXz^!uz6VW6ek{oqQ{RlvCE)^rlBAOf@&RyiKkyvqtXp;al5 z%nF5O#u@TUFd_lG<#AX`z5H)18|Jw#kKM6Rd;vr0_GHjC(+k;5R69S19$><&X-A8- zeR>c}1xai{Oky`OEW!1RPpUMvf*98P<{2I^RF%S%0SvB|nwb_e= zdJCuZ3Xe9Q{M~#AGTIW6t?a0k)xko+nfj?ji*ZA5x;EdyMY4`5+QWS2*1v1VPXvMfp+}Liw9 zmy3*u*kw(aV|XU;uh}U=s7G~z_UPy z%}bq`Vay`dQHLNp_+yzJ5M%T7^z0Zun9*2A*+3_uig||u;Mw@>ZtCiPC@oge?1pI! zR%=5$;7AKtPi(Iw^L$_xRAg^{J1i6z3Z1tq7U1H#5pWBe_tTM#z<`aiL7ZF#g_0y= zZ2v;y2p~r3*|WLO=Ju7nVNsx^Y->AT2-2kBIZljlQe9(Xa-gHW89Q_kHwL=8y9Z$* zaG;hT0(ct4#D*3IatXN{heDvQM*v72jo&q7R7w8+{rkuv%ne#zSpLun@Lb)o5tERp z{J;~`adARL$o93E`2wVf8_GP0(gcg{2=M;LGh?;A%>Ryx`d4KC>r=?~;4m%A6_C+s z|B&~dj{K-HwDZM40tfm}gNyD!76)jBkph+HF)=Ya7Fcy&h*2j<02wVCnv?E7YMg8d z4?lYB*JkZWMLc_lC;yjg7A^VwATgP-1>^3o(8H#IPG#s3l29k)2++b9VXbN$Ocf10 zdBY<^tqZ#VpzTZ1M;oQpJkFz|u$&xg$!taq`c2din7)C5__ldkt;Ek!$=gJ&LUBg0 zoU$?+6_pJ5yL(&bEiBg>3`a*snmd6L8T5!^94icK!4cMe`z=o2$||}yeTJ5;`qyn9 zo*hvz5rIRb6_h%&lQMcDQ1nIb-;=8)uu~s^5fy@rS~n{|FB*XNWo37Db%l%Lk);Z| z%PbyI3Zvt0wFhvlpMBN1#PfT+pnjU&kW~fGz^%o(9lLvRvAnc2ECMEUEiJRk@Ilvr z!tEpSni}5{nHa=mNb^ooF&@*8?hbPOG1LXEkTkFuecE++Z0yTOe|mnaB+S>>X=Mho z5~kr;ngK7PVU}J3kmM%jMdI;{cKumRxe#6asNOPVtUa`X&F=N{=@=hT6el#`Z4ky) z%LfC4;Skr&s*s|{{sc9#09JtIH>>?fRP=yN8WRJ#0Z|PXg6=cTO&PF(2*~@ZU<7}E zPsb9XX{#uUie2b8MoX3L?6?4;p-hp{edhYLXyD}LW=@l1j;?sTAX|c7zucLSVt2;8 zaz0uu*BS*I_~ExXVL&$94|FPmC281Y+FjmhDgJ9Y7%_v^j>h-m0ueu>96LfiFg8}N z&+BWB-sx-EGfo^T`rh5WGvVs0WK+?$uzQ36nNw7BiP6eUSVDe&iTZl=+lz_O(Y(Ao zB490&8pcsNqY}GJ{iA_-r#!r=_xn37-Jm1S4XW_-O5cqU|Es`4l6i*?{2s4jJ~5Mz z&3wP3X#(^X_FlIOBw zj7Po=D{s+-75BWQ=@qezED-m;fd0kX898AKy}0NwG1~(iW8O zXApEGloj&l8+53Q^!4SgP|ff;5VdalF<*-?MNHf7)Vh!s7Rk)5L>f=5|gScOG}+5y*{RJLTf`!d0p6Y zJEnZJk$0roQG*oQze2^6|42uDSW-e_{uvYB+~JAIu+Mi0DbJU*<)QsZ++<>6QjGn& zUG#OG)TePe7;S$Yu*wH978^Ew|4^WZvGjw}DjZp_`tBQmW){Ax^XXdvK-};diF^RF z?(EA9iA03HZD^1=J*N#vnV3iea_n|+)2}z7f=yR8SgMYKs>*xVzR`Av(d~$N-L=iPXs3Po4ro{Bj z#cArItk>bhpd{R*RMyQLa)Y~s;=1t|>Vt!WfPblufh{1^xqh`{AwiK|W~QazO(I-Y zP*B?uwBt@sL&|Lsqd1(diOGM{W(Z2-^Yy;O5gK{29S$1Glz9(66*zm>xs79fur)W1 zWJ*+zZdjD#8>pNVM0e(61(ipSPqAAi?^nV3M)S$QzyJUTz`D-1_qbth4GlZ=|_jPTSKI#{!=@a<%gE} zLwlgYe@3Ci_!t>XG~JW?l&7BUv-=ywC;B;PK{KMnr- z`=NG@8QZZUVg75Z?yrw~#P!3Qeu|@PlS&(9DxApqgCGxv#1$p^h-5x1Tk`h$SM#=0 zVV!KH{^#j(?EO}auE9>zpAhKwuKs>2@>vrw*5pY0S}~w zQ&0HY&V|5S*}CT=)3HiuID{(lR!Hz}3v@6%$cDGxGASHzL7XB-PxKXgu z7XjR%<9m}|n~@TF-?JY7xD--r&FJdn)SC1lWnM5mP;9>$$Ax?a6%$6QpwZk*2SHY1 z@P*6y`tdW{oiWLRwr4CJUxeTiT8z?RpIKXuzJ?mt$edD^TR=8m_kWh^h7f4xB{_FT zsMca!T5z*XhY~i&dfI#E5jkswt{s6^Nyv%)QiZXsagEKo@}@D?<-JW2j`z91P(;65VrN2J&l@@5&A?nGJr*aTY7sTM%6(SW+cN4}wc^f(De!}$} z2FS6D%lw9AsH9-pHx3kR5gk7H`b9X-?dijv)mNRKN1nVa?_fx(B@kn~W`SA+D$o+P{A4}BZs?m*ukI%-4S9RDERx|+v>@y93cxiWE zG@S4~S#S=$x5(fNT4oYXLj<>sL&&m1ysoZqu;k$X#CDbYT4+Pg|4Ls!l!7`1itpD! zmRmA-jq9p_G6r1J$zPj}xV3ug_5WURCG=$n4uHbhBEy~VbzX==IFY$l?z-yy`$0{? z!o|AX(({BH37bRr}q1YPSY zglL~rj;`=pqI5ewl%10mPsTh%{$VK}J@M(T&qe?XL0Q@0dYl{*X%zkXCO5a;a?jPS z?>$fr3VKuh`Jx=k{*;ZIg2c}c%IlO9HlAQU{1S>&oZS2yz4zzO(Yhlk${xxEy&$h- z1~^2&KG>ROfy~7SE@LT#JsHhx6|~!Jcgl&qD0iNCYF77X zNh%vgmkHT@K0wiRjh+W`N1h4Wb+jfXKZ7a|nKE*?NFWr)3B|SZ$dcXCs_%Whvg26FExcB+6@DH<|ca;__Ry1f_|t#ENk@dJc%* z-_o@2FBT&7ohd%KbEaUF= zOR%W(?r%;q6QXFoR?I02hl~lmD=1JJtQMdG^u&VvV zk9lT-3ava49j=z;{#11`E8xA#-MbMStukPvKr`K;thl&aSKZ<^GUaxDM=UwC^#5fw zA=$L3y{pg8&rZ0+#hF4+{;IURzR8JU^v z!so(RP5{+5rBH-qEMQHZl`IT_-Qu)Ck%*b7BeNffMI^My|mXCFNmdb@G?0Y{~|to_>lMI z%a>cVQc&?c50OoV`u{4QTQ!+nKqUtK|GowI-z>lnZJu#-S}%qMzB6fu?ho#(XrarL Hte^Z3KlM@9 literal 4206 zcmZ`+c{r3^8=o12WUnkKB5NgUsSqP%$!jlBiK1-9m@z9e4>MyL`@V)LjD5*EWEq6A zFK>woNoA5Pgg47K^?rYR|9sbVp8Nit`~2?vcc16F&UKwbv$Zx8+%L5s1Of?Kn48#x zKwu68gZFW99DO-*h2st*O|K&z{O=*r?tz{lG#ahu{m>V2*FC^f%|Gyd=Ayn7hr}0P z?u-C|ggSnI;BNmy4-kk8WNYPUY8GIHL?VNbfym$EH$Vjia;&SXD+&eYfFML5ieou( z1RROvMBr!y9DzWf1AoU+h~G<`!Abwk@%R6C%E{u8kw^pw{22&NB4-H>|84(h!GnSj zNCe`K{Uh{8_kX7tjNgB&Ap_CqKk7jUPFI|C)bHZICH^G@1O)sBL4R7}h$0bx`uVr) ze>wkV{iY!hoV~QSwQg-~p}O`sk;;lPTykMS0Rt6;Xk)byLhyJT27?a5VbPDM6f_En z#h_YSnldxeDdaF3jT(%{Mny)jTANY%-Jt-H8cq%g#)VTzjSbAmh({DsD3L^ojfr|f z&%psC&i`y~dXB*a#Y9Kq0a65wN(jci=;)Z5nnG2MH$88_g;B|*Ff9E!jzmLs?oz`k z0GZO?--jw0%FoY@i;FHRElG%vO-@P(3ndZ>!L>EjsJ4yG&CTFY5)mK*p`mZyyw1+b zOixRx;B15#nv?x_eSN*SsDN2t%buHCSy@?JTqK17sOR4S3N@&97R6deb!>%@!mFw( z#>dCQNF+>FT^NOeda;eos3e4uGcwW|n03`v&lrrn#Kbr(23=IhsIIOoEiDQrh0o8= zKdY>G|Lz?wIz9I(y`sD{Kko@Pgiyf9+t}DBDK5l@l5vzMJRUnbI*Mvqn)&zC?4~q1rtKV5CBy3a$RlB+UhDgw>>EOMKI3uwI@if3AdoJt z_x~KA`yoV+V)Jw3h~Z@Kpg)}$JM^qYde`RoF|i5Xo+!|{;2gQoPgLQ>H?`8;lR-0N zi(x0~;~Z%4HvG=h^>0LEw4_G0w9DM+T_)O>%%91B?Q3~tiJImsIO_(Hi0z&lAULMf z-|KByxJoO_Iefj?l6?7Qm~%nZ=OtOlqP@!r;>yS`y#Y1XYWOej?)!qz zLbBeOu4VZP20)XZ{a8G(MP$0bPCYju?p1hFj%Z7_9@OY^`Z|nW-gw5FaDCaME?BT! zj%Cz;@p*5ZAG%3+bZM@9YMB}%q$HwX?hFMg!akHTiu&{V6wJe_Jq0al}&Qv{I_>Y z?!tzju2-qn+kx29yKq8wv&DDm;+#!9PlDn3dAmea2N4)%p?3oF!_Zo)IMLPc(e|@C zv(NL>y4)%~r2E1Lxx*_Bbmul1Cg9|GE3K>Gh#y(O*>yz&q)iI=gvGz66t$@%^?sOW9^!``o1)BKLIcXPwj4v?b^&#J&

    Sh?=Du!Q)73XHz`g}9!xr-Y`=4LvOYquK8pzc7lEB&{g! zhs`S&ew96(@^pa(8-FtHl&>1>grN(sv9KT%6aTTIFBFu|P(9dou1JPx$Q5CAbD!meybsLhTsON6$d)Sny?D(>YPlcE( zSzn^98bLF?#D|6?`SZWV@$qnNyxQioMK$dwNjY@i`e3WDs^aG43_0#-P7Uv9jJ*Qx zQM8xKe5t=XV0ZoW>Iv9Zl~8rPCgBnhphnH>*gv##xO>#rRZ zh^lk>09d9I!9An2s2?AN+RjwFp&+i4LjFpS?f6z3VTg_x#OhuTqvQCz5fHd0`|zH~ zo(k7%caSl-V#!2-kQF zlDUVukGfn!_%J&Uig(<2-LhJsO?4C>3%YiI$!&VckD!EhN<~5QI6o4Yks(*$#zu`b|1mZ=AXvI0Ozpb=@l`-QQL}%>n zg;jbM_2_#9WsHcYjPHG{9-O>6GYx^k+00|s)dy_re$As}T7q`Bx2mc-$@~&(s;=9A z^Q#}ND7%H&-dt&KZ*MwFp$Z?Hth(QeAY+d#sN(VCU075~S#ik?&w}z|=6M5ymh){! z#n_LR0w+eU->Lx%ux+Z3#^r?q+Y}4_g)|2Tn|G~a7`@KUWQ6pY)QOY{3wYc$jaJ!i zYpE@otsSgZbj|GeDR6&v`O}sbgAD^-;hgImdc0;a-v?2)_Epp8oS5~=6*A1eI@8oG zj~!2)GiP@~ea>NcOG}~ecYJm7K!*mM()Z=fTft7i;7cZksJr0H5GhMhD|qrF?RBxR zQtr}vHl{Lx_;%$3eFa^D0+p&D+8hvifL_sXWV^r7bR=8Gd;h zaD^V#sy=VezNe73CO1tuvYtPJ($$m{suYFkwt4Y5tuSNras;bIEA?d@6nQYmNAq~W z$`(>DXVX3q)~3Kpkn9ufB1&Jsb()OmUXCjRaN6tA4rQ1&m-bdbFyBgNU_$PGVd>}F zuunIk=282Ml4)qLa&^@Afnm8{ms3{NrY8@XXgK zi?u9M7bBxGeS?KBLMofKwz>{5lkbDy*-J{(+vh~-YHp@VVG6Vr`Q?4B5OeV;f**IB znQZh8u<~eM-+<`zVtINRzpeOkD7W1}DK`@%FP~)t{!6Dssvt6PLECrhP^CV9xzEkk zb6j8iWVls(?Acw4Q*J&%;0%a0Ls1r%b)&&Qzb-m~0esKbC`1lMT#-Fcg*~`j+W&G^ zputz`Z<@Up%m!xcQk*f?hp}t)2+Ib}^9y`(jJzF4Gab32&xZ+L134&&!BNHtEklTm z{0UosiL>117h$>Tc?$F=PxlYEBs5~`b{b86Ad5=9$MMBSTW4pue2*T3SRnwugm1`o zU`tU3s_)ioMi!SCj0rLhuKNkeK3#hpmdVfT5cG1T*8<&DLhT(+BjKSi>&7~WPF`O7 zf|A84o|<7;Cbyq?T1fzHnedb6OOlt|bj}qA_cT@S7(I&!ZK>B1mHjDs{aK%;3R1yEH(rY`vu~by z->zto_MJ?jI*JAS*yk)Lb-{6l@~~W^#n;}36`t&&`1k2sii7|mkm|S(-)7e}fxbDl z6_br4K%cUs=uoA8@BWT7Nt*h5UcEM^ZdNm3dLeK3>+oMQvG91tf0Oa3W|yZzlIC#9EFsV90_R}NpS~v>Q2Ijs)}@zxwyPVOqC@ssSNWhD zLGet+S6SI-2I-9e&jsHY(X>U?6>Br~b0hL!7!G0qdj;Z=Vv_77H8xex;ka^C4Dh zL6sdv_#`-M&I)s2KM)!E(Gz;%>WjR0Np=ZBD_Jx5hyp?{(h2cEN=4w6)Pxja8P{`` zy7{D>`W>&AQO#ynhBmC7$SS9|s=Tk3q{47aa~vbFfcnAcQYS58{?O1e^F)13q1M!{ zFL<=kvawq+*r(>ATQi}x z1+H2<^#Egi@F6~5x1N5cWbRJW@Jd6WMRRce&(6YE+09ulgOO($*IZ6c-n!_k7?&}8 z(>wES!`sK{n9pa%=jfVGKIEe;x(Nh#gJHwqxN1)(fM=JdPoBzMB0;NTTeBEx?nnDg`jS(sXzlwWcS`wymY B;uQb@ diff --git a/src/assets/chart-icon-pyramid-min.png b/src/assets/chart-icon-pyramid-min.png index d7d8343d7af5fd1a02f0052dc295fa1c263aaadc..25b2ffb22f125e091688e94fe4f2ccc90944dfac 100644 GIT binary patch delta 1425 zcmV;C1#bF+46zQ7U4I6?Nklh}K;1q!fL2}PeoDW%gax`*t`8>4}5QWYTw4E%Q(lR zl3zIUv*Q;XQ35bhW@oE+=UH;5WT#UrB4&EGrtfcjAhP_qGR`s2&^WTkVZZ#Z00x;U@^?H2j&}VM(r)VDQYpc7z z*SUrG+?S4&Vs3vwy&5l{KIH%!=7*CJlK}xHlaK`vf56~*GSGXV-R{Ks^RL{}_fQmh zU$5)zwBp>K*GnD_+dtB-#;&06OtaqSO&b7jv2Rk-RO4Oll_6`ln0X` z0w|L}0tu5q0tA16!SiIG9#^kkiAG~%?DnhZ^L5S5M*QxlCnsX9yWO?`IOX3h%d%)T zn-jbLMNvd+d((j+026jCRH;2v>2y{nU{~`SZG5?P;NrbgXFoKFYk&494{ya&4^8B8 zpA<#m?1xGH;4=ac06+i$0RRL55CA{`fN@jG8-W%V7Y~0WNit`71}{mnY-Q!-TJQF| zjx5#Z+R0%P8jdubJ^jMdi`xRw&)1Tqcsimu=&%`rB56g$Oz-CDEWNXc>{$aEj;v_j z7ZEQA!2e)jVIeA&)X_5_{rr(rnpR`)9UnBT0ol0ndSsoBOMshk;NZt&cGi`^Amn)- z>ubCJslyud+lVs6^hGS=^`p;6HYW;pS1jdga0YLx&0RRL55CA{`fN?eO`v8s|J2n>) zhm5wIk|fJke^ySe^=`lG$Wnd28=p0y@;dYLM^}ph|G)h(ZUp*h68Rr2S4kB~DP3;M86M~*txzHfBMxg46ICR7scoWAj`6N^UZ5kZ^UHXZd|=^&LDgP zxXEjZO64C}Ry<&|CB5dW7WLVD}14iHj!3aPA z0096501yB`000311ON~KVD#PeMyio2nE)FkUEm0E)`l|N8B~9|OVU@*^OVfdx2|Kmi#80$>D_ faRnI|jFkTXCcJojlC`DQRP1F@(CpwTFJ6L3J5=DMK$6gdp%8?TDH749xZ%t0tZA#eGxPjA zNr&2(r*`F;na}4s&n)}@{h$5LGs7%TmQo6!JLN_I0ssgAAd&YslaK@ylfni&lh6hd zfBaK#JOA>VH*a2(=kcU%ET_MJ*%!mZ!?$L7eBIE_p)At#?Y6Egi;sRhF#NS0?veoX z=IPlK*@w2VoauV~NJPvSfJo`_l*OjDTUd(Y5g`EG5`f!kwOaHqU1~SyaP`WSD2ig< z$FF#3L)^b|P36~h;moPHdf^uv+^qo^BOJUp9$dY~ZqDJv`0-K!Sa#p?Sh0G2<=0go zJud+Nkq-eulRN_@lTQN z$7C;m-s6lYCw=x(O90wf(ds!I(R!rQ*V$_H?TUzZW_o<>D?6TAyChCHBg$DnwDXzM z2j4p?0Bx?X@9xO*tmD?zY&4_UY?OLVmSs+exnB9(`0Mln@Gmwz`dHLzwT@fY`QJuj zWaQU*A3wN!MQnWW6g4Nr98MhiIL0oHwB_&aLqO7=)P>DVmLGbC^Ra z3IXUOKb-nLB0BGuSVd9p-jx%VN8|AR_ni=~0J=+~(dg>+=aIKk6n{DsN?r-%SyKSo zShJNj_Vo00{7h2Btbg3654`(bMD#l&I$ze#cQyt7@6{e40Du4h0ssgAAOL^>00IDX zibeU>+m^T9f7}&$w$^y7DQm^P5BBc;X0~J24edB&Qq`66C9j|PWc%-x3_!}Vr?V6r zZBsQBdHQl5hrM7@Ro=v6OJ%PBZdRn+dO*{q5f9laP6^?Ir2Jn`HngR79!$G(bN zlQ*jU7T~sl!GRbU7_@5@F+Mg{`Z>#2J#26la`xLJ2n+i2dXQSS>d zJvAMZlar=g5zSV!^mA_1#|*AQilWs%0XTd1Cp*^Wjvd@*2(AFi$+EZ*DgA7`6@A~A z@@w-r%>GqcHoYJ}C8cDR(5){9Mv?IoLhY~gSAn-~-MTc-^IoHa>Pl(6*t2KnV!~6NXB^x1ZPFsDB26?hXZ$5dj^OkOUi(kOUc# e;xQP&o$wdS3=G8r$6-tW0000 = function ({ tableId, placeHolde "& .MuiInput-underline:hover:not(.Mui-disabled):before": { borderBottom: 'none', }, + "& .MuiInput-underline:not(.Mui-disabled):before": { + borderBottom: 'none', + }, "& .MuiInput-underline:(.Mui-disabled):before": { borderBottom: 'none', }, From 95e23e7eb8cca6fc6dcba8536ef86081de5b72f9 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Mon, 8 Dec 2025 15:14:34 -0800 Subject: [PATCH 32/37] update readme and version --- README.md | 11 +++++------ pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index ba567c4..9e91ab4 100644 --- a/README.md +++ b/README.md @@ -28,12 +28,11 @@ https://github.com/user-attachments/assets/8ca57b68-4d7a-42cb-bcce-43f8b1681ce2 ## News 🔥🔥🔥 -[12-08-2025] Data Formulator 0.5.1: More dataloaders and improved performance -- Data loaders: Google Big Query, MySQL (updated), Postgres (updated), MangoDB. -- New chart types: US Map, Pie Chart. -- Edit generated reports with [ChartArtifact](https://github.com/microsoft/chartifact). -- UI interaction gets much faster. -- Concept-level derivation is deprecated. +[12-08-2025] **Data Formulator 0.5.1** — Connect more, visualize more, move faster +- 🔌 **Community data loaders**: Google BigQuery, MySQL, Postgres, MongoDB +- 📊 **New chart types**: US Map & Pie Chart (more to be added soon) +- ✏️ **Editable reports**: Refine generated reports with [Chartifact](https://github.com/microsoft/chartifact) in markdown style. [demo](https://github.com/microsoft/data-formulator/pull/200#issue-3635408217) +- ⚡ **Snappier UI**: Noticeably faster interactions across the board [11-07-2025] Data Formulator 0.5: Vibe with your data, in control diff --git a/pyproject.toml b/pyproject.toml index d72365d..71e11f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "data_formulator" -version = "0.5.0.3" +version = "0.5.1" requires-python = ">=3.9" authors = [ From 68ac9e6ac69f4bd9f8905f84e325a20bade7deb0 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Mon, 8 Dec 2025 16:11:03 -0800 Subject: [PATCH 33/37] readme style --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9e91ab4..e5f5a8e 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,10 @@ 🪄 Explore data with visualizations, powered by AI agents. Try Data Formulator now! -- Interative exploration with AI agents [(video)](https://www.youtube.com/watch?v=GfTE2FLyMrs) -- Online demo available at [https://data-formulator.ai](https://data-formulator.ai) -- Discuss in the Discord channel! [![Discord](https://img.shields.io/badge/discord-chat-green?logo=discord)](https://discord.gg/mYCZMQKYZb) + + + Try Online Demo + @@ -114,9 +115,9 @@ Here are milestones that lead to the current design: ## Overview -**Data Formulator** is an application from Microsoft Research that uses AI agents to make it easier to turn data into insights. +**Data Formulator** is a Microsoft Research prototype for data exploration with visualizations powered by AI agents. -Data Formulator is an AI-powered tool for analysts to iteratively explore and visualize data. Started with data in any format (screenshot, text, csv, or database), users can work with AI agents with a novel blended interface that combines *user interface interactions (UI)* and *natural language (NL) inputs* to communicate their intents, control branching exploration directions, and create reports to share their insights. +Data Formulator enables analysts to iteratively explore and visualize data. Started with data in any format (screenshot, text, csv, or database), users can work with AI agents with a novel blended interface that combines *user interface interactions (UI)* and *natural language (NL) inputs* to communicate their intents, control branching exploration directions, and create reports to share their insights. ## Get Started From 33d87822aeab94c6b99e5b235e40f31fee24c7ad Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Mon, 8 Dec 2025 16:19:28 -0800 Subject: [PATCH 34/37] readme style --- README.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index e5f5a8e..1c0b035 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,19 @@ -🪄 Explore data with visualizations, powered by AI agents. Try Data Formulator now! - - - - Try Online Demo - +

    + 🪄 Explore data with visualizations, powered by AI agents. +

    + +

    + + Try Online Demo + +    + + Install Locally + +

    From 99ee72b86a9c0298f1a415efa40a984caf1683ed Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Mon, 8 Dec 2025 16:23:28 -0800 Subject: [PATCH 35/37] readme style --- README.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 1c0b035..80cd748 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,9 @@ -

    - Data Formulator icon Data Formulator: Vibe with data, in control +

    + Data Formulator icon  + Data Formulator: Vibe with data, in control

    -
    - -[![arxiv](https://img.shields.io/badge/Paper-arXiv:2408.16119-b31b1b.svg)](https://arxiv.org/abs/2408.16119)  -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)  -[![YouTube](https://img.shields.io/badge/YouTube-white?logo=youtube&logoColor=%23FF0000)](https://www.youtube.com/watch?v=GfTE2FLyMrs)  -[![build](https://github.com/microsoft/data-formulator/actions/workflows/python-build.yml/badge.svg)](https://github.com/microsoft/data-formulator/actions/workflows/python-build.yml) -[![Discord](https://img.shields.io/badge/discord-chat-green?logo=discord)](https://discord.gg/mYCZMQKYZb) -

    🪄 Explore data with visualizations, powered by AI agents. @@ -26,6 +19,14 @@

    +

    + arXiv  + License: MIT  + YouTube  + build  + Discord +

    + https://github.com/user-attachments/assets/8ca57b68-4d7a-42cb-bcce-43f8b1681ce2 From 57676e2b79d6c5ebfe3ab999c0ad8af98b0bb549 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Mon, 8 Dec 2025 16:26:43 -0800 Subject: [PATCH 36/37] finally --- README.md | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 80cd748..f5d2288 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,17 @@

    Data Formulator icon  - Data Formulator: Vibe with data, in control + Data Formulator: AI-powered Data Visualization

    -

    - 🪄 Explore data with visualizations, powered by AI agents. + 🪄 Explore data with visualizations, powered by AI agents.

    - - Try Online Demo - -    - - Install Locally - + Try Online Demo +   + Install Locally

    From 5939cc371279a4e84337681a77b45c9ba803c117 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Mon, 8 Dec 2025 16:31:15 -0800 Subject: [PATCH 37/37] use image for cleaner readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f5d2288..992f666 100644 --- a/README.md +++ b/README.md @@ -23,12 +23,12 @@

    + -https://github.com/user-attachments/assets/8ca57b68-4d7a-42cb-bcce-43f8b1681ce2 - - + + + ## News 🔥🔥🔥