From d1eb48d42f9524f48b34accfbda6bd434b1768ca Mon Sep 17 00:00:00 2001 From: awatercolorpen Date: Sat, 4 Apr 2026 14:55:43 +0800 Subject: [PATCH] docs: add godoc comments to core APIs and improve README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 tasks: Add godoc documentation for core APIs + Improve README Quick Start. - manager.go: package-level doc, Manager type, all public methods - client.go: ClientsOption, Clients type, all public methods - configuration.go: Configuration struct with field comments - database.go: DBOption struct with field docs, NewDB, getDialect - dictionary.go: Option, Dictionary, Translate, NewDictionary - run.go: RunChan, RunSync, BuildResultChan, BuildResultSync README improvements: - Architecture diagram showing query → dictionary → SQL → result flow - Complete runnable Quick Start with NewManager, BuildSQL, RunSync - Common patterns section: filters, streaming with RunChan, SQL inspection - Requirements section noting Go 1.22+ minimum - Cleaner documentation table --- README.md | 256 ++++++++++++++++++++++++++++------------------- client.go | 22 ++++ configuration.go | 13 ++- database.go | 20 +++- dictionary.go | 11 ++ manager.go | 39 ++++++++ run.go | 9 ++ 7 files changed, 259 insertions(+), 111 deletions(-) diff --git a/README.md b/README.md index 04d965b..1b35859 100644 --- a/README.md +++ b/README.md @@ -1,51 +1,37 @@ # olap-sql [![Go](https://github.com/AWaterColorPen/olap-sql/actions/workflows/go.yml/badge.svg)](https://github.com/AWaterColorPen/olap-sql/actions/workflows/go.yml) +[![Go Reference](https://pkg.go.dev/badge/github.com/awatercolorpen/olap-sql.svg)](https://pkg.go.dev/github.com/awatercolorpen/olap-sql) ## Introduction -olap-sql is golang library for generating **adapted sql** by **olap query** with metrics, dimension and filter. -Then get **formatted sql result** by queried metrics and dimension. 
+**olap-sql** is a Go library that turns high-level OLAP query definitions into adapted SQL for multiple database backends (ClickHouse, MySQL, PostgreSQL, SQLite). You describe *what* you want — metrics, dimensions, filters — and olap-sql figures out *how* to query it. -### Example +### How it works -There is unprocessed olap data with table named `wikistat`. - -| date | time | hits | -|------------|---------------------|------| -| 2021-05-07 | 2021-05-07 09:28:27 | 4783 | -| 2021-05-07 | 2021-05-07 09:33:59 | 1842 | -| 2021-05-07 | 2021-05-07 10:34:12 | 0 | -| 2021-05-06 | 2021-05-06 20:32:41 | 5 | -| 2021-05-06 | 2021-05-06 21:16:39 | 139 | - -It wants a sql to query the data with `metrics: sum(hits) / count(*)` and `dimension: date`. - -```sql -SELECT wikistat.date AS date, ( ( 1.0 * SUM(wikistat.hits) ) / NULLIF(( COUNT(*) ), 0) ) AS hits_avg FROM wikistat AS wikistat GROUP BY wikistat.date ``` - -It wants a sql to query the data with `metrics: sum(hits)` and `filter: date <= '2021-05-06'`. - -```sql -SELECT SUM(wikistat.hits) AS hits FROM wikistat AS wikistat WHERE wikistat.date <= '2021-05-06' +Query (metrics + dimensions + filters) + ↓ + Dictionary (schema/config) + ↓ + Clause (backend-specific IR) + ↓ + SQL string ──► Database ──► Result ``` -## Documentation +--- + +## Quick Start -1. [Configuration](./docs/configuration.md) to configure olap-sql instance and OLAP dictionary. -2. [Query](./docs/query.md) to define olap query. -3. [Result](./docs/result.md) to parse olap result. +### 1. Install -## Getting Started +```bash +go get github.com/awatercolorpen/olap-sql +``` -### Define the OLAP dictionary configuration file +### 2. Define the schema (TOML) -Create a new file for example named `olap-sql.toml` to define -[sets](./docs/configuration.md#sets), -[sources](./docs/configuration.md#sources), -[metrics](./docs/configuration.md#metrics), -[dimensions](./docs/configuration.md#dimensions). 
+Create `olap-sql.toml` describing your data model: ```toml sets = [ @@ -57,8 +43,8 @@ sources = [ ] metrics = [ - {data_source = "wikistat", type = "METRIC_SUM", name = "hits", field_name = "hits", value_type = "VALUE_INTEGER"}, - {data_source = "wikistat", type = "METRIC_COUNT", name = "count", field_name = "*", value_type = "VALUE_INTEGER"}, + {data_source = "wikistat", type = "METRIC_SUM", name = "hits", field_name = "hits", value_type = "VALUE_INTEGER"}, + {data_source = "wikistat", type = "METRIC_COUNT", name = "count", field_name = "*", value_type = "VALUE_INTEGER"}, {data_source = "wikistat", type = "METRIC_DIVIDE", name = "hits_avg", value_type = "VALUE_FLOAT", dependency = ["wikistat.hits", "wikistat.count"]}, ] @@ -67,99 +53,159 @@ dimensions = [ ] ``` -### To make use of olap-sql in golang +### 3. Create a Manager + +```go +package main + +import ( + "encoding/json" + "fmt" + "log" + + olapsql "github.com/awatercolorpen/olap-sql" + "github.com/awatercolorpen/olap-sql/api/types" +) + +func main() { + cfg := &olapsql.Configuration{ + // Map each DB type to a connection option. + ClientsOption: olapsql.ClientsOption{ + "clickhouse": { + DSN: "clickhouse://localhost:9000/default", + Type: types.DBTypeClickHouse, + }, + }, + // Point to your TOML schema file. + DictionaryOption: &olapsql.Option{ + AdapterOption: olapsql.AdapterOption{Dsn: "olap-sql.toml"}, + }, + } -Create a new [manager instance](./docs/configuration.md#manager-configuration). 
+ manager, err := olapsql.NewManager(cfg) + if err != nil { + log.Fatal(err) + } -```golang -import "github.com/awatercolorpen/olap-sql" + // --- Build the query --- + queryJSON := `{ + "data_set_name": "wikistat", + "time_interval": {"name": "date", "start": "2021-05-06", "end": "2021-05-08"}, + "metrics": ["hits", "hits_avg"], + "dimensions": ["date"] + }` + + query := &types.Query{} + if err := json.Unmarshal([]byte(queryJSON), query); err != nil { + log.Fatal(err) + } -// set clients option -clientsOption := map[string]*olapsql.DBOption{ - "clickhouse": &olapsql.DBOption{ - DSN: "clickhouse://localhost:9000/default", - Type: "clickhouse" - } -}, + // --- (Optional) Inspect the generated SQL --- + sql, err := manager.BuildSQL(query) + if err != nil { + log.Fatal(err) + } + fmt.Println("Generated SQL:", sql) -// set dictionary option -dictionaryOption := olapsql.AdapterOption{ - Dsn: "olap_sql.toml", -} + // --- Run the query --- + result, err := manager.RunSync(query) + if err != nil { + log.Fatal(err) + } -// build manager configuration -configuration := &olapsql.Configuration{ - ClientsOption: clientsOption, - DictionaryOption: dictionaryOption, + out, _ := json.MarshalIndent(result, "", " ") + fmt.Println(string(out)) } - -// create a new manager instance -manager, err := olapsql.NewManager(configuration) ``` -Build olap-sql [query](./docs/query.md). 
+**Generated SQL** (ClickHouse): + +```sql +SELECT + wikistat.date AS date, + SUM(wikistat.hits) AS hits, + (1.0 * SUM(wikistat.hits)) / NULLIF(COUNT(*), 0) AS hits_avg +FROM wikistat AS wikistat +WHERE wikistat.date >= '2021-05-06' + AND wikistat.date < '2021-05-08' +GROUP BY wikistat.date +``` -```golang -import "github.com/awatercolorpen/olap-sql/api/types" +**Result JSON**: -queryJson := ` +```json { - "data_set_name": "wikistat", - "time_interval": { - "name": "date", - "start": "2021-05-06", - "end": "2021-05-08" - }, - "metrics": [ - "hits", - "hits_avg" - ], - "dimensions": [ - "date" + "dimensions": ["date", "hits", "hits_avg"], + "source": [ + {"date": "2021-05-06T00:00:00Z", "hits": 147, "hits_avg": 49}, + {"date": "2021-05-07T00:00:00Z", "hits": 7178, "hits_avg": 897.25} ] -}` +} +``` + +--- -query := &types.Query{} -err := json.Unmarshal([]byte(queryJson), query) +## Common Patterns + +### Add filters + +```go +query := &types.Query{ + DataSetName: "wikistat", + Metrics: []string{"hits"}, + Filters: []*types.Filter{ + { + OperatorType: types.FilterOperatorTypeLessEquals, + Name: "date", + Value: []any{"2021-05-06"}, + }, + }, +} +``` + +Generated SQL: + +```sql +SELECT SUM(wikistat.hits) AS hits +FROM wikistat AS wikistat +WHERE wikistat.date <= '2021-05-06' ``` -Run query to get result from manager. +### Stream large result sets + +`manager.RunChan` reads rows from the database through a channel while it assembles the result, but it still returns a complete `*types.Result`. To process rows one at a time yourself, build the query with `manager.BuildTransaction` and pass the resulting `*gorm.DB` to the package-level `olapsql.RunChan`, which returns a `chan map[string]any`. Basic usage of `manager.RunChan`: -```golang -// run query with parallel chan +```go result, err := manager.RunChan(query) +``` + +### Inspect SQL without executing + +Use `BuildSQL` to preview the generated query (useful for debugging): -```golang -// run query with sync -result, err := manager.RunSync(query) +```go +sql, err := manager.BuildSQL(query) +fmt.Println(sql) ``` -### Generate SQL then format result +--- -Firstly, auto generate sql. [For detail](./docs/query.md#generate-sql-from-query). 
+## Documentation -Then, get [result](./docs/result.md) json with `dimensions` property and `source` property. +| Document | Description | +|----------|-------------| +| [Configuration](./docs/configuration.md) | Configure Manager, clients, and the OLAP dictionary | +| [Query](./docs/query.md) | Define metrics, dimensions, filters, orders, and limits | +| [Result](./docs/result.md) | Parse and work with query results | -```json -{ - "dimensions": [ - "date", - "hits", - "hits_avg" - ], - "source": [ - { - "date": "2021-05-06T00:00:00Z", - "hits": 147, - "hits_avg": 49 - }, - { - "date": "2021-05-07T00:00:00Z", - "hits": 7178, - "hits_avg": 897.25 - } - ] -} -``` +--- + +## Requirements + +- **Go 1.22+** (uses range-over-integer syntax) +- Supported databases: ClickHouse, MySQL, PostgreSQL, SQLite + +--- ## License diff --git a/client.go b/client.go index 7f828ea..5929481 100644 --- a/client.go +++ b/client.go @@ -8,15 +8,24 @@ import ( "gorm.io/gorm/logger" ) +// ClientsOption is a map from connection-key to DBOption. +// The key is used to look up the correct database connection when running a query. +// Typical keys follow the pattern "{dbType}" (e.g. "clickhouse") or +// "{dbType}/{dataset}" for dataset-scoped connections. type ClientsOption = map[string]*DBOption +// Clients is a registry of open *gorm.DB connections, keyed by "{dbType}" or "{dbType}/{dataset}". type Clients map[string]*gorm.DB +// RegisterByKV registers a *gorm.DB connection under the composite key derived from dbType and dataset. func (c Clients) RegisterByKV(dbType types.DBType, dataset string, db *gorm.DB) { key := c.key(dbType, dataset) c[key] = db } +// RegisterByOption opens database connections for each entry in option +// and registers them in the Clients map. +// Returns an error if any connection cannot be established. 
func (c Clients) RegisterByOption(option ClientsOption) error { for k, v := range option { db, err := v.NewDB() @@ -28,12 +37,18 @@ func (c Clients) RegisterByOption(option ClientsOption) error { return nil } +// SetLogger replaces the GORM logger on every registered connection. +// Call this to enable SQL statement logging or to plug in a custom logger. func (c Clients) SetLogger(log logger.Interface) { for _, v := range c { v.Config.Logger = log } } +// Get returns the *gorm.DB for the given dbType and dataset. +// If no dataset-specific connection is registered, it falls back to the +// type-level connection (dataset == ""). +// Returns an error if neither key is found. func (c Clients) Get(dbType types.DBType, dataset string) (*gorm.DB, error) { key1 := c.key(dbType, dataset) if v, ok := c[key1]; ok { @@ -46,6 +61,7 @@ func (c Clients) Get(dbType types.DBType, dataset string) (*gorm.DB, error) { return nil, fmt.Errorf("not found client %v %v", dbType, dataset) } +// key builds the internal lookup key for a (dbType, dataset) pair. func (c Clients) key(dbType types.DBType, dataset string) string { if dataset == "" { return fmt.Sprintf("%v", dbType) @@ -53,6 +69,8 @@ func (c Clients) key(dbType types.DBType, dataset string) string { return fmt.Sprintf("%v/%v", dbType, dataset) } +// BuildDB selects the correct client for the clause and constructs +// a *gorm.DB with the translated query applied. func (c Clients) BuildDB(clause types.Clause) (*gorm.DB, error) { client, err := c.Get(clause.GetDBType(), clause.GetDataset()) if err != nil { @@ -61,6 +79,8 @@ func (c Clients) BuildDB(clause types.Clause) (*gorm.DB, error) { return clause.BuildDB(client) } +// BuildSQL selects the correct client for the clause and returns +// the SQL string that would be executed, without actually running it. 
func (c Clients) BuildSQL(clause types.Clause) (string, error) { client, err := c.Get(clause.GetDBType(), clause.GetDataset()) if err != nil { @@ -69,6 +89,8 @@ func (c Clients) BuildSQL(clause types.Clause) (string, error) { return clause.BuildSQL(client) } +// NewClients creates a Clients registry by opening connections for each DBOption in option. +// Returns an error if any connection fails to open. func NewClients(option ClientsOption) (Clients, error) { c := Clients{} if err := c.RegisterByOption(option); err != nil { diff --git a/configuration.go b/configuration.go index de7a716..d9ed4e2 100644 --- a/configuration.go +++ b/configuration.go @@ -1,7 +1,14 @@ package olapsql +// Configuration holds the top-level options used to initialise a Manager. +// Both fields are optional; omitting ClientsOption creates a dictionary-only +// instance (query translation only; Manager.BuildSQL and the Run methods still need a registered client), and omitting +// DictionaryOption creates a client-only instance. type Configuration struct { - // configurations for clients, data_dictionary - ClientsOption ClientsOption `json:"clients_option"` - DictionaryOption *Option `json:"dictionary_option"` + // ClientsOption maps connection-key strings to database connection options. + // Each key typically corresponds to a database type (e.g. "clickhouse"). + ClientsOption ClientsOption `json:"clients_option"` + + // DictionaryOption configures the OLAP schema adapter (e.g. a TOML file path). + DictionaryOption *Option `json:"dictionary_option"` } diff --git a/database.go b/database.go index 8d2cce8..39ad745 100644 --- a/database.go +++ b/database.go @@ -11,12 +11,25 @@ import ( "gorm.io/gorm" ) +// DBOption holds the connection parameters for a single database instance. type DBOption struct { - Debug bool `json:"debug"` - DSN string `json:"dsn"` - Type types.DBType `json:"type"` + // Debug enables GORM's debug mode, which prints every generated SQL statement. 
+ Debug bool `json:"debug"` + + // DSN is the data source name (connection string) for the database. + // Format depends on the driver, e.g.: + // ClickHouse: "clickhouse://user:pass@host:9000/db" + // MySQL: "user:pass@tcp(host:3306)/db?charset=utf8" + // PostgreSQL: "host=host user=user password=pass dbname=db port=5432" + // SQLite: "/path/to/file.db" + DSN string `json:"dsn"` + + // Type identifies the database engine. Supported values: clickhouse, mysql, postgre, sqlite. + Type types.DBType `json:"type"` } +// NewDB opens a database connection using the DBOption settings. +// Returns a configured *gorm.DB or an error if the DSN or type is invalid. func (o *DBOption) NewDB() (*gorm.DB, error) { dialect, err := getDialect(o.Type, o.DSN) if err != nil { @@ -34,6 +47,7 @@ func (o *DBOption) NewDB() (*gorm.DB, error) { return db, nil } +// getDialect maps a DBType to the corresponding GORM Dialector. func getDialect(ty types.DBType, dsn string) (gorm.Dialector, error) { switch ty { case types.DBTypeSQLite: diff --git a/dictionary.go b/dictionary.go index 1ab3439..44dcf57 100644 --- a/dictionary.go +++ b/dictionary.go @@ -4,13 +4,19 @@ import ( "github.com/awatercolorpen/olap-sql/api/types" ) +// Option wraps AdapterOption and is the configuration type for the OLAP dictionary. type Option struct { AdapterOption } + +// Dictionary holds the schema adapter and provides query translation capabilities. +// It converts a high-level [types.Query] into a backend-specific [types.Clause]. type Dictionary struct { Adapter IAdapter } +// Translator builds a [Translator] for the given query by looking up the +// target data set in the adapter and wiring up the appropriate translator options. 
func (d *Dictionary) Translator(query *types.Query) (Translator, error) { set, err := d.Adapter.GetDataSetByKey(query.DataSetName) if err != nil { @@ -25,6 +31,8 @@ func (d *Dictionary) Translator(query *types.Query) (Translator, error) { return NewTranslator(option) } +// Translate is a convenience method that builds a Translator and runs it in one call. +// It returns the resulting [types.Clause] ready to be executed against a database client. func (d *Dictionary) Translate(query *types.Query) (types.Clause, error) { translator, err := d.Translator(query) if err != nil { @@ -33,6 +41,9 @@ func (d *Dictionary) Translate(query *types.Query) (types.Clause, error) { return translator.Translate(query) } +// NewDictionary creates a Dictionary from the provided Option. +// The option's DSN is used to locate and parse the schema file (e.g. a TOML config). +// Returns an error if the adapter cannot be initialised. func NewDictionary(option *Option) (*Dictionary, error) { adapter, err := NewAdapter(&option.AdapterOption) if err != nil { diff --git a/manager.go b/manager.go index 38e9577..9ee6abb 100644 --- a/manager.go +++ b/manager.go @@ -1,3 +1,17 @@ +// Package olapsql provides a Go library for generating adapted SQL from OLAP queries. +// It supports metrics, dimensions, and filters to automatically produce SQL +// for multiple database backends (ClickHouse, MySQL, PostgreSQL, SQLite). +// +// Basic usage: +// +// cfg := &olapsql.Configuration{ +// ClientsOption: map[string]*olapsql.DBOption{ +// "clickhouse": {DSN: "clickhouse://localhost:9000/default", Type: "clickhouse"}, +// }, +// DictionaryOption: &olapsql.Option{AdapterOption: olapsql.AdapterOption{Dsn: "olap-sql.toml"}}, +// } +// manager, err := olapsql.NewManager(cfg) +// result, err := manager.RunSync(query) package olapsql import ( @@ -8,11 +22,16 @@ import ( "gorm.io/gorm/logger" ) +// Manager is the main entry point for olap-sql. 
+// It holds a set of database clients and an OLAP dictionary, +// and exposes methods to build or run OLAP queries. type Manager struct { clients Clients dictionary *Dictionary } +// GetClients returns the registered database clients. +// Returns an error if the Manager was not initialised with a ClientsOption. func (m *Manager) GetClients() (Clients, error) { if m.clients == nil { return nil, fmt.Errorf("it is no initialization") @@ -20,6 +39,8 @@ func (m *Manager) GetClients() (Clients, error) { return m.clients, nil } +// GetDictionary returns the OLAP dictionary used for schema translation. +// Returns an error if the Manager was not initialised with a DictionaryOption. func (m *Manager) GetDictionary() (*Dictionary, error) { if m.dictionary == nil { return nil, fmt.Errorf("it is no initialization") @@ -27,6 +48,8 @@ func (m *Manager) GetDictionary() (*Dictionary, error) { return m.dictionary, nil } +// SetLogger attaches a custom GORM logger to all registered database clients. +// Call this after creating the Manager to enable query logging. func (m *Manager) SetLogger(log logger.Interface) { c, err := m.GetClients() if err == nil { @@ -34,6 +57,9 @@ func (m *Manager) SetLogger(log logger.Interface) { } } +// RunSync executes the OLAP query synchronously and returns the result. +// It translates the query into SQL, runs it against the target database, +// and returns a structured Result containing dimensions and row data. func (m *Manager) RunSync(query *types.Query) (*types.Result, error) { db, err := m.BuildTransaction(query) if err != nil { @@ -46,6 +72,9 @@ func (m *Manager) RunSync(query *types.Query) (*types.Result, error) { return BuildResultSync(query, rows) } +// RunChan executes the OLAP query, reading rows from the database over a channel. +// The rows are collected by BuildResultChan, so the caller still receives a +// complete Result rather than a channel; use the package-level RunChan to stream rows. 
func (m *Manager) RunChan(query *types.Query) (*types.Result, error) { db, err := m.BuildTransaction(query) if err != nil { @@ -58,6 +87,9 @@ func (m *Manager) RunChan(query *types.Query) (*types.Result, error) { return BuildResultChan(query, rows) } +// BuildTransaction translates a Query into a *gorm.DB ready to execute. +// Use this when you need direct access to the GORM DB object, +// for example to add custom GORM hooks or inspect the generated SQL. func (m *Manager) BuildTransaction(query *types.Query) (*gorm.DB, error) { clients, clause, err := m.build(query) if err != nil { @@ -66,6 +98,8 @@ func (m *Manager) BuildTransaction(query *types.Query) (*gorm.DB, error) { return clients.BuildDB(clause) } +// BuildSQL translates a Query into its SQL string without executing it. +// Useful for debugging, logging, or displaying the generated SQL to users. func (m *Manager) BuildSQL(query *types.Query) (string, error) { clients, clause, err := m.build(query) if err != nil { @@ -74,6 +108,8 @@ func (m *Manager) BuildSQL(query *types.Query) (string, error) { return clients.BuildSQL(clause) } +// build is the internal helper that resolves the dictionary and clients +// needed to translate and execute a query. func (m *Manager) build(query *types.Query) (Clients, types.Clause, error) { query.TranslateTimeIntervalToFilter() dict, err := m.GetDictionary() @@ -91,6 +127,9 @@ func (m *Manager) build(query *types.Query) (Clients, types.Clause, error) { return clients, clause, nil } +// NewManager creates and initialises a Manager from the provided Configuration. +// At least one of ClientsOption or DictionaryOption should be set. +// Returns an error if any client DSN is invalid or the dictionary file cannot be parsed. 
func NewManager(configuration *Configuration) (*Manager, error) { m := &Manager{} if configuration.ClientsOption != nil { diff --git a/run.go b/run.go index 49b4b5c..c47f826 100644 --- a/run.go +++ b/run.go @@ -5,6 +5,10 @@ import ( "gorm.io/gorm" ) +// RunChan executes the prepared *gorm.DB query and returns results over a channel. +// Each row is scanned into a map[string]any and sent on the returned channel. +// The channel is closed automatically when all rows have been consumed. +// This is suitable for streaming large result sets without loading them all into memory. func RunChan(db *gorm.DB) (chan map[string]any, error) { rows, err := db.Rows() if err != nil { @@ -26,11 +30,15 @@ func RunChan(db *gorm.DB) (chan map[string]any, error) { return ch, nil } +// RunSync executes the prepared *gorm.DB query and returns all rows as a slice. +// Prefer RunSync for small-to-medium result sets; use RunChan for very large ones. func RunSync(db *gorm.DB) ([]map[string]any, error) { var result []map[string]any return result, db.Scan(&result).Error } +// BuildResultChan collects rows from a streaming channel and assembles a Result. +// It sets the dimension/metric names from the query and appends each incoming row. func BuildResultChan(query *types.Query, in chan map[string]any) (*types.Result, error) { result := &types.Result{} result.SetDimensions(query) @@ -42,6 +50,7 @@ func BuildResultChan(query *types.Query, in chan map[string]any) (*types.Result, return result, nil } +// BuildResultSync wraps a slice of rows into a Result with dimension/metric metadata. func BuildResultSync(query *types.Query, in []map[string]any) (*types.Result, error) { result := &types.Result{} result.SetDimensions(query)