def sql_sys_question(self, db_type: Union[str, DB], enable_query_limit: bool = True):
    """Render the system prompt used for SQL generation.

    Args:
        db_type: Data source type (type string or ``DB`` member). It is
            forwarded to ``get_sql_example_template`` to pick the
            dialect-specific rules and examples.
        enable_query_limit: If true, the ``query_limit`` text of the base SQL
            template and the ``*_with_limit`` example answers are used;
            otherwise that text is empty and the plain example answers are used.

    Returns:
        The ``system`` entry of the base SQL template after ``str.format``
        has filled in the question fields, rules and examples.
    """
    dialect = get_sql_example_template(db_type)

    def _answer(limited_key, plain_key):
        # Only the variant matching the limit switch is looked up.
        return dialect[limited_key if enable_query_limit else plain_key]

    rules = dialect['quot_rule'] + dialect['limit_rule'] + dialect['other_rule']
    if enable_query_limit:
        limit_text = get_sql_template()['query_limit']
    else:
        limit_text = ''
    examples = dialect['basic_example']
    engine_label = dialect['example_engine']
    first_answer = _answer('example_answer_1_with_limit', 'example_answer_1')
    second_answer = _answer('example_answer_2_with_limit', 'example_answer_2')
    third_answer = _answer('example_answer_3_with_limit', 'example_answer_3')

    system_prompt = get_sql_template()['system']
    return system_prompt.format(
        engine=self.engine,
        schema=self.db_schema,
        question=self.question,
        lang=self.lang,
        terminologies=self.terminologies,
        data_training=self.data_training,
        custom_prompt=self.custom_prompt,
        base_sql_rules=rules,
        query_limit=limit_text,
        basic_sql_examples=examples,
        example_engine=engine_label,
        example_answer_1=first_answer,
        example_answer_2=second_answer,
        example_answer_3=third_answer,
    )
def __init__(self, type, db_name, prefix, suffix, connect_type: ConnectType, template_name: str):
    """Unpack one member's value tuple into named attributes.

    Args:
        type: Type key of the database; ``get_db`` matches against it.
        db_name: Display name of the database.
        prefix: Opening identifier-quote character (e.g. a backtick or ``[``),
            judging by the member values.
        suffix: Closing identifier-quote character, judging by the member values.
        connect_type: ``ConnectType`` member recorded for this database.
        template_name: Base file name (without ``.yaml``) of the SQL example
            template for this database.
    """
    self.type, self.db_name = type, db_name
    self.prefix, self.suffix = prefix, suffix
    self.connect_type, self.template_name = connect_type, template_name
@cache
def _load_template_file(file_path: Path):
    """Load and parse a YAML template file, memoizing the result per path.

    Args:
        file_path: Location of the UTF-8 encoded YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file. Because of
        ``@cache`` every caller receives the same object for a given path,
        so it should be treated as read-only.

    Raises:
        FileNotFoundError: The file does not exist. The original OS error is
            attached as ``__cause__``.
        ValueError: The file is not valid YAML. The parser error is attached
            as ``__cause__``.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)
    except FileNotFoundError as e:
        # Chain explicitly so the underlying OS error (errno, filename) is
        # reported as the direct cause rather than as incidental context.
        raise FileNotFoundError(f"Template file not found at {file_path}") from e
    except yaml.YAMLError as e:
        raise ValueError(f"Error parsing YAML file {file_path}: {e}") from e
def get_sql_template(db_type: Union[str, DB]):
    """Return the parsed SQL example template for a database type.

    Args:
        db_type: A ``DB`` member, or a type string resolved through
            ``DB.get_db`` with ``default_if_none=True``. Any other value
            selects ``DB.pg``.

    Returns:
        The parsed content of ``<template_name>.yaml`` under ``SQL_TEMPLATES_DIR``.
    """
    if isinstance(db_type, DB):
        db_enum = db_type
    elif isinstance(db_type, str):
        # Unknown type strings resolve to DB.pg instead of raising.
        db_enum = DB.get_db(db_type, default_if_none=True)
    else:
        db_enum = DB.pg

    # The member's template_name doubles as the file name.
    file_name = f"{db_enum.template_name}.yaml"
    return _load_template_file(SQL_TEMPLATES_DIR / file_name)


def get_all_sql_templates():
    """Collect the SQL example templates of every ``DB`` member.

    Returns:
        A dict mapping ``db.type`` to its parsed template. Members whose
        template file is missing are left out.
    """
    loaded = {}
    for member in DB:
        try:
            template = get_sql_template(member)
        except FileNotFoundError:
            # No template file for this database: skip it.
            continue
        loaded[member.type] = template
    return loaded


def reload_all_templates():
    """Clear the template cache so files are re-read on next access."""
    _load_template_file.cache_clear()
可选写法:FETCH FIRST 100 ROWS ONLY + + + + other_rule: | + 必须为每个表生成别名(不加AS) + 禁止使用星号(*),必须明确字段名 + 中文/特殊字符字段需保留原名并添加英文别名 + 函数字段必须加别名 + 百分比字段保留两位小数并以%结尾(使用ROUND+CONCAT) + 避免与Redshift关键字冲突(如USER/GROUP/ORDER等) + + basic_example: | + + + 📌 以下示例严格遵循中的 AWS Redshift 规范,展示符合要求的 SQL 写法与典型错误案例。 + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 双引号包裹所有数据库对象的规范用法 + 2. 中英别名/百分比/函数等特殊字段的处理 + 3. 关键字冲突的规避方式 + + + 查询 TEST.SALES 表的前100条订单(含百分比计算) + + SELECT * FROM TEST.SALES LIMIT 100 -- 错误:未加引号、使用星号 + SELECT "订单ID", "金额" FROM "TEST"."SALES" "t1" FETCH FIRST 100 ROWS ONLY -- 错误:缺少英文别名 + SELECT COUNT("订单ID") FROM "TEST"."SALES" "t1" -- 错误:函数未加别名 + + + SELECT + "t1"."订单ID" AS "order_id", + "t1"."金额" AS "amount", + COUNT("t1"."订单ID") AS "total_orders", + CONCAT(ROUND("t1"."折扣率" * 100, 2), '%') AS "discount_percent" + FROM "TEST"."SALES" "t1" + LIMIT 100 + + + + + 统计用户表 PUBLIC.USERS(含关键字字段user)的活跃占比 + + SELECT user, status FROM PUBLIC.USERS -- 错误:未处理关键字和引号 + SELECT "user", ROUND(active_ratio) FROM "PUBLIC"."USERS" -- 错误:百分比格式错误 + + + SELECT + "u"."user" AS "user_account", + CONCAT(ROUND("u"."active_ratio" * 100, 2), '%') AS "active_percent" + FROM "PUBLIC"."USERS" "u" + WHERE "u"."status" = 1 + FETCH FIRST 1000 ROWS ONLY + + + + + example_engine: AWS Redshift 1.0 + example_answer_1: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\"","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\" LIMIT 1000 OFFSET 0","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" 
AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC LIMIT 1000 OFFSET 0","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_3: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国' LIMIT 1000 OFFSET 0","tables":["sample_country_gdp"],"chart-type":"table"} diff --git a/backend/templates/sql_examples/ClickHouse.yaml b/backend/templates/sql_examples/ClickHouse.yaml new file mode 100644 index 00000000..0b2b9bc2 --- /dev/null +++ b/backend/templates/sql_examples/ClickHouse.yaml @@ -0,0 +1,90 @@ +template: + quot_rule: | + + 必须对数据库名、表名、字段名、别名外层加双引号(")。 + + 1. 点号(.)不能包含在引号内,必须写成 "database"."table" + 2. ClickHouse 严格区分大小写,必须通过引号保留原始大小写 + 3. 嵌套字段使用点号连接:`"json_column.field"` + + + + limit_rule: | + + 行数限制使用标准SQL语法: + + 1. 标准写法:LIMIT [count] + 2. 分页写法:LIMIT [count] OFFSET [start] + 3. 禁止使用原生 `topk()` 等函数替代 + + + + other_rule: | + 必须为每个表生成简短别名(如t1/t2) + 禁止使用星号(*),必须明确字段名 + JSON字段需用点号语法访问:`"column.field"` + 函数字段必须加别名 + 百分比显示为:`ROUND(x*100,2) || '%'` + 避免与ClickHouse关键字冲突(如`timestamp`/`default`) + + basic_example: | + + + 📌 以下示例严格遵循中的 ClickHouse 规范,展示符合要求的 SQL 写法与典型错误案例。 + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 双引号包裹所有数据库对象的规范用法 + 2. 中英别名/百分比/函数等特殊字段的处理 + 3. 
关键字冲突的规避方式 + + + 查询 events 表的前100条错误日志(含JSON字段) + + SELECT * FROM default.events LIMIT 100 -- 错误1:使用星号 + SELECT message FROM "default"."events" WHERE level = 'error' -- 错误2:未处理JSON字段 + SELECT "message", "extra.error_code" FROM events LIMIT 100 -- 错误3:表名未加引号 + + + SELECT + "e"."message" AS "log_content", + "e"."extra"."error_code" AS "error_id", + toDateTime("e"."timestamp") AS "log_time" + FROM "default"."events" "e" + WHERE "e"."level" = 'error' + LIMIT 100 + + + + + 统计各地区的错误率Top 5(含百分比) + + SELECT region, COUNT(*) FROM events GROUP BY region -- 错误1:使用COUNT(*) + SELECT "region", MAX("count") FROM "events" GROUP BY 1 -- 错误2:使用序号分组 + + + SELECT + "e"."region" AS "area", + COUNT(*) AS "total", + COUNTIf("e"."level" = 'error') AS "error_count", + ROUND(error_count * 100.0 / total, 2) || '%' AS "error_rate" + FROM "default"."events" "e" + GROUP BY "e"."region" + ORDER BY "error_rate" DESC + LIMIT 5 + + + + + example_engine: ClickHouse 23.3 + example_answer_1: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\"","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\" LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM 
\"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_3: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"} diff --git a/backend/templates/sql_examples/DM.yaml b/backend/templates/sql_examples/DM.yaml new file mode 100644 index 00000000..4f5314ad --- /dev/null +++ b/backend/templates/sql_examples/DM.yaml @@ -0,0 +1,87 @@ +template: + quot_rule: | + + 必须对模式名、表名、字段名、别名外层加双引号(")。 + + 1. 点号(.)不能包含在引号内,必须写成 "schema"."table" + 2. 即使标识符不含特殊字符或非关键字,也需强制加双引号 + 3. 达梦默认将未加引号的标识符转为大写 + + + + limit_rule: | + + 行数限制使用达梦兼容语法: + + 1. 标准写法:LIMIT [count] + 2. 分页写法:LIMIT [offset], [count] 或 LIMIT [count] OFFSET [start] + 3. 兼容Oracle写法:WHERE ROWNUM <= 100(需达梦7+版本) + + + + other_rule: | + 必须为每个表生成别名(不加AS) + 禁止使用星号(*),必须明确字段名 + 中文/特殊字符字段需保留原名并添加英文别名 + 函数字段必须加别名 + 百分比字段使用TO_CHAR(x*100,'990.99') || '%' + 避免与达梦关键字冲突(如LEVEL/ORDER/PARTITION等) + + basic_example: | + + + 📌 以下示例严格遵循中的 达梦(DM) 规范,展示符合要求的 SQL 写法与典型错误案例。 + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 双引号包裹所有数据库对象的规范用法 + 2. 中英别名/百分比/函数等特殊字段的处理 + 3. 
关键字冲突的规避方式 + + + 查询 TEST.ORDERS 表的前100条订单(含中文字段和百分比) + + SELECT * FROM TEST.ORDERS LIMIT 100 -- 错误:未加引号、使用星号 + SELECT "订单ID", "金额" FROM "TEST"."ORDERS" "t1" WHERE ROWNUM <= 100 -- 错误:混用ROWNUM和LIMIT + SELECT COUNT("订单ID") FROM "TEST"."ORDERS" "t1" -- 错误:函数未加别名 + + + SELECT + "t1"."订单ID" AS "order_id", + "t1"."金额" AS "amount", + COUNT("t1"."订单ID") AS "total_orders", + TO_CHAR("t1"."折扣率" * 100, '990.99') || '%' AS "discount_percent" + FROM "TEST"."ORDERS" "t1" + LIMIT 100 + + + + + 统计用户表 PUBLIC.USERS(含关键字字段LEVEL)的活跃占比 + + SELECT LEVEL, status FROM PUBLIC.USERS -- 错误:未处理关键字 + SELECT "LEVEL", ROUND(active_ratio*100) FROM "PUBLIC"."USERS" -- 错误:百分比格式错误 + + + SELECT + "u"."LEVEL" AS "user_level", + TO_CHAR("u"."active_ratio" * 100, '990.99') || '%' AS "active_percent" + FROM "PUBLIC"."USERS" "u" + WHERE "u"."status" = 1 + LIMIT 0, 1000 -- 达梦兼容写法 + + + + + example_engine: DM Database 8 + example_answer_1: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\"","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\" LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC LIMIT 
1000","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_3: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"} diff --git a/backend/templates/sql_examples/Doris.yaml b/backend/templates/sql_examples/Doris.yaml new file mode 100644 index 00000000..1849dd53 --- /dev/null +++ b/backend/templates/sql_examples/Doris.yaml @@ -0,0 +1,89 @@ +template: + quot_rule: | + + 必须对数据库名、表名、字段名、别名外层加反引号(`)。 + + 1. 点号(.)不能包含在反引号内,必须写成 `database`.`table` + 2. 当标识符为关键字、含特殊字符或需保留大小写时必须加反引号 + 3. Doris 标识符默认不区分大小写,但建议统一使用反引号 + + + + limit_rule: | + + 行数限制使用标准SQL语法: + + 1. 标准写法:LIMIT [count] + 2. 分页写法:LIMIT [count] OFFSET [start] + 3. 支持 MySQL 兼容写法:LIMIT [offset], [count] + + + + other_rule: | + 必须为每个表生成别名(不加AS) + 禁止使用星号(*),必须明确字段名 + 中文/特殊字符字段需保留原名并添加英文别名 + 函数字段必须加别名 + 百分比显示为:CONCAT(ROUND(x*100,2),'%') + 避免与Doris关键字冲突(如`rank`/`partition`/`values`) + 分区查询需使用PARTITION语句明确指定 + + basic_example: | + + + 📌 以下示例严格遵循中的 Doris 规范,展示符合要求的 SQL 写法与典型错误案例。 + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 反引号包裹所有数据库对象的规范用法 + 2. 中英别名/百分比/函数等特殊字段的处理 + 3. 
关键字冲突的规避方式 + + + 查询 test.orders 表的前100条订单(含中文字段和百分比) + + SELECT * FROM test.orders LIMIT 100 -- 错误:使用星号 + SELECT `订单ID`, `金额` FROM `test`.`orders` `t1` LIMIT 100 -- 错误:缺少英文别名 + SELECT COUNT(`订单ID`) FROM `test`.`orders` `t1` -- 错误:函数未加别名 + + + SELECT + `t1`.`订单ID` AS `order_id`, + `t1`.`金额` AS `amount`, + COUNT(`t1`.`订单ID`) AS `total_orders`, + CONCAT(ROUND(`t1`.`折扣率` * 100, 2), '%') AS `discount_percent` + FROM `test`.`orders` `t1` + LIMIT 100 + + + + + 统计用户表 dw.users(含分区字段和关键字rank)的活跃占比 + + SELECT rank, status FROM dw.users -- 错误:未处理关键字 + SELECT `rank`, ROUND(active_ratio*100) FROM `dw`.`users` -- 错误:百分比格式错误 + + + SELECT + `u`.`rank` AS `user_rank`, + CONCAT(ROUND(`u`.`active_ratio` * 100, 2), '%') AS `active_percent` + FROM `dw`.`users` `u` + WHERE `u`.`status` = 1 + AND `u`.`dt` = '2024-01-01' -- Doris分区字段过滤 + LIMIT 1000 + + + + + example_engine: Apache Doris 2.0 + example_answer_1: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `continent` AS `continent_name`, `year` AS `year`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` ORDER BY `country`, `year`","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `continent` AS `continent_name`, `year` AS `year`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` ORDER BY `country`, `year` LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2024' ORDER BY `gdp` DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2024' ORDER BY `gdp` DESC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_3: | + 
{"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2025' AND `country` = '中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2025' AND `country` = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"} diff --git a/backend/templates/sql_examples/Elasticsearch.yaml b/backend/templates/sql_examples/Elasticsearch.yaml new file mode 100644 index 00000000..e14e67a0 --- /dev/null +++ b/backend/templates/sql_examples/Elasticsearch.yaml @@ -0,0 +1,91 @@ +template: + quot_rule: | + + 必须对索引名(表名)、字段名、别名外层加双引号(")。 + + 1. 通配符索引需整体加引号:`"logs-*"` + 2. 嵌套字段用点号连接:`"user.name"` + 3. 时间字段必须显式转换:`CAST("@timestamp" AS TIMESTAMP)` + + + + limit_rule: | + + 行数限制必须使用标准SQL语法: + + 1. 首选:`LIMIT [count]` + 2. 分页:`LIMIT [count] OFFSET [start]` + 3. 禁止使用Elasticsearch原生`size/from`参数 + + + + other_rule: | + 必须为每个索引生成别名(不加AS) + 禁止使用星号(*),必须明确字段名 + 中文/特殊字符字段需保留原名并添加英文别名 + 函数字段必须加别名 + 百分比显示为:`ROUND(x*100,2) || '%'` + 避免与Elasticsearch关键字冲突(如`score`/`type`) + + basic_example: | + + + 📌 以下示例严格遵循中的 Elasticsearch 规范,展示符合要求的 SQL 写法与典型错误案例。 + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 双引号包裹所有数据库对象的规范用法 + 2. 中英别名/百分比/函数等特殊字段的处理 + 3. 
关键字冲突的规避方式 + + + 查询 logs-* 索引的前100条错误日志(含时间过滤) + + SELECT * FROM logs-* WHERE level = 'error' LIMIT 100 -- 错误1:使用星号 + SELECT message FROM "logs-*" WHERE @timestamp > NOW() - 1d -- 错误2:未转换时间 + SELECT "message", "@timestamp" FROM "logs-*" LIMIT 100 -- 错误3:未处理时间字段 + + + SELECT + "l"."message" AS "log_content", + "l"."host.ip" AS "client_ip", + CAST("l"."@timestamp" AS TIMESTAMP) AS "log_time" + FROM "logs-*" "l" + WHERE "l"."level" = 'error' + AND CAST("l"."@timestamp" AS TIMESTAMP) > DATE_SUB(NOW(), INTERVAL 1 DAY) + LIMIT 100 + + + + + 统计各地区的错误日志占比(Top 5) + + SELECT region, COUNT(*) FROM "logs-*" GROUP BY region -- 错误1:使用COUNT(*) + SELECT "region", MAX("count") FROM "logs-*" GROUP BY 1 -- 错误2:使用序号分组 + + + SELECT + "l"."region" AS "area", + COUNT("l"."message") AS "error_count", + ROUND(COUNT("l"."message") * 100.0 / SUM(COUNT("l"."message")) OVER (), 2) || '%' AS "error_percent" + FROM "logs-*" "l" + WHERE "l"."level" = 'error' + GROUP BY "l"."region" + ORDER BY "error_count" DESC + LIMIT 5 + + + + + example_engine: Elasticsearch SQL 8.9 + example_answer_1: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\"","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\" LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + 
{"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_3: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"} diff --git a/backend/templates/sql_examples/Kingbase.yaml b/backend/templates/sql_examples/Kingbase.yaml new file mode 100644 index 00000000..cc3e4e33 --- /dev/null +++ b/backend/templates/sql_examples/Kingbase.yaml @@ -0,0 +1,83 @@ +template: + quot_rule: | + + 必须对数据库名、表名、字段名、别名外层加双引号(")。 + + 1. 点号(.)不能包含在引号内,必须写成 "schema"."table" + 2. 即使标识符不含特殊字符或非关键字,也需强制加双引号 + 3. Kingbase 默认将未加引号的标识符转为小写 + + + + limit_rule: | + + 行数限制使用标准语法: + + 1. 标准写法:LIMIT [count] + 2. 分页写法:LIMIT [count] OFFSET [start] + 3. 兼容 Oracle 写法:WHERE ROWNUM <= 100(需开启兼容模式) + + + + other_rule: | + 必须为每个表生成别名(不加AS) + 禁止使用星号(*),必须明确字段名 + 中文/特殊字符字段需保留原名并添加英文别名 + 函数字段必须加别名 + 百分比显示为:TO_CHAR(x*100,'990.99') || '%' + 避免与Kingbase关键字冲突(如SYSDATE/LEVEL等) + + basic_example: | + + + 📌 以下示例严格遵循 Kingbase 语法规范(兼容 PostgreSQL) + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 双引号包裹所有数据库对象 + 2. 中英别名/百分比/函数等特殊字段处理 + 3. 
关键字冲突规避(如SYSDATE/LEVEL) + + + 查询 TEST.ORDERS 表的前100条订单(含中文字段) + + SELECT * FROM TEST.ORDERS LIMIT 100 -- 错误:未加引号、使用星号 + SELECT "订单ID", "金额" FROM "TEST"."ORDERS" "t1" -- 错误:缺少英文别名 + + + SELECT + "t1"."订单ID" AS "order_id", + "t1"."金额" AS "amount_usd", + TO_CHAR("t1"."折扣率" * 100, '990.99') || '%' AS "discount_percent" + FROM "TEST"."ORDERS" "t1" + LIMIT 100 + + + + 统计用户表 PUBLIC.USERS(含关键字字段LEVEL) + + SELECT LEVEL, status FROM PUBLIC.USERS -- 错误:未处理关键字 + SELECT "LEVEL", "active_ratio" FROM "PUBLIC"."USERS" -- 错误:百分比未格式化 + + + SELECT + "u"."LEVEL" AS "user_level", + TO_CHAR("u"."active_ratio" * 100, '990.99') || '%' AS "active_percent" + FROM "PUBLIC"."USERS" "u" + WHERE "u"."status" = 1 + + + + + example_engine: Kingbase V8 + example_answer_1: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\"","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\" LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_3: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS 
\"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp_usd\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"} \ No newline at end of file diff --git a/backend/templates/sql_examples/Microsoft_SQL_Server.yaml b/backend/templates/sql_examples/Microsoft_SQL_Server.yaml new file mode 100644 index 00000000..63d2e289 --- /dev/null +++ b/backend/templates/sql_examples/Microsoft_SQL_Server.yaml @@ -0,0 +1,87 @@ +template: + quot_rule: | + + 必须对数据库名、表名、字段名、别名外层加方括号([])。 + + 1. 点号(.)不能包含在方括号内,必须写成 [schema].[table] + 2. 当标识符为关键字、含特殊字符或需保留大小写时必须加方括号 + 3. SQL Server 标识符默认不区分大小写 + + + + limit_rule: | + + 行数限制使用标准SQL语法: + + 1. 标准写法:TOP [count] + 2. 分页写法:OFFSET [start] ROWS FETCH NEXT [count] ROWS ONLY + 3. 禁止使用 MySQL 的 LIMIT 语法 + + + + other_rule: | + 必须为每个表生成别名(不加AS) + 禁止使用星号(*),必须明确字段名 + 中文/特殊字符字段需保留原名并添加英文别名 + 函数字段必须加别名 + 百分比显示为:CONVERT(VARCHAR, ROUND(x*100,2)) + '%' + 避免与SQL Server关键字冲突(如[user]/[order]/[desc]) + + basic_example: | + + + 📌 以下示例严格遵循中的 Microsoft SQL Server 规范,展示符合要求的 SQL 写法与典型错误案例。 + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 双引号包裹所有数据库对象的规范用法 + 2. 中英别名/百分比/函数等特殊字段的处理 + 3. 
关键字冲突的规避方式 + + + 查询 Sales.Orders 表的前100条订单(含中文字段和百分比) + + SELECT * FROM Sales.Orders -- 错误:使用星号 + SELECT [订单ID], [金额] FROM [Sales].[Orders] [o] -- 错误:缺少英文别名 + SELECT COUNT([订单ID]) FROM [Sales].[Orders] [o] -- 错误:函数未加别名 + + + SELECT TOP 100 + [o].[订单ID] AS [order_id], + [o].[金额] AS [amount], + COUNT([o].[订单ID]) AS [total_orders], + CONVERT(VARCHAR, ROUND([o].[折扣率] * 100, 2)) + '%' AS [discount_percent] + FROM [Sales].[Orders] [o] + + + + + 统计用户表 dbo.Users(含关键字字段user)的活跃占比 + + SELECT user, status FROM dbo.Users -- 错误:未处理关键字 + SELECT [user], ROUND(active_ratio*100) FROM [dbo].[Users] -- 错误:百分比格式错误 + + + SELECT + [u].[user] AS [user_name], + CONVERT(VARCHAR, ROUND([u].[active_ratio] * 100, 2)) + '%' AS [active_percent] + FROM [dbo].[Users] [u] + WHERE [u].[status] = 1 + ORDER BY [u].[create_date] + OFFSET 0 ROWS FETCH NEXT 1000 ROWS ONLY + + + + + example_engine: Microsoft SQL Server 2022 + example_answer_1: | + {"success":true,"sql":"SELECT [country] AS [country_name], [continent] AS [continent_name], [year] AS [year], [gdp] AS [gdp_usd] FROM [Sample_Database].[sample_country_gdp] ORDER BY [country], [year]","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT TOP 1000 [country] AS [country_name], [continent] AS [continent_name], [year] AS [year], [gdp] AS [gdp_usd] FROM [Sample_Database].[sample_country_gdp] ORDER BY [country], [year]","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT [country] AS [country_name], [gdp] AS [gdp_usd] FROM [Sample_Database].[sample_country_gdp] WHERE [year] = '2024' ORDER BY [gdp] DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + {"success":true,"sql":"SELECT TOP 1000 [country] AS [country_name], [gdp] AS [gdp_usd] FROM [Sample_Database].[sample_country_gdp] WHERE [year] = '2024' ORDER BY [gdp] DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_3: | + 
{"success":true,"sql":"SELECT [country] AS [country_name], [gdp] AS [gdp_usd] FROM [Sample_Database].[sample_country_gdp] WHERE [year] = '2025' AND [country] = N'中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT TOP 1000 [country] AS [country_name], [gdp] AS [gdp_usd] FROM [Sample_Database].[sample_country_gdp] WHERE [year] = '2025' AND [country] = N'中国'","tables":["sample_country_gdp"],"chart-type":"table"} diff --git a/backend/templates/sql_examples/MySQL.yaml b/backend/templates/sql_examples/MySQL.yaml new file mode 100644 index 00000000..e2b9e965 --- /dev/null +++ b/backend/templates/sql_examples/MySQL.yaml @@ -0,0 +1,86 @@ +template: + quot_rule: | + + 必须对数据库名、表名、字段名、别名外层加反引号(`)。 + + 1. 点号(.)不能包含在反引号内,必须写成 `schema`.`table` + 2. 当标识符为关键字、含特殊字符或需保留大小写时必须加反引号 + 3. MySQL 默认不区分大小写(Linux系统区分表名大小写) + + + + limit_rule: | + + 行数限制使用标准SQL语法: + + 1. 标准写法:LIMIT [count] + 2. 分页写法:LIMIT [offset], [count] 或 LIMIT [count] OFFSET [start] + + + + other_rule: | + 必须为每个表生成别名(不加AS) + 禁止使用星号(*),必须明确字段名 + 中文/特殊字符字段需保留原名并添加英文别名 + 函数字段必须加别名 + 百分比显示为:CONCAT(ROUND(x*100,2),'%') + 避免与MySQL关键字冲突(如`order`/`group`/`desc`) + + basic_example: | + + + 📌 以下示例严格遵循中的 MySQL 规范,展示符合要求的 SQL 写法与典型错误案例。 + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 反引号包裹所有数据库对象的规范用法 + 2. 中英别名/百分比/函数等特殊字段的处理 + 3. 
关键字冲突的规避方式 + + + 查询 test.orders 表的前100条订单(含中文字段和百分比) + + SELECT * FROM test.orders LIMIT 100 -- 错误:使用星号 + SELECT `订单ID`, `金额` FROM `test`.`orders` `t1` LIMIT 100 -- 错误:缺少英文别名 + SELECT COUNT(`订单ID`) FROM `test`.`orders` `t1` -- 错误:函数未加别名 + + + SELECT + `t1`.`订单ID` AS `order_id`, + `t1`.`金额` AS `amount`, + COUNT(`t1`.`订单ID`) AS `total_orders`, + CONCAT(ROUND(`t1`.`折扣率` * 100, 2), '%') AS `discount_percent` + FROM `test`.`orders` `t1` + LIMIT 100 + + + + + 统计用户表 public.users(含关键字字段desc)的活跃占比 + + SELECT desc, status FROM public.users -- 错误:未处理关键字 + SELECT `desc`, ROUND(active_ratio*100) FROM `public`.`users` -- 错误:百分比格式错误 + + + SELECT + `u`.`desc` AS `description`, + CONCAT(ROUND(`u`.`active_ratio` * 100, 2), '%') AS `active_percent` + FROM `public`.`users` `u` + WHERE `u`.`status` = 1 + LIMIT 1000 + + + + + example_engine: MySQL 8.0 + example_answer_1: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `continent` AS `continent_name`, `year` AS `year`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` ORDER BY `country`, `year`","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `continent` AS `continent_name`, `year` AS `year`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` ORDER BY `country`, `year` LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2024' ORDER BY `gdp` DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2024' ORDER BY `gdp` DESC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_3: | + {"success":true,"sql":"SELECT `country` AS 
`country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2025' AND `country` = '中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2025' AND `country` = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"} diff --git a/backend/templates/sql_examples/Oracle.yaml b/backend/templates/sql_examples/Oracle.yaml new file mode 100644 index 00000000..49218969 --- /dev/null +++ b/backend/templates/sql_examples/Oracle.yaml @@ -0,0 +1,86 @@ +template: + quot_rule: | + + 必须对数据库名、表名、字段名、别名外层加双引号(")。 + + 1. 点号(.)不能包含在引号内,必须写成 "schema"."table" + 2. 即使标识符不含特殊字符或非关键字,也需强制加双引号 + 3. Oracle 对象名默认大写,如需小写必须加双引号 + + + + limit_rule: | + + 使用 ROWNUM 或 FETCH FIRST 限制行数(Oracle 12c+) + + 1. 传统写法:WHERE ROWNUM <= 100 + 2. 现代写法:FETCH FIRST 100 ROWS ONLY + + + + other_rule: | + 必须为每个表生成别名(不加AS) + 禁止使用星号(*),必须明确字段名 + 中文/特殊字符字段需保留原名并添加英文别名 + 函数字段必须加别名 + 百分比字段保留两位小数并以%结尾 + 避免与 Oracle 关键字冲突(如 DATE/LEVEL/ORDER 等) + + basic_example: | + + + 📌 以下示例严格遵循中的 Oracle 规范,展示符合要求的 SQL 写法与典型错误案例。 + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 双引号包裹所有数据库对象的规范用法 + 2. 中英别名/百分比/函数等特殊字段的处理 + 3. 
关键字冲突的规避方式 + + + 查询 TEST.ORDERS 表的前100条订单(含中文字段和百分比) + + SELECT * FROM TEST.ORDERS WHERE ROWNUM <= 100 -- 错误:未加引号、使用星号 + SELECT "订单ID", "金额" FROM "TEST"."ORDERS" "t1" WHERE ROWNUM <= 100 -- 错误:缺少英文别名 + SELECT COUNT("订单ID") FROM "TEST"."ORDERS" "t1" -- 错误:函数未加别名 + + + SELECT + "t1"."订单ID" AS "order_id", + "t1"."金额" AS "amount", + COUNT("t1"."订单ID") AS "total_orders", + TO_CHAR("t1"."折扣率" * 100, '990.99') || '%' AS "discount_percent" + FROM "TEST"."ORDERS" "t1" + WHERE ROWNUM <= 100 + + + + + 统计用户表 PUBLIC.USERS(含关键字字段DATE)的活跃占比 + + SELECT DATE, status FROM PUBLIC.USERS -- 错误:未处理关键字和引号 + SELECT "DATE", ROUND(active_ratio) FROM "PUBLIC"."USERS" -- 错误:百分比格式错误 + + + SELECT + "u"."DATE" AS "create_date", + TO_CHAR("u"."active_ratio" * 100, '990.99') || '%' AS "active_percent" + FROM "PUBLIC"."USERS" "u" + WHERE "u"."status" = 1 + AND ROWNUM <= 1000 + + + + + example_engine: Oracle 19c + example_answer_1: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\"","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\" FETCH FIRST 1000 ROWS ONLY","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" 
DESC FETCH FIRST 1000 ROWS ONLY","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_3: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国' AND ROWNUM <= 1000","tables":["sample_country_gdp"],"chart-type":"table"} diff --git a/backend/templates/sql_examples/PostgreSQL.yaml b/backend/templates/sql_examples/PostgreSQL.yaml new file mode 100644 index 00000000..cf7ef595 --- /dev/null +++ b/backend/templates/sql_examples/PostgreSQL.yaml @@ -0,0 +1,80 @@ +template: + quot_rule: | + + 必须对数据库名、表名、字段名、别名外层加双引号(")。 + + 1. 点号(.)不能包含在引号内,必须写成 "schema"."table" + 2. 即使标识符不含特殊字符或非关键字,也需强制加双引号 + + + + limit_rule: | + + 使用 LIMIT 来限制行数 + + + other_rule: | + 必须为每个表生成别名(不加AS) + 禁止使用星号(*),必须明确字段名 + 中文/特殊字符字段需保留原名并添加英文别名 + 函数字段必须加别名 + 百分比字段保留两位小数并以%结尾 + 避免与数据库关键字冲突 + + basic_example: | + + + 📌 以下示例严格遵循中的 PostgreSQL 规范,展示符合要求的 SQL 写法与典型错误案例。 + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 双引号包裹所有数据库对象的规范用法 + 2. 中英别名/百分比/函数等特殊字段的处理 + 3. 
关键字冲突的规避方式 + + + 查询 TEST.ORDERS 表的前100条订单(含中文字段和百分比) + + SELECT * FROM TEST.ORDERS LIMIT 100 -- 错误:未加引号、使用星号 + SELECT "订单ID", "金额" FROM "TEST"."ORDERS" "t1" LIMIT 100 -- 错误:缺少英文别名 + SELECT COUNT("订单ID") FROM "TEST"."ORDERS" "t1" -- 错误:函数未加别名 + + + SELECT + "t1"."订单ID" AS "order_id", + "t1"."金额" AS "amount", + COUNT("t1"."订单ID") AS "total_orders", + ROUND("t1"."折扣率" * 100, 2) || '%' AS "discount_percent" + FROM "TEST"."ORDERS" "t1" + LIMIT 100 + + + + + 统计用户表 PUBLIC.USERS(含关键字字段user)的活跃占比 + + SELECT user, status FROM PUBLIC.USERS -- 错误:未处理关键字和引号 + SELECT "user", ROUND(active_ratio) FROM "PUBLIC"."USERS" -- 错误:百分比格式错误 + + + SELECT + "u"."user" AS "username", + ROUND("u"."active_ratio" * 100, 2) || '%' AS "active_percent" + FROM "PUBLIC"."USERS" "u" + WHERE "u"."status" = 1 + + + + + example_engine: PostgreSQL17.6 (Debian 17.6-1.pgdg12+1) + example_answer_1: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\"","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\" LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"pie"} + 
example_answer_3: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"} diff --git a/backend/templates/sql_examples/StarRocks.yaml b/backend/templates/sql_examples/StarRocks.yaml new file mode 100644 index 00000000..f49ac0e2 --- /dev/null +++ b/backend/templates/sql_examples/StarRocks.yaml @@ -0,0 +1,88 @@ +template: + quot_rule: | + + 必须对数据库名、表名、字段名、别名外层加反引号(`)。 + + 1. 点号(.)不能包含在反引号内,必须写成 `database`.`table` + 2. 当标识符为关键字、含特殊字符或需保留大小写时必须加反引号 + 3. StarRocks 标识符默认不区分大小写,但建议统一使用反引号 + + + + limit_rule: | + + 行数限制使用标准SQL语法: + + 1. 标准写法:LIMIT [count] + 2. 分页写法:LIMIT [count] OFFSET [start] + 3. 支持 MySQL 兼容写法:LIMIT [offset], [count] + + + + other_rule: | + 必须为每个表生成别名(不加AS) + 禁止使用星号(*),必须明确字段名 + 中文/特殊字符字段需保留原名并添加英文别名 + 函数字段必须加别名 + 百分比显示为:CONCAT(ROUND(x*100,2),'%') + 避免与StarRocks关键字冲突(如`rank`/`partition`/`values`) + 分区查询需使用PARTITION语句明确指定 + 优先使用Colocate Group特性优化JOIN性能 + + basic_example: | + + + 📌 以下示例严格遵循 StarRocks 规范(兼容 MySQL 协议) + ⚠️ 注意:示例中的表名、字段名均为演示虚构,实际使用时需替换为用户提供的真实标识符。 + 🔍 重点观察: + 1. 反引号包裹所有数据库对象 + 2. 分区/分桶等MPP优化特性 + 3. 
关键字冲突规避(如`rank`) + + + 查询 test.orders 表的前100条订单(含中文字段) + + SELECT * FROM test.orders LIMIT 100 -- 错误:使用星号 + SELECT `订单ID`, `金额` FROM `test`.`orders` `t1` -- 错误:缺少英文别名 + + + SELECT + `t1`.`订单ID` AS `order_id`, + `t1`.`金额` AS `amount_usd`, + CONCAT(ROUND(`t1`.`折扣率` * 100, 2), '%') AS `discount_percent` + FROM `test`.`orders` + PARTITION (`p2023`) `t1` -- StarRocks分区裁剪 + LIMIT 100 + + + + 统计用户表 dw.users(含关键字rank)的活跃占比 + + SELECT rank, status FROM dw.users -- 错误:未处理关键字 + SELECT `rank`, active_ratio FROM `dw`.`users` -- 错误:百分比未格式化 + + + SELECT + `u`.`rank` AS `user_rank`, + CONCAT(ROUND(`u`.`active_ratio` * 100, 2), '%') AS `active_percent` + FROM `dw`.`users` `u` + WHERE `u`.`status` = 1 + AND `u`.`dt` = '2024-01-01' -- 分区键过滤 + LIMIT 1000 + + + + + example_engine: StarRocks 3.0 + example_answer_1: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `continent` AS `continent_name`, `year` AS `year`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` ORDER BY `country`, `year`","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_1_with_limit: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `continent` AS `continent_name`, `year` AS `year`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` ORDER BY `country`, `year` LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"} + example_answer_2: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2024' ORDER BY `gdp` DESC","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_2_with_limit: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2024' ORDER BY `gdp` DESC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"pie"} + example_answer_3: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM 
`Sample_Database`.`sample_country_gdp` WHERE `year` = '2025' AND `country` = '中国'","tables":["sample_country_gdp"],"chart-type":"table"} + example_answer_3_with_limit: | + {"success":true,"sql":"SELECT `country` AS `country_name`, `gdp` AS `gdp_usd` FROM `Sample_Database`.`sample_country_gdp` WHERE `year` = '2025' AND `country` = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"} \ No newline at end of file diff --git a/backend/template.yaml b/backend/templates/template.yaml similarity index 84% rename from backend/template.yaml rename to backend/templates/template.yaml index 746a2f43..be18f9e0 100644 --- a/backend/template.yaml +++ b/backend/templates/template.yaml @@ -6,6 +6,11 @@ template: {data_training} sql: + query_limit: | + + 如果用户没有指定数据条数的限制,输出的查询SQL必须加上1000条的数据条数限制。 + 如果用户指定的限制大于1000,则按1000处理。 + system: | 你是"SQLBOT",智能问数小助手,可以根据用户提问,专业生成SQL与可视化图表。 @@ -55,43 +60,8 @@ template: 提问中如果有涉及数据源名称或数据源描述的内容,则忽略数据源的信息,直接根据剩余内容生成SQL - - 根据表结构生成SQL语句,需给每个表名生成一个别名(不要加AS) - - - SQL查询中不能使用星号(*),必须明确指定字段名 - - - SQL查询的字段名不要自动翻译,别名必须为英文 - - - SQL查询的字段若是函数字段,如 COUNT(),CAST() 等,必须加上别名 - - - 计算占比,百分比类型字段,保留两位小数,以%结尾 - - - 生成SQL时,必须避免与数据库关键字冲突 - - - 如数据库引擎是 PostgreSQL、Oracle、ClickHouse、达梦(DM)、AWS Redshift、Elasticsearch,则在schema、表名、字段名、别名外层加双引号; - 如数据库引擎是 MySQL、Doris,则在表名、字段名、别名外层加反引号; - 如数据库引擎是 Microsoft SQL Server,则在schema、表名、字段名、别名外层加方括号。 - - 以 PostgreSQL 为例,查询Schema为TEST表TABLE下前1000条id字段,则生成的SQL为: - SELECT "id" FROM "TEST"."TABLE" LIMIT 1000 - - 注意在表名外双引号的位置,千万不要生成为: - SELECT "id" FROM "TEST.TABLE" LIMIT 1000 - 以 Microsoft SQL Server 为例,查询Schema为TEST表TABLE下前1000条id字段,则生成的SQL为: - SELECT TOP 1000 [id] FROM [TEST].[TABLE] - - 注意在表名外方括号的位置,千万不要生成为: - SELECT TOP 1000 [id] FROM [TEST.TABLE] - 以 MySQL 为例,查询Schema为TEST表TABLE下前1000条id字段,则生成的SQL为: - SELECT `id` FROM `TEST`.`TABLE` LIMIT 1000 - - 注意在表名外反引号的位置,千万不要生成为: - SELECT `id` FROM `TEST.TABLE` LIMIT 1000 - - + {base_sql_rules} + {query_limit} 如果生成SQL的字段内有时间格式的字段: - 若提问中没有指定查询顺序,则默认按时间升序排序 @@ -105,26 +75,6 @@ 
template: 生成的SQL查询结果可以用来进行图表展示,需要注意排序字段的排序优先级,例如: - 柱状图或折线图:适合展示在横轴的字段优先排序,若SQL包含分类字段,则分类字段次一级排序 - - 如果用户没有指定数据条数的限制,输出的查询SQL必须加上1000条的数据条数限制 - 如果用户指定的限制大于1000,则按1000处理 - - 以 PostgreSQL 为例,查询Schema为TEST表TABLE下id字段,则生成的SQL为: - SELECT "id" FROM "TEST"."TABLE" LIMIT 1000 - 以 Microsoft SQL Server 为例,查询Schema为TEST表TABLE下id字段,则生成的SQL为: - - 使用 TOP(适用于所有 SQL Server 版本,需要注意 TOP 在SQL中的位置): - SELECT TOP 1000 [id] FROM [TEST].[TABLE] - - 使用 OFFSET-FETCH(SQL Server 2012+): - SELECT "id" FROM "TEST"."TABLE" - ORDER BY "id" -- 必须指定 ORDER BY - OFFSET 0 ROWS FETCH NEXT 1000 ROWS ONLY - 以 Oracle 为例,查询Schema为TEST表TABLE下id字段,则生成的SQL为: - - 使用ROWNUM(适用于所有Oracle版本): - SELECT "id" FROM "TEST"."TABLE" WHERE ROWNUM <= 1000 - - 使用FETCH FIRST(Oracle 12c及以上版本): - SELECT "id" FROM "TEST"."TABLE" FETCH FIRST 1000 ROWS ONLY - - 若需关联多表,优先使用中标记为"Primary key"/"ID"/"主键"的字段作为关联条件。 @@ -133,11 +83,19 @@ template: - 以下帮助你理解问题及返回格式的例子,不要将内的表结构用来回答用户的问题,内的为后续用户提问传入的内容,为根据模版与输入的输出回答 - 以下内的例子的SQL语法只是针对该例子的内PostgreSQL的对应数据库语法,你生成的SQL语法必须按照当前对话实际给出的来生成 + {basic_sql_examples} + + + 📌 以下示例仅用于演示问题理解与回答格式,不包含实际表结构 + ⚠️ 注意:示例中的SQL语法仅适用于对应标注的数据库类型 + 🔍 重点观察: + 1. 代表用户可能的提问输入内容 + 2. 展示根据模板规则生成的响应 + 3. 
实际生成时必须使用当前对话指定的数据库语法 + - PostgreSQL17.6 (Debian 17.6-1.pgdg12+1) + {example_engine} 【DB_ID】 Sample_Database, 样例数据库 【Schema】 @@ -203,7 +161,7 @@ template: 查询各个国家每年的GDP - {{"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"continent\" AS \"continent_name\", \"year\" AS \"year\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" ORDER BY \"country\", \"year\" LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"line"}} + {example_answer_1} @@ -215,7 +173,7 @@ template: 使用饼图展示去年各个国家的GDP - {{"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2024' ORDER BY \"gdp\" DESC LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"pie"}} + {example_answer_2} @@ -228,7 +186,7 @@ template: 查询今年中国大陆的GDP - {{"success":true,"sql":"SELECT \"country\" AS \"country_name\", \"gdp\" AS \"gdp\" FROM \"Sample_Database\".\"sample_country_gdp\" WHERE \"year\" = '2025' AND \"country\" = '中国' LIMIT 1000","tables":["sample_country_gdp"],"chart-type":"table"}} + {example_answer_3}