diff --git a/backend/apps/chat/api/chat.py b/backend/apps/chat/api/chat.py index b71568dd..dc446685 100644 --- a/backend/apps/chat/api/chat.py +++ b/backend/apps/chat/api/chat.py @@ -14,6 +14,7 @@ from apps.chat.models.chat_model import CreateChat, ChatRecord, RenameChat, ChatQuestion, AxisObj from apps.chat.task.llm import LLMService from common.core.deps import CurrentAssistant, SessionDep, CurrentUser, Trans +from common.utils.data_format import DataFormat router = APIRouter(tags=["Data Q&A"], prefix="/chat") @@ -245,9 +246,9 @@ async def export_excel(session: SessionDep, chat_record_id: int, trans: Trans): def inner(): - data_list = LLMService.convert_large_numbers_in_object_array(_data + _predict_data) + data_list = DataFormat.convert_large_numbers_in_object_array(_data + _predict_data) - md_data, _fields_list = LLMService.convert_object_array_for_pandas(fields, data_list) + md_data, _fields_list = DataFormat.convert_object_array_for_pandas(fields, data_list) # data, _fields_list, col_formats = LLMService.format_pd_data(fields, _data + _predict_data) diff --git a/backend/apps/chat/task/llm.py b/backend/apps/chat/task/llm.py index 242f10cc..801cbf16 100644 --- a/backend/apps/chat/task/llm.py +++ b/backend/apps/chat/task/llm.py @@ -45,6 +45,7 @@ from common.core.db import engine from common.core.deps import CurrentAssistant, CurrentUser from common.error import SingleMessageError, SQLBotDBError, ParseSQLResultError, SQLBotDBConnectionError +from common.utils.data_format import DataFormat from common.utils.utils import SQLBotLogUtil, extract_nested_json, prepare_for_orjson warnings.filterwarnings("ignore") @@ -1039,7 +1040,7 @@ def run_task(self, in_chat: bool = True, stream: bool = True, result = self.execute_sql(sql=real_execute_sql) - _data = self.convert_large_numbers_in_object_array(result.get('data')) + _data = DataFormat.convert_large_numbers_in_object_array(result.get('data')) result["data"] = _data self.save_sql_data(session=_session, data_obj=result) @@ -1057,7 +1058,7 @@ def run_task(self, in_chat: bool = True, stream: bool = True, for field in result.get('fields'): _column_list.append(AxisObj(name=field, value=field)) - md_data, _fields_list = self.convert_object_array_for_pandas(_column_list, result.get('data')) + md_data, _fields_list = DataFormat.convert_object_array_for_pandas(_column_list, result.get('data')) # data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data')) @@ -1065,7 +1066,7 @@ def run_task(self, in_chat: bool = True, stream: bool = True, yield 'The SQL execution result is empty.\n\n' else: df = pd.DataFrame(_data, columns=_fields_list) - df_safe = self.safe_convert_to_string(df) + df_safe = DataFormat.safe_convert_to_string(df) markdown_table = df_safe.to_markdown(index=False) yield markdown_table + '\n\n' else: @@ -1115,7 +1116,7 @@ def run_task(self, in_chat: bool = True, stream: bool = True, _column_list.append( AxisObj(name=field if not _fields.get(field) else _fields.get(field), value=field)) - md_data, _fields_list = self.convert_object_array_for_pandas(_column_list, result.get('data')) + md_data, _fields_list = DataFormat.convert_object_array_for_pandas(_column_list, result.get('data')) # data, _fields_list, col_formats = self.format_pd_data(_column_list, result.get('data')) @@ -1123,7 +1124,7 @@ def run_task(self, in_chat: bool = True, stream: bool = True, yield 'The SQL execution result is empty.\n\n' else: df = pd.DataFrame(md_data, columns=_fields_list) - df_safe = self.safe_convert_to_string(df) + df_safe = DataFormat.safe_convert_to_string(df) markdown_table = df_safe.to_markdown(index=False) yield markdown_table + '\n\n' @@ -1176,120 +1177,7 @@ def run_task(self, in_chat: bool = True, stream: bool = True, self.finish(_session) session_maker.remove() - @staticmethod - def safe_convert_to_string(df): - df_copy = df.copy() - - def format_value(x): - if pd.isna(x): - return "" - - return "\u200b" + str(x) - - for col in df_copy.columns: - df_copy[col] = df_copy[col].apply(format_value) - - return df_copy - - @staticmethod - def convert_large_numbers_in_object_array(obj_array, int_threshold=1e15, float_threshold=1e10): - """处理对象数组,将每个对象中的大数字转换为字符串""" - - def format_float_without_scientific(value): - """格式化浮点数,避免科学记数法""" - if value == 0: - return "0" - formatted = f"{value:.15f}" - if '.' in formatted: - formatted = formatted.rstrip('0').rstrip('.') - return formatted - - def process_object(obj): - """处理单个对象""" - if not isinstance(obj, dict): - return obj - - processed_obj = {} - for key, value in obj.items(): - if isinstance(value, (int, float)): - # 只转换大数字 - if isinstance(value, int) and abs(value) >= int_threshold: - processed_obj[key] = str(value) - elif isinstance(value, float) and (abs(value) >= float_threshold or abs(value) < 1e-6): - processed_obj[key] = format_float_without_scientific(value) - else: - processed_obj[key] = value - elif isinstance(value, dict): - # 处理嵌套对象 - processed_obj[key] = process_object(value) - elif isinstance(value, list): - # 处理对象中的数组 - processed_obj[key] = [process_item(item) for item in value] - else: - processed_obj[key] = value - return processed_obj - - def process_item(item): - """处理数组中的项目""" - if isinstance(item, dict): - return process_object(item) - return item - - return [process_item(obj) for obj in obj_array] - @staticmethod - def convert_object_array_for_pandas(column_list: list, data_list: list): - _fields_list = [] - for field_idx, field in enumerate(column_list): - _fields_list.append(field.name) - - md_data = [] - for inner_data in data_list: - _row = [] - for field_idx, field in enumerate(column_list): - value = inner_data.get(field.value) - _row.append(value) - md_data.append(_row) - return md_data, _fields_list - - @staticmethod - def format_pd_data(column_list: list, data_list: list, col_formats: dict = None): - # 预处理数据并记录每列的格式类型 - # 格式类型:'text'(文本)、'number'(数字)、'default'(默认) - _fields_list = [] - - if col_formats is None: - col_formats = {} - for field_idx, field in enumerate(column_list): - _fields_list.append(field.name) - col_formats[field_idx] = 'default' # 默认不特殊处理 - - data = [] - - for _data in data_list: - _row = [] - for field_idx, field in enumerate(column_list): - value = _data.get(field.value) - if value is not None: - # 检查是否为数字且需要特殊处理 - if isinstance(value, (int, float)): - # 整数且超过15位 → 转字符串并标记为文本列 - if isinstance(value, int) and len(str(abs(value))) > 15: - value = str(value) - col_formats[field_idx] = 'text' - # 小数且超过15位有效数字 → 转字符串并标记为文本列 - elif isinstance(value, float): - decimal_str = format(value, '.16f').rstrip('0').rstrip('.') - if len(decimal_str) > 15: - value = str(value) - col_formats[field_idx] = 'text' - # 其他数字列标记为数字格式(避免科学记数法) - elif col_formats[field_idx] != 'text': - col_formats[field_idx] = 'number' - _row.append(value) - data.append(_row) - - return data, _fields_list, col_formats def run_recommend_questions_task_async(self): self.future = executor.submit(self.run_recommend_questions_task_cache) diff --git a/backend/apps/data_training/api/data_training.py b/backend/apps/data_training/api/data_training.py index 58f4a1f9..071dcdd1 100644 --- a/backend/apps/data_training/api/data_training.py +++ b/backend/apps/data_training/api/data_training.py @@ -7,11 +7,11 @@ from fastapi.responses import StreamingResponse from apps.chat.models.chat_model import AxisObj -from apps.chat.task.llm import LLMService from apps.data_training.curd.data_training import page_data_training, create_training, update_training, delete_training, \ enable_training, get_all_data_training from apps.data_training.models.data_training_model import DataTrainingInfo from common.core.deps import SessionDep, CurrentUser, Trans +from common.utils.data_format import DataFormat router = APIRouter(tags=["DataTraining"], prefix="/system/data-training") @@ -53,9 +53,9 @@ async def enable(session: SessionDep, id: int, enabled: bool, trans: Trans): @router.get("/export") async def export_excel(session: SessionDep, trans: Trans, current_user: CurrentUser, - word: Optional[str] = Query(None, description="搜索术语(可选)")): + question: Optional[str] = Query(None, description="搜索术语(可选)")): def inner(): - _list = get_all_data_training(session, word, oid=current_user.oid) + _list = get_all_data_training(session, question, oid=current_user.oid) data_list = [] for obj in _list: @@ -75,7 +75,7 @@ def inner(): fields.append( AxisObj(name=trans('i18n_data_training.advanced_application'), value='advanced_application_name')) - md_data, _fields_list = LLMService.convert_object_array_for_pandas(fields, data_list) + md_data, _fields_list = DataFormat.convert_object_array_for_pandas(fields, data_list) df = pd.DataFrame(md_data, columns=_fields_list) diff --git a/backend/apps/terminology/api/terminology.py b/backend/apps/terminology/api/terminology.py index 42bd5389..7b8b573e 100644 --- a/backend/apps/terminology/api/terminology.py +++ b/backend/apps/terminology/api/terminology.py @@ -7,11 +7,11 @@ from fastapi.responses import StreamingResponse from apps.chat.models.chat_model import AxisObj -from apps.chat.task.llm import LLMService from apps.terminology.curd.terminology import page_terminology, create_terminology, update_terminology, \ delete_terminology, enable_terminology, get_all_terminology from apps.terminology.models.terminology_model import TerminologyInfo from common.core.deps import SessionDep, CurrentUser, Trans +from common.utils.data_format import DataFormat router = APIRouter(tags=["Terminology"], prefix="/system/terminology") @@ -62,8 +62,8 @@ def inner(): "word": obj.word, "other_words": ', '.join(obj.other_words) if obj.other_words else '', "description": obj.description, - "all_data_sources": 'Y' if obj.specific_ds else 'N', - "datasource": ', '.join(obj.datasource_names) if obj.datasource_names else '', + "all_data_sources": 'N' if obj.specific_ds else 'Y', + "datasource": ', '.join(obj.datasource_names) if obj.datasource_names and obj.specific_ds else '', } data_list.append(_data) @@ -74,7 +74,7 @@ def inner(): fields.append(AxisObj(name=trans('i18n_terminology.effective_data_sources'), value='datasource')) fields.append(AxisObj(name=trans('i18n_terminology.all_data_sources'), value='all_data_sources')) - md_data, _fields_list = LLMService.convert_object_array_for_pandas(fields, data_list) + md_data, _fields_list = DataFormat.convert_object_array_for_pandas(fields, data_list) df = pd.DataFrame(md_data, columns=_fields_list) diff --git a/backend/common/utils/data_format.py b/backend/common/utils/data_format.py new file mode 100644 index 00000000..63037fa3 --- /dev/null +++ b/backend/common/utils/data_format.py @@ -0,0 +1,117 @@ +import pandas as pd + +class DataFormat: + @staticmethod + def safe_convert_to_string(df): + df_copy = df.copy() + + def format_value(x): + if pd.isna(x): + return "" + + return "\u200b" + str(x) + + for col in df_copy.columns: + df_copy[col] = df_copy[col].apply(format_value) + + return df_copy + + @staticmethod + def convert_large_numbers_in_object_array(obj_array, int_threshold=1e15, float_threshold=1e10): + """处理对象数组,将每个对象中的大数字转换为字符串""" + + def format_float_without_scientific(value): + """格式化浮点数,避免科学记数法""" + if value == 0: + return "0" + formatted = f"{value:.15f}" + if '.' in formatted: + formatted = formatted.rstrip('0').rstrip('.') + return formatted + + def process_object(obj): + """处理单个对象""" + if not isinstance(obj, dict): + return obj + + processed_obj = {} + for key, value in obj.items(): + if isinstance(value, (int, float)): + # 只转换大数字 + if isinstance(value, int) and abs(value) >= int_threshold: + processed_obj[key] = str(value) + elif isinstance(value, float) and (abs(value) >= float_threshold or abs(value) < 1e-6): + processed_obj[key] = format_float_without_scientific(value) + else: + processed_obj[key] = value + elif isinstance(value, dict): + # 处理嵌套对象 + processed_obj[key] = process_object(value) + elif isinstance(value, list): + # 处理对象中的数组 + processed_obj[key] = [process_item(item) for item in value] + else: + processed_obj[key] = value + return processed_obj + + def process_item(item): + """处理数组中的项目""" + if isinstance(item, dict): + return process_object(item) + return item + + return [process_item(obj) for obj in obj_array] + + @staticmethod + def convert_object_array_for_pandas(column_list: list, data_list: list): + _fields_list = [] + for field_idx, field in enumerate(column_list): + _fields_list.append(field.name) + + md_data = [] + for inner_data in data_list: + _row = [] + for field_idx, field in enumerate(column_list): + value = inner_data.get(field.value) + _row.append(value) + md_data.append(_row) + return md_data, _fields_list + + @staticmethod + def format_pd_data(column_list: list, data_list: list, col_formats: dict = None): + # 预处理数据并记录每列的格式类型 + # 格式类型:'text'(文本)、'number'(数字)、'default'(默认) + _fields_list = [] + + if col_formats is None: + col_formats = {} + for field_idx, field in enumerate(column_list): + _fields_list.append(field.name) + col_formats[field_idx] = 'default' # 默认不特殊处理 + + data = [] + + for _data in data_list: + _row = [] + for field_idx, field in enumerate(column_list): + value = _data.get(field.value) + if value is not None: + # 检查是否为数字且需要特殊处理 + if isinstance(value, (int, float)): + # 整数且超过15位 → 转字符串并标记为文本列 + if isinstance(value, int) and len(str(abs(value))) > 15: + value = str(value) + col_formats[field_idx] = 'text' + # 小数且超过15位有效数字 → 转字符串并标记为文本列 + elif isinstance(value, float): + decimal_str = format(value, '.16f').rstrip('0').rstrip('.') + if len(decimal_str) > 15: + value = str(value) + col_formats[field_idx] = 'text' + # 其他数字列标记为数字格式(避免科学记数法) + elif col_formats[field_idx] != 'text': + col_formats[field_idx] = 'number' + _row.append(value) + data.append(_row) + + return data, _fields_list, col_formats \ No newline at end of file diff --git a/backend/locales/en.json b/backend/locales/en.json index 44cc60ed..d519de8c 100644 --- a/backend/locales/en.json +++ b/backend/locales/en.json @@ -62,7 +62,11 @@ }, "i18n_custom_prompt": { "exists_in_db": "Template name already exists", - "not_exists": "This template does not exist" + "not_exists": "This template does not exist", + "prompt_word_name": "Prompt word name", + "prompt_word_content": "Prompt word content", + "effective_data_sources": "Effective Data Sources", + "all_data_sources": "All Data Sources" }, "i18n_excel_export": { "data_is_empty": "Form data is empty, unable to export data" diff --git a/backend/locales/ko-KR.json b/backend/locales/ko-KR.json index ce87967f..4e3a4e6c 100644 --- a/backend/locales/ko-KR.json +++ b/backend/locales/ko-KR.json @@ -62,7 +62,11 @@ }, "i18n_custom_prompt": { "exists_in_db": "템플릿 이름이 이미 존재합니다", - "not_exists": "이 템플릿이 존재하지 않습니다" + "not_exists": "이 템플릿이 존재하지 않습니다", + "prompt_word_name": "프롬프트 이름", + "prompt_word_content": "프롬프트 내용", + "effective_data_sources": "유효 데이터 소스", + "all_data_sources": "모든 데이터 소스" }, "i18n_excel_export": { "data_is_empty": "폼 데이터가 비어 있어 데이터를 내보낼 수 없습니다" diff --git a/backend/locales/zh-CN.json b/backend/locales/zh-CN.json index 1b057e0a..113c9029 100644 --- a/backend/locales/zh-CN.json +++ b/backend/locales/zh-CN.json @@ -62,7 +62,11 @@ }, "i18n_custom_prompt": { "exists_in_db": "模版名称已存在", - "not_exists": "该模版不存在" + "not_exists": "该模版不存在", + "prompt_word_name": "提示词名称", + "prompt_word_content": "提示词内容", + "effective_data_sources": "生效数据源", + "all_data_sources": "所有数据源" }, "i18n_excel_export": { "data_is_empty": "表单数据为空,无法导出数据" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index fbb0c684..4c77ea01 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -39,7 +39,7 @@ dependencies = [ "pyyaml (>=6.0.2,<7.0.0)", "fastapi-mcp (>=0.3.4,<0.4.0)", "tabulate>=0.9.0", - "sqlbot-xpack>=0.0.3.44,<1.0.0", + "sqlbot-xpack>=0.0.3.45,<1.0.0", "fastapi-cache2>=0.2.2", "sqlparse>=0.5.3", "redis>=6.2.0", diff --git a/frontend/src/api/prompt.ts b/frontend/src/api/prompt.ts index 01d73488..316b64e3 100644 --- a/frontend/src/api/prompt.ts +++ b/frontend/src/api/prompt.ts @@ -8,4 +8,10 @@ export const promptApi = { updateEmbedded: (data: any) => request.put(`/system/custom_prompt`, data), deleteEmbedded: (params: any) => request.delete('/system/custom_prompt', { data: params }), getOne: (id: any) => request.get(`/system/custom_prompt/${id}`), + export2Excel: (type: any, params: any) => + request.get(`/system/custom_prompt/${type}/export`, { + params, + responseType: 'blob', + requestOptions: { customError: true }, + }), } diff --git a/frontend/src/i18n/en.json b/frontend/src/i18n/en.json index f0c0edcd..76b13959 100644 --- a/frontend/src/i18n/en.json +++ b/frontend/src/i18n/en.json @@ -22,6 +22,7 @@ "replaced_with": "Please enter a prompt word. Example: When returning names, only the last name will be displayed, and the first name will be replaced with *.", "loss_exercise_caution": "Entering high-risk operations such as deleting a database or table or modifying the table structure may result in permanent data loss. Exercise caution!", "edit_prompt_word": "Edit prompt word", + "all_236_terms": "Export all {msg} {type} prompt records?", "prompt_word_name_de": "Do you want to delete the prompt word: {msg}?" }, "training": { diff --git a/frontend/src/i18n/ko-KR.json b/frontend/src/i18n/ko-KR.json index 09f19bbd..852eb721 100644 --- a/frontend/src/i18n/ko-KR.json +++ b/frontend/src/i18n/ko-KR.json @@ -22,6 +22,7 @@ "replaced_with": "프롬프트를 입력하세요. 예: 성명을 반환할 때 성씨만 표시하고 이름은 *로 대체", "loss_exercise_caution": "데이터베이스 삭제, 데이터 테이블 삭제 또는 테이블 구조 수정과 같은 고위험 작업 명령을 입력하면 데이터가 영구적으로 손실될 수 있으니 신중하게 작업하십시오!", "edit_prompt_word": "프롬프트 편집", + "all_236_terms": "모든 {msg}개의 {type} 프롬프트 기록을 내보내시겠습니까?", "prompt_word_name_de": "프롬프트를 삭제하시겠습니까: {msg}?" }, "training": { diff --git a/frontend/src/i18n/zh-CN.json b/frontend/src/i18n/zh-CN.json index e95b659b..40ee294f 100644 --- a/frontend/src/i18n/zh-CN.json +++ b/frontend/src/i18n/zh-CN.json @@ -22,6 +22,7 @@ "replaced_with": "请输入提示词,示例:返回姓名时,只显示姓氏,名字用*代替", "loss_exercise_caution": "若输入删除数据库、数据表或修改表结构等高风险的操作指令,可能导致数据永久丢失,请务必谨慎!", "edit_prompt_word": "编辑提示词", + "all_236_terms": "是否导出全部 {msg} 条{type}提示词?", "prompt_word_name_de": "是否删除提示词:{msg}?" }, "training": { diff --git a/frontend/src/views/system/prompt/index.vue b/frontend/src/views/system/prompt/index.vue index a6422a1f..1697f2dd 100644 --- a/frontend/src/views/system/prompt/index.vue +++ b/frontend/src/views/system/prompt/index.vue @@ -79,44 +79,67 @@ const cancelDelete = () => { checkAll.value = false isIndeterminate.value = false } -const exportBatchUser = () => { - ElMessageBox.confirm( - t('professional.selected_2_terms_de', { msg: multipleSelectionAll.value.length }), - { - confirmButtonType: 'primary', - confirmButtonText: t('professional.export'), - cancelButtonText: t('common.cancel'), - customClass: 'confirm-no_icon', - autofocus: false, - } - ).then(() => { - promptApi.deleteEmbedded(multipleSelectionAll.value.map((ele) => ele.id)).then(() => { - ElMessage({ - type: 'success', - message: t('dashboard.delete_success'), - }) - multipleSelectionAll.value = [] - search() - }) - }) -} -const exportAllUser = () => { - ElMessageBox.confirm(t('professional.all_236_terms', { msg: pageInfo.total }), { +const exportExcel = () => { + let title = '' + if (currentType.value === 'GENERATE_SQL') { + title = t('prompt.ask_sql') + } + if (currentType.value === 'ANALYSIS') { + title = t('prompt.data_analysis') + } + if (currentType.value === 'PREDICT_DATA') { + title = t('prompt.data_prediction') + } + ElMessageBox.confirm(t('prompt.all_236_terms', { msg: pageInfo.total, type: title }), { confirmButtonType: 'primary', confirmButtonText: t('professional.export'), cancelButtonText: t('common.cancel'), customClass: 'confirm-no_icon', autofocus: false, }).then(() => { - promptApi.deleteEmbedded(multipleSelectionAll.value.map((ele) => ele.id)).then(() => { - ElMessage({ - type: 'success', - message: t('dashboard.delete_success'), + searchLoading.value = true + promptApi + .export2Excel(currentType.value, keywords.value ? { name: keywords.value } : {}) + .then((res) => { + const blob = new Blob([res], { + type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + }) + const link = document.createElement('a') + link.href = URL.createObjectURL(blob) + link.download = `${title}.xlsx` + document.body.appendChild(link) + link.click() + document.body.removeChild(link) + }) + .catch(async (error) => { + if (error.response) { + try { + let text = await error.response.data.text() + try { + text = JSON.parse(text) + } finally { + ElMessage({ + message: text, + type: 'error', + showClose: true, + }) + } + } catch (e) { + console.error('Error processing error response:', e) + } + } else { + console.error('Other error:', error) + ElMessage({ + message: error, + type: 'error', + showClose: true, + }) + } + }) + .finally(() => { + searchLoading.value = false }) - multipleSelectionAll.value = [] - search() - }) }) } const deleteBatchUser = () => { @@ -374,20 +397,18 @@ const typeChange = (val: any) => { - + + + {{ $t('professional.export_all') }} + + + + {{ $t('user.batch_import') }} +