From e867ec45736f5ec9b714209c4adc9c9aa11dec38 Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Tue, 1 Jul 2025 17:53:14 -0700 Subject: [PATCH 1/6] model dialog prep --- src/app/dfSlice.tsx | 46 +++- src/views/ModelSelectionDialog.tsx | 346 ++++++++++++++++++++++------- 2 files changed, 297 insertions(+), 95 deletions(-) diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx index 201e0961..046cb629 100644 --- a/src/app/dfSlice.tsx +++ b/src/app/dfSlice.tsx @@ -44,11 +44,19 @@ export interface ModelConfig { api_version?: string; } +// Define model slot types +export type ModelSlotType = 'generation' | 'hint'; + +export interface ModelSlots { + generation?: string; // model id assigned to generation tasks + hint?: string; // model id assigned to hint tasks +} + // Define a type for the slice state export interface DataFormulatorState { sessionId: string | undefined; models: ModelConfig[]; - selectedModelId: string | undefined; + modelSlots: ModelSlots; testedModels: {id: string, status: 'ok' | 'error' | 'testing' | 'unknown', message: string}[]; tables : DictTable[]; @@ -89,7 +97,7 @@ export interface DataFormulatorState { const initialState: DataFormulatorState = { sessionId: undefined, models: [], - selectedModelId: undefined, + modelSlots: {}, testedModels: [], tables: [], @@ -263,7 +271,7 @@ export const dataFormulatorSlice = createSlice({ // avoid resetting inputted models // state.oaiModels = state.oaiModels.filter((m: any) => m.endpoint != 'default'); - state.selectedModelId = state.models.length > 0 ? state.models[0].id : undefined; + state.modelSlots = {}; state.testedModels = []; state.tables = []; @@ -289,7 +297,7 @@ export const dataFormulatorSlice = createSlice({ let savedState = action.payload; state.models = savedState.models; - state.selectedModelId = savedState.selectedModelId; + state.modelSlots = savedState.modelSlots || {}; state.testedModels = []; // models should be tested again //state.table = undefined; @@ -318,16 +326,25 @@ export const dataFormulatorSlice = createSlice({ state.config = action.payload; }, selectModel: (state, action: PayloadAction) => { - state.selectedModelId = action.payload; + state.modelSlots = { ...state.modelSlots, generation: action.payload }; + }, + setModelSlot: (state, action: PayloadAction<{slotType: ModelSlotType, modelId: string | undefined}>) => { + state.modelSlots = { ...state.modelSlots, [action.payload.slotType]: action.payload.modelId }; + }, + setModelSlots: (state, action: PayloadAction) => { + state.modelSlots = action.payload; }, addModel: (state, action: PayloadAction) => { state.models = [...state.models, action.payload]; }, removeModel: (state, action: PayloadAction) => { state.models = state.models.filter(model => model.id != action.payload); - if (state.selectedModelId == action.payload) { - state.selectedModelId = undefined; - } + // Remove the model from all slots if it's assigned + Object.keys(state.modelSlots).forEach(slotType => { + if (state.modelSlots[slotType as ModelSlotType] === action.payload) { + state.modelSlots[slotType as ModelSlotType] = undefined; + } + }); }, updateModelStatus: (state, action: PayloadAction<{id: string, status: 'ok' | 'error' | 'testing' | 'unknown', message: string}>) => { let id = action.payload.id; @@ -743,8 +760,8 @@ export const dataFormulatorSlice = createSlice({ ...state.testedModels.filter(t => !defaultModels.map((m: ModelConfig) => m.id).includes(t.id)) ] - if (state.selectedModelId == undefined && defaultModels.length > 0) { - state.selectedModelId = defaultModels[0].id; + if (state.modelSlots.generation == undefined && defaultModels.length > 0) { + state.modelSlots.generation = defaultModels[0].id; } // console.log("load model complete"); @@ -769,7 +786,14 @@ export const dataFormulatorSlice = createSlice({ export const dfSelectors = { getActiveModel: (state: DataFormulatorState) : ModelConfig => { - return state.models.find(m => m.id == state.selectedModelId) || state.models[0]; + return state.models.find(m => m.id == state.modelSlots.generation) || state.models[0]; + }, + getModelBySlot: (state: DataFormulatorState, slotType: ModelSlotType) : ModelConfig | undefined => { + const modelId = state.modelSlots[slotType]; + return modelId ? state.models.find(m => m.id === modelId) : undefined; + }, + getAllSlotTypes: () : ModelSlotType[] => { + return ['generation', 'hint']; }, getActiveBaseTableIds: (state: DataFormulatorState) => { let focusedTableId = state.focusedTableId; diff --git a/src/views/ModelSelectionDialog.tsx b/src/views/ModelSelectionDialog.tsx index fcfbaf48..3b18c978 100644 --- a/src/views/ModelSelectionDialog.tsx +++ b/src/views/ModelSelectionDialog.tsx @@ -9,6 +9,9 @@ import { DataFormulatorState, dfActions, ModelConfig, + ModelSlots, + ModelSlotType, + dfSelectors, } from '../app/dfSlice' import _ from 'lodash'; @@ -38,10 +41,11 @@ import { MenuItem, OutlinedInput, Paper, + Box, } from '@mui/material'; -import { styled } from '@mui/material/styles'; +import { alpha, styled, useTheme } from '@mui/material/styles'; import SettingsIcon from '@mui/icons-material/Settings'; import AddCircleIcon from '@mui/icons-material/AddCircle'; @@ -66,15 +70,16 @@ const decodeHtmlEntities = (text: string): string => { }; export const ModelSelectionButton: React.FC<{}> = ({ }) => { + const theme = useTheme(); const dispatch = useDispatch(); const models = useSelector((state: DataFormulatorState) => state.models); - const selectedModelId = useSelector((state: DataFormulatorState) => state.selectedModelId); + const modelSlots = useSelector((state: DataFormulatorState) => state.modelSlots); const testedModels = useSelector((state: DataFormulatorState) => state.testedModels); const [modelDialogOpen, setModelDialogOpen] = useState(false); const [showKeys, setShowKeys] = useState(false); - const [tempSelectedModelId, setTempSelectedModelId] = useState(selectedModelId); + const [tempModelSlots, setTempModelSlots] = useState(modelSlots); const [providerModelOptions, setProviderModelOptions] = useState<{[key: string]: string[]}>({ 'openai': [], 'azure': [], @@ -137,6 +142,20 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { return id != undefined ? (testedModels.find(t => (t.id == id))?.status || 'unknown') : 'unknown'; } + // Helper functions for slot management + const updateTempSlot = (slotType: ModelSlotType, modelId: string | undefined) => { + setTempModelSlots(prev => ({ ...prev, [slotType]: modelId })); + }; + + const isModelAssignedToSlot = (modelId: string, slotType: ModelSlotType) => { + return tempModelSlots[slotType] === modelId; + }; + + // Ensure tempModelSlots is updated when modelSlots changes + React.useEffect(() => { + setTempModelSlots(modelSlots); + }, [modelSlots]); + const [newEndpoint, setNewEndpoint] = useState(""); // openai, azure, ollama etc const [newModel, setNewModel] = useState(""); const [newApiKey, setNewApiKey] = useState(""); @@ -210,17 +229,99 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { let readyToTest = newModel && (newApiKey || newApiBase); + // Create enhanced slot assignment summary component + const SlotAssignmentSummary: React.FC = () => { + const slotTypes = dfSelectors.getAllSlotTypes(); + + return ( + + Model Assignments + + {slotTypes.map(slotType => { + const assignedModelId = tempModelSlots[slotType]; + const assignedModel = assignedModelId ? models.find(m => m.id === assignedModelId) : undefined; + + return ( + + + {slotType} tasks + {slotType == 'generation' ? '' : '(small faster model recommended)'} + + + + + + + {assignedModel && ( + + {assignedModel.endpoint}/{assignedModel.model}{assignedModel.api_base && ` (${assignedModel.api_base})`} + + )} + + ); + })} + + + ); + }; + let newModelEntry = { - event.stopPropagation(); - setTempSelectedModelId(undefined); - }} > - - - = ({ }) => { placeholder="api_version" /> + + {/* Empty cell for Current Assignments */} + = ({ }) => { let model = {endpoint, model: newModel, api_key: newApiKey, api_base: newApiBase, api_version: newApiVersion, id: id}; dispatch(dfActions.addModel(model)); - dispatch(dfActions.selectModel(id)); - setTempSelectedModelId(id); - testModel(model); + // Create a custom test function that assigns to slot on success + const testAndAssignModel = (model: ModelConfig) => { + updateModelStatus(model, 'testing', ""); + let message = { + method: 'POST', + headers: { 'Content-Type': 'application/json', }, + body: JSON.stringify({ + model: model, + }), + }; + fetch(getUrls().TEST_MODEL, {...message }) + .then((response) => response.json()) + .then((data) => { + let status = data["status"] || 'error'; + updateModelStatus(model, status, data["message"] || ""); + // Only assign to slot if test is successful + if (status === 'ok') { + updateTempSlot('generation', id); + } + }).catch((error) => { + updateModelStatus(model, 'error', error.message) + }); + }; + + testAndAssignModel(model); setNewEndpoint(""); setNewModel(""); @@ -411,26 +537,25 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { let modelTable = - +
- - provider - api_key - model - api_base - api_version + Provider + API Key + Model + API Base + API Version + Assignments Status Action {models.map((model) => { - let isItemSelected = tempSelectedModelId != undefined && tempSelectedModelId == model.id; let status = getStatus(model.id); - let statusIcon = status == "unknown" ? : ( status == 'testing' ? : - (status == "ok" ? : )) + let statusIcon = status == "unknown" ? : ( status == 'testing' ? : + (status == "ok" ? : )) let message = "the model is ready to use"; if (status == "unknown") { @@ -440,57 +565,115 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { message = decodeHtmlEntities(rawMessage); } - const borderStyle = ['error', 'unknown'].includes(status) ? '1px dashed lightgray' : undefined; - const noBorderStyle = ['error', 'unknown'].includes(status) ? 'none' : undefined; + const borderStyle = ['error'].includes(status) ? '1px dashed lightgray' : undefined; + const noBorderStyle = ['error'].includes(status) ? 'none' : undefined; return ( { setTempSelectedModelId(model.id) }} - sx={{ cursor: 'pointer', '& .MuiTableCell-root': { p: 0.5, fontSize: 14 }}} + sx={{ + '& .MuiTableCell-root': { fontSize: '0.75rem' }, + '&:hover': { backgroundColor: '#f8f9fa' }, + backgroundColor: status == 'ok' ? alpha(theme.palette.success.main, 0.07) : '#fff' + }} > - - - - - {model.endpoint} + + + {model.endpoint} + - {model.api_key ? (showKeys ? + {model.api_key ? (showKeys ? {model.api_key} - : "************") - : N/A + : ••••••••••••) + : None } - {model.model} + + {model.model} + - {model.api_base} + {model.api_base ? ( + + {model.api_base} + + ) : ( + + Default + + )} - {model.api_version} + {model.api_version ? ( + + {model.api_version} + + ) : ( + + Default + + )} + + + + {dfSelectors.getAllSlotTypes().map(slotType => { + const isAssigned = isModelAssignedToSlot(model.id, slotType); + return isAssigned ? ( + + {slotType} + + ) : null; + })} + {!dfSelectors.getAllSlotTypes().some(slotType => isModelAssignedToSlot(model.id, slotType)) && ( + + Not assigned + + )} + - + - { testModel(model) }} + sx={{ p: 0.75, fontSize: "0.75rem", textTransform: "none" }} + startIcon={statusIcon} > - {statusIcon} - + {status == 'ok' ? 'ready' : 'test'} + @@ -499,34 +682,24 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { size="small" onClick={()=>{ dispatch(dfActions.removeModel(model.id)); - if ((tempSelectedModelId) - && tempSelectedModelId == model.id) { - if (models.length == 0) { - setTempSelectedModelId(undefined); - } else { - let chosenModel = models[models.length - 1]; - setTempSelectedModelId(chosenModel.id) + // Remove from all slots if assigned + dfSelectors.getAllSlotTypes().forEach(slotType => { + if (isModelAssignedToSlot(model.id, slotType)) { + updateTempSlot(slotType, undefined); } - } - }}> - + }); + }} + sx={{ p: 0.75 }} + > + - {['error', 'unknown'].includes(status) && ( - { setTempSelectedModelId(model.id) }} - sx={{ - cursor: 'pointer', - '&:hover': { - backgroundColor: 'rgba(0, 0, 0, 0.04)', - }, - }} - > - - + {['error'].includes(status) && ( + + + {message} @@ -538,13 +711,13 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { })} {newModelEntry} - - - Model configuration based on LiteLLM, check out supported endpoint / models here. - If using custom providers that are compatible with the OpenAI API, choose 'openai' as the provider. + + + Configuration: Based on LiteLLM. See supported providers. + Use 'openai' provider for OpenAI-compatible APIs. - - Models with limited code generation capabilities (e.g., llama3.2) may fail frequently to derive new data. + + Note: Models with limited code generation capabilities may fail frequently. @@ -553,9 +726,11 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { return <> - + = ({ }) => { } }} > - Select Model + Configure Models for Different Tasks + + + Available Models {modelTable} @@ -578,13 +756,13 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { {showKeys ? 'hide' : 'show'} keys )} - + dispatch(dfActions.setModelSlots(tempModelSlots)); + setModelDialogOpen(false);}}>Apply Slot Assignments From 5f51378c08efb192ebf10686b2519314b4018224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pe=C3=B1a?= Date: Wed, 2 Jul 2025 10:06:29 +0200 Subject: [PATCH 2/6] Adds support for MySQL port configuration Introduces a new optional parameter for specifying the MySQL server port, defaulting to 3306. Updates connection instructions to include port information for both local and remote connections. Enhances clarity in troubleshooting steps for testing connections with the specified port. --- .../data_formulator/data_loader/mysql_data_loader.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index 9de29ea2..4e8d5299 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -14,6 +14,7 @@ def list_params() -> bool: {"name": "user", "type": "string", "required": True, "default": "root", "description": ""}, {"name": "password", "type": "string", "required": False, "default": "", "description": "leave blank for no password"}, {"name": "host", "type": "string", "required": True, "default": "localhost", "description": ""}, + {"name": "port", "type": "int", "required": False, "default": 3306, "description": "MySQL server port (default 3306)"}, {"name": "database", "type": "string", "required": True, "default": "mysql", "description": ""} ] return params_list @@ -25,11 +26,11 @@ def auth_instructions() -> str: 1. Local MySQL Setup: - Ensure MySQL server is running on your machine - - Default connection: host='localhost', user='root' + - Default connection: host='localhost', user='root', port=3306 - If you haven't set a root password, leave password field empty 2. Remote MySQL Connection: - - Obtain host address, username, and password from your database administrator + - Obtain host address, port, username, and password from your database administrator - Ensure the MySQL server allows remote connections - Check that your IP is whitelisted in MySQL's user permissions @@ -37,11 +38,12 @@ def auth_instructions() -> str: - user: Your MySQL username (default: 'root') - password: Your MySQL password (leave empty if no password set) - host: MySQL server address (default: 'localhost') + - port: MySQL server port (default: 3306) - database: Target database name to connect to 4. Troubleshooting: - Verify MySQL service is running: `brew services list` (macOS) or `sudo systemctl status mysql` (Linux) - - Test connection: `mysql -u [username] -p -h [host] [database]` + - Test connection: `mysql -u [username] -p -h [host] -P [port] [database]` """ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): @@ -54,7 +56,7 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti attatch_string = "" for key, value in self.params.items(): - if value: + if value is not None and value != "": attatch_string += f"{key}={value} " # Detach existing mysqldb connection if it exists From feea866bb42d42dbae9b4112bcb8c48b9454ca5e Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Wed, 2 Jul 2025 10:51:32 -0700 Subject: [PATCH 3/6] model dialog --- src/app/dfSlice.tsx | 5 ++- src/views/ModelSelectionDialog.tsx | 68 ++++++++++++++++++------------ 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx index 046cb629..013abf4d 100644 --- a/src/app/dfSlice.tsx +++ b/src/app/dfSlice.tsx @@ -752,7 +752,10 @@ export const dataFormulatorSlice = createSlice({ state.models = [ ...defaultModels, - ...state.models.filter(e => !defaultModels.map((m: ModelConfig) => m.endpoint).includes(e.endpoint)) + ...state.models.filter(e => !defaultModels.some((m: ModelConfig) => + m.endpoint === e.endpoint && m.model === e.model && + m.api_base === e.api_base && m.api_version === e.api_version + )) ]; state.testedModels = [ diff --git a/src/views/ModelSelectionDialog.tsx b/src/views/ModelSelectionDialog.tsx index 3b18c978..86993131 100644 --- a/src/views/ModelSelectionDialog.tsx +++ b/src/views/ModelSelectionDialog.tsx @@ -242,7 +242,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { const assignedModel = assignedModelId ? models.find(m => m.id === assignedModelId) : undefined; return ( - = ({ }) => { }} > - {slotType} tasks - {slotType == 'generation' ? '' : '(small faster model recommended)'} + {slotType} tasks @@ -270,17 +269,36 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { sx={{ fontSize: '0.875rem' }} renderValue={(selected) => { if (!selected) { - return No model assigned; + return + + + No model assigned + + + + ; } const model = models.find(m => m.id === selected); - return model ? `${model.endpoint}/${model.model}` : 'Unknown model'; + return model ? + + + {model.endpoint}/{model.model} + {model.api_base && ( + + ({model.api_base}) + + )} + + + + : 'Unknown model'; }} > No assignment - {models.filter(m => getStatus(m.id) === 'ok').map((model) => ( - + {models.map((model) => ( + @@ -292,28 +310,21 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { )} - + {getStatus(model.id) === 'ok' ? + : getStatus(model.id) === 'error' ? + : } ))} - - {assignedModel && ( - - {assignedModel.endpoint}/{assignedModel.model}{assignedModel.api_base && ` (${assignedModel.api_base})`} - - )} - + ); })} + + Note: Models with strong code generation capabilities is recommended for generation tasks. + ); }; @@ -499,7 +510,11 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { updateModelStatus(model, status, data["message"] || ""); // Only assign to slot if test is successful if (status === 'ok') { - updateTempSlot('generation', id); + for (let slotType of dfSelectors.getAllSlotTypes()) { + if (!tempModelSlots[slotType]) { + updateTempSlot(slotType, id); + } + } } }).catch((error) => { updateModelStatus(model, 'error', error.message) @@ -567,6 +582,9 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { const borderStyle = ['error'].includes(status) ? '1px dashed lightgray' : undefined; const noBorderStyle = ['error'].includes(status) ? 'none' : undefined; + + // Check if model is assigned to any slot + const isAssignedToAnySlot = dfSelectors.getAllSlotTypes().some(slotType => isModelAssignedToSlot(model.id, slotType)); return ( @@ -575,7 +593,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { sx={{ '& .MuiTableCell-root': { fontSize: '0.75rem' }, '&:hover': { backgroundColor: '#f8f9fa' }, - backgroundColor: status == 'ok' ? alpha(theme.palette.success.main, 0.07) : '#fff' + backgroundColor: isAssignedToAnySlot ? alpha(theme.palette.success.main, 0.07) : '#fff' }} > @@ -716,9 +734,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { Configuration: Based on LiteLLM. See supported providers. Use 'openai' provider for OpenAI-compatible APIs. - - Note: Models with limited code generation capabilities may fail frequently. - + From 2aec38c6c4dd23eee4b6f89b22369fbe12cfa50e Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Wed, 2 Jul 2025 11:07:49 -0700 Subject: [PATCH 4/6] minor version bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7a55c6b1..5f82bddd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "data_formulator" -version = "0.2.1.4" +version = "0.2.1.5" requires-python = ">=3.9" authors = [ From b2f93981b2cc30ec7d2aa6c39564a2d1267e9fd6 Mon Sep 17 00:00:00 2001 From: Chenglong Wang <93549116+Chenglong-MS@users.noreply.github.com> Date: Wed, 2 Jul 2025 11:12:04 -0700 Subject: [PATCH 5/6] Update src/views/ModelSelectionDialog.tsx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/views/ModelSelectionDialog.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/views/ModelSelectionDialog.tsx b/src/views/ModelSelectionDialog.tsx index 86993131..3e286f92 100644 --- a/src/views/ModelSelectionDialog.tsx +++ b/src/views/ModelSelectionDialog.tsx @@ -323,7 +323,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => { })} - Note: Models with strong code generation capabilities is recommended for generation tasks. + Note: Models with strong code generation capabilities are recommended for generation tasks. ); From ee195b195790fc13daf743ad980029896faa7d4f Mon Sep 17 00:00:00 2001 From: Chenglong Wang Date: Wed, 2 Jul 2025 11:13:12 -0700 Subject: [PATCH 6/6] typo fix --- py-src/data_formulator/data_loader/mysql_data_loader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index 4e8d5299..e96bb89a 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -54,17 +54,17 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti self.duck_db_conn.install_extension("mysql") self.duck_db_conn.load_extension("mysql") - attatch_string = "" + attach_string = "" for key, value in self.params.items(): if value is not None and value != "": - attatch_string += f"{key}={value} " + attach_string += f"{key}={value} " # Detach existing mysqldb connection if it exists try: self.duck_db_conn.execute("DETACH mysqldb;") except: pass # Ignore if mysqldb doesn't exist # Register MySQL connection - self.duck_db_conn.execute(f"ATTACH '{attatch_string}' AS mysqldb (TYPE mysql);") + self.duck_db_conn.execute(f"ATTACH '{attach_string}' AS mysqldb (TYPE mysql);") def list_tables(self, table_filter: str = None): tables_df = self.duck_db_conn.execute(f"""