Skip to content

Commit 4c5cc9d

Browse files
authored
art: remove data quality (#423)
1 parent e6fb6a7 commit 4c5cc9d

3 files changed

Lines changed: 43 additions & 246 deletions

File tree

frontend/src/pages/DataManagement/Detail/components/DataQuality.tsx

Lines changed: 11 additions & 236 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,9 @@
11
import React, { useMemo } from "react";
2-
import { Card, Table, Progress } from "antd";
3-
import { AlertTriangle, Tags, BarChart3 } from "lucide-react";
2+
import { Card, Table, Progress, Empty } from "antd";
3+
import { Tags, BarChart3 } from "lucide-react";
44
import { Dataset } from "@/pages/DataManagement/dataset.model.ts";
55
import { useTranslation } from "react-i18next";
66

7-
type DatasetType = "image" | "text" | "tabular";
8-
9-
interface FileStats {
10-
totalFiles: number;
11-
corrupted?: number;
12-
unlabeled?: number;
13-
lowQuality?: number;
14-
missingFields?: number;
15-
duplicateRows?: number;
16-
}
17-
18-
interface Props {
19-
dataset?: Dataset;
20-
datasetType?: DatasetType;
21-
fileStats?: FileStats;
22-
}
23-
24-
function clamp(v: number, min = 0, max = 100) {
25-
return Math.max(min, Math.min(max, v));
26-
}
27-
28-
function randInt(min: number, max: number) {
29-
return Math.floor(Math.random() * (max - min + 1)) + min;
30-
}
31-
32-
function getMockMetrics(datasetType: DatasetType, stats: FileStats) {
33-
const { t } = useTranslation();
34-
const total = Math.max(1, stats.totalFiles || 1);
35-
const corrupted = stats.corrupted || 0;
36-
const unlabeled = stats.unlabeled || 0;
37-
const lowQuality = stats.lowQuality || 0;
38-
const missingFields = stats.missingFields || 0;
39-
const duplicateRows = stats.duplicateRows || 0;
40-
41-
if (datasetType === "image") {
42-
const clarity = clamp(100 - (lowQuality / total) * 120 - (corrupted / total) * 100);
43-
const colorConsistency = clamp(100 - (lowQuality / total) * 80);
44-
const annotationCompleteness = clamp(100 - (unlabeled / total) * 150 - (corrupted / total) * 50);
45-
return [
46-
{ metric: t("dataManagement.quality.imageClarity"), value: Math.round(clarity * 10) / 10, color: "bg-green-500" },
47-
{ metric: t("dataManagement.quality.colorConsistency"), value: Math.round(colorConsistency * 10) / 10, color: "bg-blue-500" },
48-
{ metric: t("dataManagement.quality.annotationCompleteness"), value: Math.round(annotationCompleteness * 10) / 10, color: "bg-purple-500" },
49-
];
50-
}
51-
52-
if (datasetType === "text") {
53-
const tokenQuality = clamp(100 - (corrupted / total) * 90 - (missingFields / total) * 60);
54-
const labelConsistency = clamp(100 - (unlabeled / total) * 140 - (corrupted / total) * 40);
55-
const metadataCompleteness = clamp(100 - (missingFields / total) * 150);
56-
return [
57-
{ metric: t("dataManagement.quality.tokenQuality"), value: Math.round(tokenQuality * 10) / 10, color: "bg-green-500" },
58-
{ metric: t("dataManagement.quality.labelConsistency"), value: Math.round(labelConsistency * 10) / 10, color: "bg-blue-500" },
59-
{ metric: t("dataManagement.quality.metadataCompleteness"), value: Math.round(metadataCompleteness * 10) / 10, color: "bg-purple-500" },
60-
];
61-
}
62-
63-
// tabular
64-
const missingValueScore = clamp(100 - (missingFields / total) * 200 - (corrupted / total) * 50);
65-
const typeConsistency = clamp(100 - (corrupted / total) * 120 - (duplicateRows / total) * 40);
66-
const uniqueness = clamp(100 - (duplicateRows / total) * 200);
67-
return [
68-
{ metric: t("dataManagement.quality.missingValueControl"), value: Math.round(missingValueScore * 10) / 10, color: "bg-green-500" },
69-
{ metric: t("dataManagement.quality.typeConsistency"), value: Math.round(typeConsistency * 10) / 10, color: "bg-blue-500" },
70-
{ metric: t("dataManagement.quality.uniqueness"), value: Math.round(uniqueness * 10) / 10, color: "bg-purple-500" },
71-
];
72-
}
73-
747
// 数据集标签分布统计组件
758
interface LabelDistributionProps {
769
distribution?: Record<string, Record<string, number>>;
@@ -151,7 +84,7 @@ function LabelDistributionStats({ distribution }: LabelDistributionProps) {
15184
key: 'percentage',
15285
width: 200,
15386
sorter: (a: any, b: any) => a.percentage - b.percentage,
154-
render: (percentage: number, record: any) => (
87+
render: (percentage: number) => (
15588
<div className="flex items-center gap-3">
15689
<Progress
15790
percent={parseFloat(percentage.toFixed(1))}
@@ -183,39 +116,22 @@ function LabelDistributionStats({ distribution }: LabelDistributionProps) {
183116
if (!distribution || Object.keys(distribution).length === 0) {
184117
return (
185118
<Card className="bg-gray-50">
186-
<div className="text-center py-8 text-gray-400">
187-
<Tags className="w-12 h-12 mx-auto mb-3 opacity-50" />
188-
<p>{t("dataManagement.quality.labelDistribution.noData")}</p>
189-
</div>
119+
<Empty
120+
image={Empty.PRESENTED_IMAGE_SIMPLE}
121+
description={t("dataManagement.quality.labelDistribution.noData")}
122+
/>
190123
</Card>
191124
);
192125
}
193126

194127
return (
195128
<div className="space-y-4">
196-
{/* 统计概览 */}
197-
<Card className="bg-gradient-to-r from-blue-50 to-indigo-50 border-blue-200">
198-
<div className="flex items-center justify-between">
199-
<div className="flex items-center gap-3">
200-
<div className="p-2 bg-blue-500 rounded-lg">
201-
<Tags className="w-5 h-5 text-white" />
202-
</div>
203-
<div>
204-
<h3 className="font-semibold text-gray-800">{t("dataManagement.quality.labelDistribution.statisticsTitle")}</h3>
205-
<p className="text-sm text-gray-600">
206-
{t("dataManagement.quality.labelDistribution.statisticsSummary", { categoryCount: Object.keys(categoryGroups).length, totalLabels })}
207-
</p>
208-
</div>
209-
</div>
210-
</div>
211-
</Card>
212-
213129
{/* 表格视图 */}
214130
<Card
215131
title={
216132
<div className="flex items-center gap-2">
217133
<BarChart3 className="w-4 h-4" />
218-
<span>{t("dataManagement.quality.labelDistribution.detailsCardTitle")}</span>
134+
<span>{t("dataManagement.quality.labelDistribution.title")}</span>
219135
</div>
220136
}
221137
>
@@ -225,160 +141,19 @@ function LabelDistributionStats({ distribution }: LabelDistributionProps) {
225141
rowKey={(record) => `${record.category}-${record.label}`}
226142
pagination={{
227143
pageSize: 10,
228-
showTotal: (total) => `共 ${total} 条`,
144+
showTotal: (total) => t("dataManagement.quality.labelDistribution.paginationTotal", { total }),
229145
showSizeChanger: true,
230146
}}
231147
size="small"
232148
/>
233149
</Card>
234-
235-
{/* 分类卡片视图 */}
236-
<div className="grid md:grid-cols-2 gap-4">
237-
{Object.entries(categoryGroups).map(([category, items]) => {
238-
const categoryTotal = items.reduce((sum, item) => sum + item.count, 0);
239-
const topLabels = items.slice(0, 5); // 只显示前5个
240-
241-
return (
242-
<Card
243-
key={category}
244-
title={<span className="font-semibold">{category}</span>}
245-
size="small"
246-
>
247-
<div className="space-y-3">
248-
<div className="text-sm text-gray-600">
249-
{t("dataManagement.quality.labelDistribution.totalLabels", { count: categoryTotal })}
250-
</div>
251-
{topLabels.map((item) => (
252-
<div key={item.label} className="space-y-1">
253-
<div className="flex justify-between text-sm">
254-
<span className="truncate flex-1" title={item.label}>
255-
{item.label}
256-
</span>
257-
<span className="font-medium ml-2">{item.count}</span>
258-
</div>
259-
<div className="w-full bg-gray-200 rounded-full h-2">
260-
<div
261-
className="bg-gradient-to-r from-blue-500 to-indigo-500 h-2 rounded-full transition-all duration-300"
262-
style={{
263-
width: `${(item.count / categoryTotal) * 100}%`,
264-
}}
265-
/>
266-
</div>
267-
</div>
268-
))}
269-
{items.length > 5 && (
270-
<div className="text-xs text-gray-500 text-center pt-2">
271-
{t("dataManagement.quality.labelDistribution.moreLabels", { count: items.length - 5 })}
272-
</div>
273-
)}
274-
</div>
275-
</Card>
276-
);
277-
})}
278-
</div>
279150
</div>
280151
);
281152
}
282153

283-
export default function DataQuality(props: Props = {}) {
284-
const { dataset, datasetType: propDatasetType, fileStats: propFileStats } = props;
285-
const { t } = useTranslation();
286-
287-
// Prefer dataset fields when available, then explicit props, then sensible defaults
288-
const inferredTypeFromDataset = (dataset && ((dataset as any).type || (dataset as any).datasetType)) as DatasetType | undefined;
289-
const datasetType: DatasetType = (propDatasetType || inferredTypeFromDataset || "image") as DatasetType;
290-
291-
// Try to obtain file stats from dataset if provided
292-
let fileStatsFromSource: FileStats | undefined = propFileStats;
293-
let detailedFieldsProvided = false; // track whether detailed fields exist (not defaulted)
294-
295-
if (!fileStatsFromSource && dataset) {
296-
if ((dataset as any).fileStats) {
297-
fileStatsFromSource = (dataset as any).fileStats as FileStats;
298-
// consider detailed if any field beyond totalFiles present
299-
const fs = fileStatsFromSource as any;
300-
detailedFieldsProvided = fs.corrupted !== undefined || fs.unlabeled !== undefined || fs.lowQuality !== undefined || fs.missingFields !== undefined || fs.duplicateRows !== undefined;
301-
} else {
302-
// attempt to infer total files from common fields
303-
let total = 0;
304-
const dsAny = dataset as any;
305-
if (typeof dsAny.files === "number") total = dsAny.files;
306-
else if (Array.isArray(dsAny.files)) total = dsAny.files.length;
307-
else if (typeof dsAny.fileCount === "number") total = dsAny.fileCount;
308-
309-
fileStatsFromSource = {
310-
totalFiles: Math.max(1, total || 1),
311-
corrupted: dsAny.corrupted !== undefined ? dsAny.corrupted : undefined,
312-
unlabeled: dsAny.unlabeled !== undefined ? dsAny.unlabeled : undefined,
313-
lowQuality: dsAny.lowQuality !== undefined ? dsAny.lowQuality : undefined,
314-
missingFields: dsAny.missingFields !== undefined ? dsAny.missingFields : undefined,
315-
duplicateRows: dsAny.duplicateRows !== undefined ? dsAny.duplicateRows : undefined,
316-
};
317-
detailedFieldsProvided = !!(dsAny.corrupted || dsAny.unlabeled || dsAny.lowQuality || dsAny.missingFields || dsAny.duplicateRows);
318-
}
319-
}
320-
321-
// if props provided, check if they included detailed fields
322-
if (propFileStats) {
323-
fileStatsFromSource = propFileStats;
324-
const p = propFileStats as any;
325-
detailedFieldsProvided = p.corrupted !== undefined || p.unlabeled !== undefined || p.lowQuality !== undefined || p.missingFields !== undefined || p.duplicateRows !== undefined;
326-
}
327-
328-
// final fallback defaults (note: these are complete defaults)
329-
const finalFileStats: FileStats = fileStatsFromSource ?? { totalFiles: 120, corrupted: 3, unlabeled: 6, lowQuality: 5, missingFields: 0, duplicateRows: 0 };
330-
// if we landed on fallback defaults, mark detailedFieldsProvided = false so we apply jitter
331-
const completeSource = detailedFieldsProvided || !!fileStatsFromSource;
332-
333-
// compute metrics once and apply jitter if data incomplete
334-
const { metrics, integrityMetrics } = React.useMemo(() => {
335-
const baseMetrics = getMockMetrics(datasetType, finalFileStats);
336-
337-
const baseIntegrity =
338-
datasetType === "image"
339-
? [
340-
{ metric: t("dataManagement.quality.fileIntegrity"), value: clamp(100 - ((finalFileStats.corrupted || 0) / Math.max(1, finalFileStats.totalFiles)) * 100), color: "bg-green-500" },
341-
{ metric: t("dataManagement.quality.metadataCompleteness"), value: clamp(100 - ((finalFileStats.missingFields || 0) / Math.max(1, finalFileStats.totalFiles)) * 100), color: "bg-blue-500" },
342-
{ metric: t("dataManagement.quality.labelConsistency"), value: clamp(100 - ((finalFileStats.unlabeled || 0) / Math.max(1, finalFileStats.totalFiles)) * 120), color: "bg-purple-500" },
343-
]
344-
: datasetType === "text"
345-
? [
346-
{ metric: t("dataManagement.quality.fileIntegrity"), value: clamp(100 - ((finalFileStats.corrupted || 0) / Math.max(1, finalFileStats.totalFiles)) * 100), color: "bg-green-500" },
347-
{ metric: t("dataManagement.quality.fieldIntegrity"), value: clamp(100 - ((finalFileStats.missingFields || 0) / Math.max(1, finalFileStats.totalFiles)) * 120), color: "bg-blue-500" },
348-
{ metric: t("dataManagement.quality.labelConsistency"), value: clamp(100 - ((finalFileStats.unlabeled || 0) / Math.max(1, finalFileStats.totalFiles)) * 120), color: "bg-purple-500" },
349-
]
350-
: [
351-
{ metric: t("dataManagement.quality.fileIntegrity"), value: clamp(100 - ((finalFileStats.corrupted || 0) / Math.max(1, finalFileStats.totalFiles)) * 100), color: "bg-green-500" },
352-
{ metric: t("dataManagement.quality.columnIntegrity"), value: clamp(100 - ((finalFileStats.missingFields || 0) / Math.max(1, finalFileStats.totalFiles)) * 120), color: "bg-blue-500" },
353-
{ metric: t("dataManagement.quality.duplicateRate"), value: clamp(100 - ((finalFileStats.duplicateRows || 0) / Math.max(1, finalFileStats.totalFiles)) * 200), color: "bg-purple-500" },
354-
];
355-
356-
// if source data is incomplete or only totalFiles known, apply a small random reduction so values are not all 100%
357-
if (!completeSource) {
358-
// jitter range can be tuned; using 4-12% to make results realistic but not drastic
359-
const jitterMax = 12;
360-
const jitterMin = 4;
361-
362-
const jittered = baseMetrics.map((m) => {
363-
// don't reduce below 40 for readability
364-
const jitter = randInt(jitterMin, jitterMax);
365-
return { ...m, value: clamp(Math.round((m.value - jitter) * 10) / 10) };
366-
});
367-
368-
const integrityJittered = baseIntegrity.map((m) => {
369-
const jitter = randInt(jitterMin, jitterMax);
370-
return { ...m, value: clamp(Math.round((m.value - jitter) * 10) / 10) };
371-
});
372-
373-
return { metrics: jittered, integrityMetrics: integrityJittered };
374-
}
375-
376-
return { metrics: baseMetrics, integrityMetrics: baseIntegrity };
377-
}, [datasetType, finalFileStats, completeSource]);
378-
154+
export default function DataQuality({ dataset }: { dataset?: Dataset }) {
379155
return (
380-
<div className="mt-0 space-y-6">
381-
{/* 数据集标签统计 */}
156+
<div className="mt-0">
382157
<LabelDistributionStats distribution={(dataset as any)?.distribution} />
383158
</div>
384159
);

frontend/src/pages/DataManagement/Detail/components/Overview.tsx

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,13 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
295295
message.warning(t("dataManagement.placeholders.folderName"));
296296
return Promise.reject();
297297
}
298-
await handleRenameDirectory(fullPath, record.fileName, newDirName);
299-
fetchDataset();
298+
try {
299+
await handleRenameDirectory(fullPath, record.fileName, newDirName);
300+
fetchDataset();
301+
} catch (error) {
302+
// 错误已经在 handleRenameDirectory 中处理,这里只需要阻止 modal 关闭
303+
return Promise.reject();
304+
}
300305
},
301306
});
302307
}}
@@ -363,8 +368,13 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
363368
message.warning(t("dataManagement.placeholders.fileName"));
364369
return Promise.reject();
365370
}
366-
await handleRenameFile(record, newBaseName);
367-
fetchDataset();
371+
try {
372+
await handleRenameFile(record, newBaseName);
373+
fetchDataset();
374+
} catch (error) {
375+
// 错误已经在 handleRenameFile 中处理,这里只需要阻止 modal 关闭
376+
return Promise.reject();
377+
}
368378
},
369379
});
370380
}}

frontend/src/pages/DataManagement/Detail/useFilesOperation.ts

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -533,9 +533,15 @@ export function useFilesOperation(dataset: Dataset) {
533533
const currentPrefix = pagination.prefix || "";
534534
await fetchFiles(currentPrefix, 1, pagination.pageSize);
535535
message.success({ content: `文件 ${file.fileName} 重命名成功` });
536-
} catch (error) {
537-
message.error({ content: `文件 ${file.fileName} 重命名失败` });
538-
throw error;
536+
} catch (error: any) {
537+
// 解析错误信息,提取更友好的提示
538+
const errorMsg = error?.response?.data?.message || error?.message || error?.toString();
539+
540+
if (errorMsg?.includes("已存在") || errorMsg?.includes("already exists") || errorMsg?.includes("duplicate")) {
541+
message.error({ content: `文件名 "${trimmed}" 已存在,请使用其他名称` });
542+
} else {
543+
message.error({ content: `文件 ${file.fileName} 重命名失败:${errorMsg}` });
544+
}
539545
}
540546
},
541547
handleRenameDirectory: async (directoryPath: string, oldName: string, newName: string) => {
@@ -549,9 +555,15 @@ export function useFilesOperation(dataset: Dataset) {
549555
const currentPrefix = pagination.prefix || "";
550556
await fetchFiles(currentPrefix, 1, pagination.pageSize);
551557
message.success({ content: `文件夹 ${oldName} 重命名为 ${trimmed} 成功` });
552-
} catch (error) {
553-
message.error({ content: `文件夹 ${oldName} 重命名失败` });
554-
throw error;
558+
} catch (error: any) {
559+
// 解析错误信息,提取更友好的提示
560+
const errorMsg = error?.response?.data?.message || error?.message || error?.toString();
561+
562+
if (errorMsg?.includes("已存在") || errorMsg?.includes("already exists") || errorMsg?.includes("duplicate")) {
563+
message.error({ content: `文件夹名 "${trimmed}" 已存在,请使用其他名称` });
564+
} else {
565+
message.error({ content: `文件夹 ${oldName} 重命名失败:${errorMsg}` });
566+
}
555567
}
556568
},
557569
};

0 commit comments

Comments
 (0)