Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 11 additions & 236 deletions frontend/src/pages/DataManagement/Detail/components/DataQuality.tsx
Original file line number Diff line number Diff line change
@@ -1,76 +1,9 @@
import React, { useMemo } from "react";
import { Card, Table, Progress } from "antd";
import { AlertTriangle, Tags, BarChart3 } from "lucide-react";
import { Card, Table, Progress, Empty } from "antd";
import { Tags, BarChart3 } from "lucide-react";
import { Dataset } from "@/pages/DataManagement/dataset.model.ts";
import { useTranslation } from "react-i18next";

/**
 * Kind of dataset the quality metrics are computed for.
 * `getMockMetrics` has dedicated branches for "image" and "text"; any other
 * value falls through to the tabular scoring path.
 */
type DatasetType = "image" | "text" | "tabular";

/**
 * Counters that feed the quality-score formulas in this file.
 * Only `totalFiles` is required; `getMockMetrics` treats every missing
 * optional counter as 0.
 */
interface FileStats {
// Denominator for every ratio; consumers floor it to at least 1 before dividing.
totalFiles: number;
// Presumably the number of corrupted files — confirm against the data source.
corrupted?: number;
// Presumably the number of items without labels — confirm against the data source.
unlabeled?: number;
// Presumably the number of low-quality items — confirm against the data source.
lowQuality?: number;
// Presumably the number of items with missing fields — confirm against the data source.
missingFields?: number;
// Presumably the number of duplicated rows — confirm against the data source.
duplicateRows?: number;
}

/**
 * Props object accepted by the `DataQuality` component; every field is optional.
 */
interface Props {
// Dataset whose quality information is displayed.
dataset?: Dataset;
// Explicit dataset type supplied by the caller.
datasetType?: DatasetType;
// Explicit file counters supplied by the caller.
fileStats?: FileStats;
}

/**
 * Restricts a number to the range [`min`, `max`].
 *
 * @param v   Value to restrict.
 * @param min Lower bound (defaults to 0).
 * @param max Upper bound (defaults to 100).
 * @returns `v` when it lies inside the range, otherwise the nearest bound.
 */
function clamp(v: number, min = 0, max = 100) {
  // Apply the upper bound first, then the lower bound.
  const cappedAbove = Math.min(max, v);
  return Math.max(min, cappedAbove);
}

/**
 * Draws a pseudo-random integer from the inclusive range [`min`, `max`].
 *
 * @param min Smallest value that can be returned.
 * @param max Largest value that can be returned.
 * @returns An integer between `min` and `max`, both ends included.
 */
function randInt(min: number, max: number) {
  // Number of distinct integers in the inclusive range.
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}

/**
 * Derives three heuristic quality scores (0–100, one decimal place) for a
 * dataset from its file counters.
 *
 * Each score starts at 100 and is reduced by weighted ratios of the problem
 * counters over the total file count, then clamped and rounded.
 *
 * NOTE(review): this helper calls the `useTranslation` hook, so it is only
 * valid where React hooks may run. The `DataQuality` component in this file
 * invokes it from inside a `useMemo` callback — confirm that this complies
 * with the Rules of Hooks.
 *
 * @param datasetType Selects the scoring formulas; anything other than
 *                    "image" or "text" uses the tabular formulas.
 * @param stats       File counters; missing optional counters count as 0.
 * @returns Three `{ metric, value, color }` rows for display.
 */
function getMockMetrics(datasetType: DatasetType, stats: FileStats) {
  const { t } = useTranslation();

  // Guard against a zero or missing total so the ratios never divide by zero.
  const fileTotal = Math.max(1, stats.totalFiles || 1);
  const corruptedRatio = (stats.corrupted || 0) / fileTotal;
  const unlabeledRatio = (stats.unlabeled || 0) / fileTotal;
  const lowQualityRatio = (stats.lowQuality || 0) / fileTotal;
  const missingFieldsRatio = (stats.missingFields || 0) / fileTotal;
  const duplicateRowsRatio = (stats.duplicateRows || 0) / fileTotal;

  // Round a score to one decimal place.
  const toOneDecimal = (score: number) => Math.round(score * 10) / 10;

  switch (datasetType) {
    case "image": {
      const clarityScore = clamp(100 - lowQualityRatio * 120 - corruptedRatio * 100);
      const colorScore = clamp(100 - lowQualityRatio * 80);
      const annotationScore = clamp(100 - unlabeledRatio * 150 - corruptedRatio * 50);
      return [
        {
          metric: t("dataManagement.quality.imageClarity"),
          value: toOneDecimal(clarityScore),
          color: "bg-green-500",
        },
        {
          metric: t("dataManagement.quality.colorConsistency"),
          value: toOneDecimal(colorScore),
          color: "bg-blue-500",
        },
        {
          metric: t("dataManagement.quality.annotationCompleteness"),
          value: toOneDecimal(annotationScore),
          color: "bg-purple-500",
        },
      ];
    }

    case "text": {
      const tokenScore = clamp(100 - corruptedRatio * 90 - missingFieldsRatio * 60);
      const labelScore = clamp(100 - unlabeledRatio * 140 - corruptedRatio * 40);
      const metadataScore = clamp(100 - missingFieldsRatio * 150);
      return [
        {
          metric: t("dataManagement.quality.tokenQuality"),
          value: toOneDecimal(tokenScore),
          color: "bg-green-500",
        },
        {
          metric: t("dataManagement.quality.labelConsistency"),
          value: toOneDecimal(labelScore),
          color: "bg-blue-500",
        },
        {
          metric: t("dataManagement.quality.metadataCompleteness"),
          value: toOneDecimal(metadataScore),
          color: "bg-purple-500",
        },
      ];
    }

    default: {
      // Tabular datasets (and any unrecognised type).
      const missingValueScore = clamp(100 - missingFieldsRatio * 200 - corruptedRatio * 50);
      const typeScore = clamp(100 - corruptedRatio * 120 - duplicateRowsRatio * 40);
      const uniquenessScore = clamp(100 - duplicateRowsRatio * 200);
      return [
        {
          metric: t("dataManagement.quality.missingValueControl"),
          value: toOneDecimal(missingValueScore),
          color: "bg-green-500",
        },
        {
          metric: t("dataManagement.quality.typeConsistency"),
          value: toOneDecimal(typeScore),
          color: "bg-blue-500",
        },
        {
          metric: t("dataManagement.quality.uniqueness"),
          value: toOneDecimal(uniquenessScore),
          color: "bg-purple-500",
        },
      ];
    }
  }
}

// 数据集标签分布统计组件
interface LabelDistributionProps {
distribution?: Record<string, Record<string, number>>;
Expand Down Expand Up @@ -151,7 +84,7 @@ function LabelDistributionStats({ distribution }: LabelDistributionProps) {
key: 'percentage',
width: 200,
sorter: (a: any, b: any) => a.percentage - b.percentage,
render: (percentage: number, record: any) => (
render: (percentage: number) => (
<div className="flex items-center gap-3">
<Progress
percent={parseFloat(percentage.toFixed(1))}
Expand Down Expand Up @@ -183,39 +116,22 @@ function LabelDistributionStats({ distribution }: LabelDistributionProps) {
if (!distribution || Object.keys(distribution).length === 0) {
return (
<Card className="bg-gray-50">
<div className="text-center py-8 text-gray-400">
<Tags className="w-12 h-12 mx-auto mb-3 opacity-50" />
<p>{t("dataManagement.quality.labelDistribution.noData")}</p>
</div>
<Empty
image={Empty.PRESENTED_IMAGE_SIMPLE}
description={t("dataManagement.quality.labelDistribution.noData")}
/>
</Card>
);
}

return (
<div className="space-y-4">
{/* 统计概览 */}
<Card className="bg-gradient-to-r from-blue-50 to-indigo-50 border-blue-200">
<div className="flex items-center justify-between">
<div className="flex items-center gap-3">
<div className="p-2 bg-blue-500 rounded-lg">
<Tags className="w-5 h-5 text-white" />
</div>
<div>
<h3 className="font-semibold text-gray-800">{t("dataManagement.quality.labelDistribution.statisticsTitle")}</h3>
<p className="text-sm text-gray-600">
{t("dataManagement.quality.labelDistribution.statisticsSummary", { categoryCount: Object.keys(categoryGroups).length, totalLabels })}
</p>
</div>
</div>
</div>
</Card>

{/* 表格视图 */}
<Card
title={
<div className="flex items-center gap-2">
<BarChart3 className="w-4 h-4" />
<span>{t("dataManagement.quality.labelDistribution.detailsCardTitle")}</span>
<span>{t("dataManagement.quality.labelDistribution.title")}</span>
</div>
}
>
Expand All @@ -225,160 +141,19 @@ function LabelDistributionStats({ distribution }: LabelDistributionProps) {
rowKey={(record) => `${record.category}-${record.label}`}
pagination={{
pageSize: 10,
showTotal: (total) => `共 ${total} 条`,
showTotal: (total) => t("dataManagement.quality.labelDistribution.paginationTotal", { total }),
showSizeChanger: true,
}}
size="small"
/>
</Card>

{/* 分类卡片视图 */}
<div className="grid md:grid-cols-2 gap-4">
{Object.entries(categoryGroups).map(([category, items]) => {
const categoryTotal = items.reduce((sum, item) => sum + item.count, 0);
const topLabels = items.slice(0, 5); // 只显示前5个

return (
<Card
key={category}
title={<span className="font-semibold">{category}</span>}
size="small"
>
<div className="space-y-3">
<div className="text-sm text-gray-600">
{t("dataManagement.quality.labelDistribution.totalLabels", { count: categoryTotal })}
</div>
{topLabels.map((item) => (
<div key={item.label} className="space-y-1">
<div className="flex justify-between text-sm">
<span className="truncate flex-1" title={item.label}>
{item.label}
</span>
<span className="font-medium ml-2">{item.count}</span>
</div>
<div className="w-full bg-gray-200 rounded-full h-2">
<div
className="bg-gradient-to-r from-blue-500 to-indigo-500 h-2 rounded-full transition-all duration-300"
style={{
width: `${(item.count / categoryTotal) * 100}%`,
}}
/>
</div>
</div>
))}
{items.length > 5 && (
<div className="text-xs text-gray-500 text-center pt-2">
{t("dataManagement.quality.labelDistribution.moreLabels", { count: items.length - 5 })}
</div>
)}
</div>
</Card>
);
})}
</div>
</div>
);
}

export default function DataQuality(props: Props = {}) {
const { dataset, datasetType: propDatasetType, fileStats: propFileStats } = props;
const { t } = useTranslation();

// Prefer dataset fields when available, then explicit props, then sensible defaults
const inferredTypeFromDataset = (dataset && ((dataset as any).type || (dataset as any).datasetType)) as DatasetType | undefined;
const datasetType: DatasetType = (propDatasetType || inferredTypeFromDataset || "image") as DatasetType;

// Try to obtain file stats from dataset if provided
let fileStatsFromSource: FileStats | undefined = propFileStats;
let detailedFieldsProvided = false; // track whether detailed fields exist (not defaulted)

if (!fileStatsFromSource && dataset) {
if ((dataset as any).fileStats) {
fileStatsFromSource = (dataset as any).fileStats as FileStats;
// consider detailed if any field beyond totalFiles present
const fs = fileStatsFromSource as any;
detailedFieldsProvided = fs.corrupted !== undefined || fs.unlabeled !== undefined || fs.lowQuality !== undefined || fs.missingFields !== undefined || fs.duplicateRows !== undefined;
} else {
// attempt to infer total files from common fields
let total = 0;
const dsAny = dataset as any;
if (typeof dsAny.files === "number") total = dsAny.files;
else if (Array.isArray(dsAny.files)) total = dsAny.files.length;
else if (typeof dsAny.fileCount === "number") total = dsAny.fileCount;

fileStatsFromSource = {
totalFiles: Math.max(1, total || 1),
corrupted: dsAny.corrupted !== undefined ? dsAny.corrupted : undefined,
unlabeled: dsAny.unlabeled !== undefined ? dsAny.unlabeled : undefined,
lowQuality: dsAny.lowQuality !== undefined ? dsAny.lowQuality : undefined,
missingFields: dsAny.missingFields !== undefined ? dsAny.missingFields : undefined,
duplicateRows: dsAny.duplicateRows !== undefined ? dsAny.duplicateRows : undefined,
};
detailedFieldsProvided = !!(dsAny.corrupted || dsAny.unlabeled || dsAny.lowQuality || dsAny.missingFields || dsAny.duplicateRows);
}
}

// if props provided, check if they included detailed fields
if (propFileStats) {
fileStatsFromSource = propFileStats;
const p = propFileStats as any;
detailedFieldsProvided = p.corrupted !== undefined || p.unlabeled !== undefined || p.lowQuality !== undefined || p.missingFields !== undefined || p.duplicateRows !== undefined;
}

// final fallback defaults (note: these are complete defaults)
const finalFileStats: FileStats = fileStatsFromSource ?? { totalFiles: 120, corrupted: 3, unlabeled: 6, lowQuality: 5, missingFields: 0, duplicateRows: 0 };
// if we landed on fallback defaults, mark detailedFieldsProvided = false so we apply jitter
const completeSource = detailedFieldsProvided || !!fileStatsFromSource;

// compute metrics once and apply jitter if data incomplete
const { metrics, integrityMetrics } = React.useMemo(() => {
const baseMetrics = getMockMetrics(datasetType, finalFileStats);

const baseIntegrity =
datasetType === "image"
? [
{ metric: t("dataManagement.quality.fileIntegrity"), value: clamp(100 - ((finalFileStats.corrupted || 0) / Math.max(1, finalFileStats.totalFiles)) * 100), color: "bg-green-500" },
{ metric: t("dataManagement.quality.metadataCompleteness"), value: clamp(100 - ((finalFileStats.missingFields || 0) / Math.max(1, finalFileStats.totalFiles)) * 100), color: "bg-blue-500" },
{ metric: t("dataManagement.quality.labelConsistency"), value: clamp(100 - ((finalFileStats.unlabeled || 0) / Math.max(1, finalFileStats.totalFiles)) * 120), color: "bg-purple-500" },
]
: datasetType === "text"
? [
{ metric: t("dataManagement.quality.fileIntegrity"), value: clamp(100 - ((finalFileStats.corrupted || 0) / Math.max(1, finalFileStats.totalFiles)) * 100), color: "bg-green-500" },
{ metric: t("dataManagement.quality.fieldIntegrity"), value: clamp(100 - ((finalFileStats.missingFields || 0) / Math.max(1, finalFileStats.totalFiles)) * 120), color: "bg-blue-500" },
{ metric: t("dataManagement.quality.labelConsistency"), value: clamp(100 - ((finalFileStats.unlabeled || 0) / Math.max(1, finalFileStats.totalFiles)) * 120), color: "bg-purple-500" },
]
: [
{ metric: t("dataManagement.quality.fileIntegrity"), value: clamp(100 - ((finalFileStats.corrupted || 0) / Math.max(1, finalFileStats.totalFiles)) * 100), color: "bg-green-500" },
{ metric: t("dataManagement.quality.columnIntegrity"), value: clamp(100 - ((finalFileStats.missingFields || 0) / Math.max(1, finalFileStats.totalFiles)) * 120), color: "bg-blue-500" },
{ metric: t("dataManagement.quality.duplicateRate"), value: clamp(100 - ((finalFileStats.duplicateRows || 0) / Math.max(1, finalFileStats.totalFiles)) * 200), color: "bg-purple-500" },
];

// if source data is incomplete or only totalFiles known, apply a small random reduction so values are not all 100%
if (!completeSource) {
// jitter range can be tuned; using 4-12% to make results realistic but not drastic
const jitterMax = 12;
const jitterMin = 4;

const jittered = baseMetrics.map((m) => {
// don't reduce below 40 for readability
const jitter = randInt(jitterMin, jitterMax);
return { ...m, value: clamp(Math.round((m.value - jitter) * 10) / 10) };
});

const integrityJittered = baseIntegrity.map((m) => {
const jitter = randInt(jitterMin, jitterMax);
return { ...m, value: clamp(Math.round((m.value - jitter) * 10) / 10) };
});

return { metrics: jittered, integrityMetrics: integrityJittered };
}

return { metrics: baseMetrics, integrityMetrics: baseIntegrity };
}, [datasetType, finalFileStats, completeSource]);

export default function DataQuality({ dataset }: { dataset?: Dataset }) {
return (
<div className="mt-0 space-y-6">
{/* 数据集标签统计 */}
<div className="mt-0">
<LabelDistributionStats distribution={(dataset as any)?.distribution} />
</div>
);
Expand Down
18 changes: 14 additions & 4 deletions frontend/src/pages/DataManagement/Detail/components/Overview.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,13 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
message.warning(t("dataManagement.placeholders.folderName"));
return Promise.reject();
}
await handleRenameDirectory(fullPath, record.fileName, newDirName);
fetchDataset();
try {
await handleRenameDirectory(fullPath, record.fileName, newDirName);
fetchDataset();
} catch (error) {
// 错误已经在 handleRenameDirectory 中处理,这里只需要阻止 modal 关闭
return Promise.reject();
}
},
});
}}
Expand Down Expand Up @@ -357,8 +362,13 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
message.warning(t("dataManagement.placeholders.fileName"));
return Promise.reject();
}
await handleRenameFile(record, newBaseName);
fetchDataset();
try {
await handleRenameFile(record, newBaseName);
fetchDataset();
} catch (error) {
// 错误已经在 handleRenameFile 中处理,这里只需要阻止 modal 关闭
return Promise.reject();
}
},
});
}}
Expand Down
24 changes: 18 additions & 6 deletions frontend/src/pages/DataManagement/Detail/useFilesOperation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -319,9 +319,15 @@ export function useFilesOperation(dataset: Dataset) {
const currentPrefix = pagination.prefix || "";
await fetchFiles(currentPrefix, 1, pagination.pageSize);
message.success({ content: `文件 ${file.fileName} 重命名成功` });
} catch (error) {
message.error({ content: `文件 ${file.fileName} 重命名失败` });
throw error;
} catch (error: any) {
// 解析错误信息,提取更友好的提示
const errorMsg = error?.response?.data?.message || error?.message || error?.toString();

if (errorMsg?.includes("已存在") || errorMsg?.includes("already exists") || errorMsg?.includes("duplicate")) {
message.error({ content: `文件名 "${trimmed}" 已存在,请使用其他名称` });
} else {
message.error({ content: `文件 ${file.fileName} 重命名失败:${errorMsg}` });
}
}
},
handleRenameDirectory: async (directoryPath: string, oldName: string, newName: string) => {
Expand All @@ -335,9 +341,15 @@ export function useFilesOperation(dataset: Dataset) {
const currentPrefix = pagination.prefix || "";
await fetchFiles(currentPrefix, 1, pagination.pageSize);
message.success({ content: `文件夹 ${oldName} 重命名为 ${trimmed} 成功` });
} catch (error) {
message.error({ content: `文件夹 ${oldName} 重命名失败` });
throw error;
} catch (error: any) {
// 解析错误信息,提取更友好的提示
const errorMsg = error?.response?.data?.message || error?.message || error?.toString();

if (errorMsg?.includes("已存在") || errorMsg?.includes("already exists") || errorMsg?.includes("duplicate")) {
message.error({ content: `文件夹名 "${trimmed}" 已存在,请使用其他名称` });
} else {
message.error({ content: `文件夹 ${oldName} 重命名失败:${errorMsg}` });
}
}
},
};
Expand Down
Loading