11import React , { useMemo } from "react" ;
2- import { Card , Table , Progress } from "antd" ;
3- import { AlertTriangle , Tags , BarChart3 } from "lucide-react" ;
2+ import { Card , Table , Progress , Empty } from "antd" ;
3+ import { Tags , BarChart3 } from "lucide-react" ;
44import { Dataset } from "@/pages/DataManagement/dataset.model.ts" ;
55import { useTranslation } from "react-i18next" ;
66
/** Kinds of dataset the quality helpers distinguish between. */
type DatasetType = "image" | "text" | "tabular";
8-
/**
 * Per-dataset file counters used as input for the mock quality scores.
 * Only `totalFiles` is required; every other counter is treated as 0 when absent.
 */
interface FileStats {
  /** Total number of files; used as the denominator for every ratio. */
  totalFiles: number;
  /** Count of corrupted files. */
  corrupted?: number;
  /** Count of files without labels. */
  unlabeled?: number;
  /** Count of files flagged as low quality. */
  lowQuality?: number;
  /** Count of entries with missing fields. */
  missingFields?: number;
  /** Count of duplicated rows. */
  duplicateRows?: number;
}
17-
/**
 * Props accepted by the data-quality component.
 * All fields are optional.
 */
interface Props {
  /** Dataset whose quality information is displayed. */
  dataset?: Dataset;
  /** Explicit dataset kind. */
  datasetType?: DatasetType;
  /** Explicit file counters. */
  fileStats?: FileStats;
}
23-
/**
 * Restricts `v` to the closed interval [`min`, `max`].
 *
 * @param v   Value to restrict.
 * @param min Lower bound (defaults to 0).
 * @param max Upper bound (defaults to 100).
 * @returns `v` when it already lies inside the interval, otherwise the nearest bound.
 *          A `NaN` input yields `min` so that a broken ratio never reaches the UI as `NaN`.
 */
function clamp(v: number, min = 0, max = 100): number {
  // Math.min / Math.max propagate NaN, so it has to be handled explicitly.
  if (Number.isNaN(v)) {
    return min;
  }
  return Math.max(min, Math.min(max, v));
}
27-
/**
 * Returns a pseudo-random integer drawn uniformly from the inclusive range
 * between the two bounds.
 *
 * The bounds may be passed in either order; they are normalised first, because a
 * reversed pair would otherwise produce a negative span and a skewed result that
 * never reaches the smaller bound.
 *
 * @param min One end of the range (expected to be an integer).
 * @param max The other end of the range (expected to be an integer).
 * @returns An integer `n` with `lower <= n <= upper`.
 */
function randInt(min: number, max: number): number {
  const lower = Math.min(min, max);
  const upper = Math.max(min, max);
  // Math.random() is in [0, 1), so the scaled value is in [0, span) and floors to 0..span-1.
  const span = upper - lower + 1;
  return Math.floor(Math.random() * span) + lower;
}
31-
/**
 * Derives three mock quality scores for a dataset from its file counters.
 *
 * Every score starts at 100, is reduced by weighted defect ratios
 * (`counter / totalFiles`), limited to the range 0–100 and rounded to one decimal.
 * Which three scores are produced depends on `datasetType`; any type other than
 * `"image"` or `"text"` is handled as tabular data.
 *
 * @param datasetType Kind of dataset the scores are computed for.
 * @param stats       File counters; missing counters count as 0 and a missing or
 *                    zero `totalFiles` is treated as 1 to avoid division by zero.
 * @param t           Optional translation function for the metric labels. Pass it
 *                    whenever this helper is called outside the top level of a
 *                    component render (for example inside `useMemo`), because the
 *                    fallback below is a React hook call.
 * @returns Three `{ metric, value, color }` entries in display order.
 */
function getMockMetrics(
  datasetType: DatasetType,
  stats: FileStats,
  t?: (key: string) => string,
): { metric: string; value: number; color: string }[] {
  // Legacy fallback for two-argument callers: only valid while a component is rendering.
  // eslint-disable-next-line react-hooks/rules-of-hooks
  const translate: (key: string) => string = t ?? useTranslation().t;

  const total = Math.max(1, stats.totalFiles || 1);
  const corrupted = (stats.corrupted || 0) / total;
  const unlabeled = (stats.unlabeled || 0) / total;
  const lowQuality = (stats.lowQuality || 0) / total;
  const missingFields = (stats.missingFields || 0) / total;
  const duplicateRows = (stats.duplicateRows || 0) / total;

  // Limits a raw score to 0–100, rounds it to one decimal and attaches label and colour.
  const entry = (key: string, raw: number, color: string) => ({
    metric: translate(key),
    value: Math.round(Math.max(0, Math.min(100, raw)) * 10) / 10,
    color,
  });

  if (datasetType === "image") {
    return [
      entry("dataManagement.quality.imageClarity", 100 - lowQuality * 120 - corrupted * 100, "bg-green-500"),
      entry("dataManagement.quality.colorConsistency", 100 - lowQuality * 80, "bg-blue-500"),
      entry("dataManagement.quality.annotationCompleteness", 100 - unlabeled * 150 - corrupted * 50, "bg-purple-500"),
    ];
  }

  if (datasetType === "text") {
    return [
      entry("dataManagement.quality.tokenQuality", 100 - corrupted * 90 - missingFields * 60, "bg-green-500"),
      entry("dataManagement.quality.labelConsistency", 100 - unlabeled * 140 - corrupted * 40, "bg-blue-500"),
      entry("dataManagement.quality.metadataCompleteness", 100 - missingFields * 150, "bg-purple-500"),
    ];
  }

  // tabular
  return [
    entry("dataManagement.quality.missingValueControl", 100 - missingFields * 200 - corrupted * 50, "bg-green-500"),
    entry("dataManagement.quality.typeConsistency", 100 - corrupted * 120 - duplicateRows * 40, "bg-blue-500"),
    entry("dataManagement.quality.uniqueness", 100 - duplicateRows * 200, "bg-purple-500"),
  ];
}
73-
// Label-distribution statistics component for a dataset
758interface LabelDistributionProps {
769 distribution ?: Record < string , Record < string , number > > ;
@@ -151,7 +84,7 @@ function LabelDistributionStats({ distribution }: LabelDistributionProps) {
15184 key : 'percentage' ,
15285 width : 200 ,
15386 sorter : ( a : any , b : any ) => a . percentage - b . percentage ,
154- render : ( percentage : number , record : any ) => (
87+ render : ( percentage : number ) => (
15588 < div className = "flex items-center gap-3" >
15689 < Progress
15790 percent = { parseFloat ( percentage . toFixed ( 1 ) ) }
@@ -183,39 +116,22 @@ function LabelDistributionStats({ distribution }: LabelDistributionProps) {
183116 if ( ! distribution || Object . keys ( distribution ) . length === 0 ) {
184117 return (
185118 < Card className = "bg-gray-50" >
186- < div className = "text-center py-8 text-gray-400" >
187- < Tags className = "w-12 h-12 mx-auto mb-3 opacity-50" />
188- < p > { t ( "dataManagement.quality.labelDistribution.noData" ) } </ p >
189- </ div >
119+ < Empty
120+ image = { Empty . PRESENTED_IMAGE_SIMPLE }
121+ description = { t ( "dataManagement.quality.labelDistribution.noData" ) }
122+ / >
190123 </ Card >
191124 ) ;
192125 }
193126
194127 return (
195128 < div className = "space-y-4" >
196- { /* 统计概览 */ }
197- < Card className = "bg-gradient-to-r from-blue-50 to-indigo-50 border-blue-200" >
198- < div className = "flex items-center justify-between" >
199- < div className = "flex items-center gap-3" >
200- < div className = "p-2 bg-blue-500 rounded-lg" >
201- < Tags className = "w-5 h-5 text-white" />
202- </ div >
203- < div >
204- < h3 className = "font-semibold text-gray-800" > { t ( "dataManagement.quality.labelDistribution.statisticsTitle" ) } </ h3 >
205- < p className = "text-sm text-gray-600" >
206- { t ( "dataManagement.quality.labelDistribution.statisticsSummary" , { categoryCount : Object . keys ( categoryGroups ) . length , totalLabels } ) }
207- </ p >
208- </ div >
209- </ div >
210- </ div >
211- </ Card >
212-
213129 { /* 表格视图 */ }
214130 < Card
215131 title = {
216132 < div className = "flex items-center gap-2" >
217133 < BarChart3 className = "w-4 h-4" />
218- < span > { t ( "dataManagement.quality.labelDistribution.detailsCardTitle " ) } </ span >
134+ < span > { t ( "dataManagement.quality.labelDistribution.title " ) } </ span >
219135 </ div >
220136 }
221137 >
@@ -225,160 +141,19 @@ function LabelDistributionStats({ distribution }: LabelDistributionProps) {
225141 rowKey = { ( record ) => `${ record . category } -${ record . label } ` }
226142 pagination = { {
227143 pageSize : 10 ,
228- showTotal : ( total ) => `共 ${ total } 条` ,
144+ showTotal : ( total ) => t ( "dataManagement.quality.labelDistribution.paginationTotal" , { total } ) ,
229145 showSizeChanger : true ,
230146 } }
231147 size = "small"
232148 />
233149 </ Card >
234-
235- { /* 分类卡片视图 */ }
236- < div className = "grid md:grid-cols-2 gap-4" >
237- { Object . entries ( categoryGroups ) . map ( ( [ category , items ] ) => {
238- const categoryTotal = items . reduce ( ( sum , item ) => sum + item . count , 0 ) ;
239- const topLabels = items . slice ( 0 , 5 ) ; // 只显示前5个
240-
241- return (
242- < Card
243- key = { category }
244- title = { < span className = "font-semibold" > { category } </ span > }
245- size = "small"
246- >
247- < div className = "space-y-3" >
248- < div className = "text-sm text-gray-600" >
249- { t ( "dataManagement.quality.labelDistribution.totalLabels" , { count : categoryTotal } ) }
250- </ div >
251- { topLabels . map ( ( item ) => (
252- < div key = { item . label } className = "space-y-1" >
253- < div className = "flex justify-between text-sm" >
254- < span className = "truncate flex-1" title = { item . label } >
255- { item . label }
256- </ span >
257- < span className = "font-medium ml-2" > { item . count } </ span >
258- </ div >
259- < div className = "w-full bg-gray-200 rounded-full h-2" >
260- < div
261- className = "bg-gradient-to-r from-blue-500 to-indigo-500 h-2 rounded-full transition-all duration-300"
262- style = { {
263- width : `${ ( item . count / categoryTotal ) * 100 } %` ,
264- } }
265- />
266- </ div >
267- </ div >
268- ) ) }
269- { items . length > 5 && (
270- < div className = "text-xs text-gray-500 text-center pt-2" >
271- { t ( "dataManagement.quality.labelDistribution.moreLabels" , { count : items . length - 5 } ) }
272- </ div >
273- ) }
274- </ div >
275- </ Card >
276- ) ;
277- } ) }
278- </ div >
279150 </ div >
280151 ) ;
281152}
282153
/**
 * Data-quality view for a dataset.
 *
 * Renders the label-distribution statistics for the given dataset and nothing
 * else. The earlier mock-metric computation (random jitter, hard-coded fallback
 * file counters) is intentionally gone: its results were never rendered.
 *
 * @param dataset Dataset to display; its `distribution` field, when present, is
 *                forwarded to `LabelDistributionStats`.
 */
export default function DataQuality({ dataset }: { dataset?: Dataset }) {
  return (
    <div className="mt-0">
      {/* `distribution` is read through a cast — NOTE(review): presumably not declared on Dataset; confirm against the model. */}
      <LabelDistributionStats distribution={(dataset as any)?.distribution} />
    </div>
  );
}
0 commit comments