Skip to content

Commit f62bc00

Browse files
committed
Add benchmark analysis scripts for PDF comparison
- Introduced multiple scripts for analyzing near-miss PDF files, focusing on visual and text differences. - Implemented detailed visual analysis, row height comparisons, and text extraction for benchmarking. - Added functionality to categorize failures by improvement type and identify quick wins. - Created reports for near misses, close misses, and visual-only issues to facilitate further improvements. - Enhanced overall benchmarking process with structured output for easier interpretation of results.
1 parent 9f73022 commit f62bc00

File tree

359 files changed

+7852
-3437
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

359 files changed

+7852
-3437
lines changed

README.md

Lines changed: 139 additions & 139 deletions
Large diffs are not rendered by default.

README.zh-CN.md

Lines changed: 347 additions & 107 deletions
Large diffs are not rendered by default.

documents/README.fr.md

Lines changed: 347 additions & 107 deletions
Large diffs are not rendered by default.

documents/README.it.md

Lines changed: 347 additions & 107 deletions
Large diffs are not rendered by default.

documents/README.ja.md

Lines changed: 347 additions & 107 deletions
Large diffs are not rendered by default.

documents/README.ko.md

Lines changed: 347 additions & 107 deletions
Large diffs are not rendered by default.

documents/README.zh-TW.md

Lines changed: 347 additions & 107 deletions
Large diffs are not rendered by default.
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
#!/usr/bin/env python3
2+
"""Update all README files with benchmark scores from comparison_report.json."""
3+
4+
import json
5+
import re
6+
import os
7+
8+
# Resolve paths relative to the repository root (this script lives one
# directory below the root).
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
REPORT_PATH = os.path.join(REPO_ROOT, "tests", "MiniPdf.Benchmark", "reports", "comparison_report.json")

# Load report data produced by the benchmark comparison run.
with open(REPORT_PATH, "r", encoding="utf-8") as f:
    report_data = json.load(f)

# Build lookup: name -> {score (as a percentage), icon, label}.
# Cases without an "overall_score" are skipped entirely.
scores_map = {}
for item in report_data:
    name = item["name"]
    score = item.get("overall_score")
    if score is not None:
        pct = score * 100
        # Traffic-light icon: >= 90% green, >= 70% yellow, otherwise red.
        if pct >= 90:
            icon = "\U0001f7e2"  # 🟢
        elif pct >= 70:
            icon = "\U0001f7e1"  # 🟡
        else:
            icon = "\U0001f534"  # 🔴
        # Build human-readable label from name (remove classicNN_ prefix).
        parts = name.split("_", 1)
        label = parts[1].replace("_", " ").title() if len(parts) > 1 else name
        # Fix casing for special words mangled by .title().
        # (A no-op replace("Cjk", "Cjk") from an earlier revision was removed.)
        label = label.replace("Xml", "xml").replace("Kpi", "kpi")
        label = label.replace("Ohlc", "ohlc").replace("3D", "3d").replace("Vs", "vs")
        scores_map[name] = {"score": pct, "icon": icon, "label": label}

# Compute summary statistics over all scored cases (scores are 0.0-1.0 here;
# thresholds therefore use 0.90 / 0.70 rather than 90 / 70).
all_scores = [item["overall_score"] for item in report_data if item.get("overall_score") is not None]
total_count = len(all_scores)
excellent_count = sum(1 for s in all_scores if s >= 0.90)
acceptable_count = sum(1 for s in all_scores if 0.70 <= s < 0.90)
needs_improve_count = sum(1 for s in all_scores if s < 0.70)
avg_score = sum(all_scores) / len(all_scores) * 100 if all_scores else 0

print(f"Total: {total_count}, Excellent: {excellent_count}, Acceptable: {acceptable_count}, Needs Improvement: {needs_improve_count}, Avg: {avg_score:.1f}%")
45+
46+
# Define the files to update with their specific patterns.
# Each entry carries:
#   path             - absolute path to the README on disk
#   img_prefix       - image path prefix for generated rows (root README uses
#                      a repo-relative prefix; documents/ translations need ../)
#   summary_patterns - locale-specific regexes used to rewrite the summary
#                      section (case count line, summary table counts, average)
# BUG FIX: the Korean "count_line" pattern previously ended with an unescaped
# ')' ("...포함)"), which makes re.sub raise re.error ("unbalanced
# parenthesis") when README.ko.md is processed.  It is now escaped ("포함\)")
# to match every other locale's pattern.
FILES = {
    "README.md": {
        "path": os.path.join(REPO_ROOT, "README.md"),
        "img_prefix": "tests/MiniPdf.Benchmark/reports/images/",
        "summary_patterns": {
            "count_line": r"\*\*120 classic test cases\*\*.*?30 chart cases\)",
            "count_replace": f"**{total_count} classic test cases** (including 30 image-embedding cases, 30 chart cases, and 30 styling cases)",
            "table_excellent": (r"(\| 🟢 Excellent \| )\d+", f"\\g<1>{excellent_count}"),
            "table_acceptable": (r"(\| 🟡 Acceptable \| )\d+", f"\\g<1>{acceptable_count}"),
            "table_needs": (r"(\| 🔴 Needs Improvement \| )\d+", f"\\g<1>{needs_improve_count}"),
            "avg_score": (r"\*\*Average overall score: [\d.]+%\*\*", f"**Average overall score: {avg_score:.1f}%**"),
        },
    },
    "README.zh-CN.md": {
        "path": os.path.join(REPO_ROOT, "README.zh-CN.md"),
        "img_prefix": "tests/MiniPdf.Benchmark/reports/images/",
        "summary_patterns": {
            "count_line": r"\*\*120 个经典测试用例\*\*.*?30 个图表测试\)",
            "count_replace": f"**{total_count} 个经典测试用例**(包含 30 个图片嵌入测试、30 个图表测试和 30 个样式测试)",
            "table_excellent": (r"(\| 🟢 优秀 \| )\d+", f"\\g<1>{excellent_count}"),
            "table_acceptable": (r"(\| 🟡 可接受 \| )\d+", f"\\g<1>{acceptable_count}"),
            "table_needs": (r"(\| 🔴 待改进 \| )\d+", f"\\g<1>{needs_improve_count}"),
            "avg_score": (r"\*\*整体平均分: [\d.]+%\*\*", f"**整体平均分: {avg_score:.1f}%**"),
        },
    },
    "README.zh-TW.md": {
        "path": os.path.join(REPO_ROOT, "documents", "README.zh-TW.md"),
        "img_prefix": "../tests/MiniPdf.Benchmark/reports/images/",
        "summary_patterns": {
            "count_line": r"\*\*120 個經典測試案例\*\*.*?30 個圖表測試\)",
            "count_replace": f"**{total_count} 個經典測試案例**(包含 30 個圖片嵌入測試、30 個圖表測試和 30 個樣式測試)",
            "table_excellent": (r"(\| 🟢 優秀 \| )\d+", f"\\g<1>{excellent_count}"),
            "table_acceptable": (r"(\| 🟡 可接受 \| )\d+", f"\\g<1>{acceptable_count}"),
            "table_needs": (r"(\| 🔴 待改進 \| )\d+", f"\\g<1>{needs_improve_count}"),
            "avg_score": (r"\*\*整體平均分: [\d.]+%\*\*", f"**整體平均分: {avg_score:.1f}%**"),
        },
    },
    "README.ja.md": {
        "path": os.path.join(REPO_ROOT, "documents", "README.ja.md"),
        "img_prefix": "../tests/MiniPdf.Benchmark/reports/images/",
        "summary_patterns": {
            "count_line": r"\*\*120 件のクラシックテストケース\*\*.*?チャート 30 件を含む\)",
            "count_replace": f"**{total_count} 件のクラシックテストケース**(画像埋め込み 30 件、チャート 30 件、スタイリング 30 件を含む)",
            "table_excellent": (r"(\| 🟢 優秀 \| )\d+", f"\\g<1>{excellent_count}"),
            "table_acceptable": (r"(\| 🟡 許容範囲 \| )\d+", f"\\g<1>{acceptable_count}"),
            "table_needs": (r"(\| 🔴 要改善 \| )\d+", f"\\g<1>{needs_improve_count}"),
            "avg_score": (r"\*\*総合平均スコア: [\d.]+%\*\*", f"**総合平均スコア: {avg_score:.1f}%**"),
        },
    },
    "README.ko.md": {
        "path": os.path.join(REPO_ROOT, "documents", "README.ko.md"),
        "img_prefix": "../tests/MiniPdf.Benchmark/reports/images/",
        "summary_patterns": {
            # ')' escaped here (see BUG FIX note above).
            "count_line": r"\*\*120개 클래식 테스트 케이스\*\*.*?차트 30개 포함\)",
            "count_replace": f"**{total_count}개 클래식 테스트 케이스**(이미지 임베딩 30개, 차트 30개, 스타일링 30개 포함)",
            "table_excellent": (r"(\| 🟢 우수 \| )\d+", f"\\g<1>{excellent_count}"),
            "table_acceptable": (r"(\| 🟡 허용 \| )\d+", f"\\g<1>{acceptable_count}"),
            "table_needs": (r"(\| 🔴 개선 필요 \| )\d+", f"\\g<1>{needs_improve_count}"),
            "avg_score": (r"\*\*전체 평균 점수: [\d.]+%\*\*", f"**전체 평균 점수: {avg_score:.1f}%**"),
        },
    },
    "README.it.md": {
        "path": os.path.join(REPO_ROOT, "documents", "README.it.md"),
        "img_prefix": "../tests/MiniPdf.Benchmark/reports/images/",
        "summary_patterns": {
            "count_line": r"\*\*120 casi di test classici\*\*.*?30 casi con grafici\)",
            "count_replace": f"**{total_count} casi di test classici** (inclusi 30 casi con immagini incorporate, 30 casi con grafici e 30 casi di stile)",
            "table_excellent": (r"(\| 🟢 Eccellente \| )\d+", f"\\g<1>{excellent_count}"),
            "table_acceptable": (r"(\| 🟡 Accettabile \| )\d+", f"\\g<1>{acceptable_count}"),
            "table_needs": (r"(\| 🔴 Da migliorare \| )\d+", f"\\g<1>{needs_improve_count}"),
            "avg_score": (r"\*\*Punteggio medio complessivo: [\d.]+%\*\*", f"**Punteggio medio complessivo: {avg_score:.1f}%**"),
        },
    },
    "README.fr.md": {
        "path": os.path.join(REPO_ROOT, "documents", "README.fr.md"),
        "img_prefix": "../tests/MiniPdf.Benchmark/reports/images/",
        "summary_patterns": {
            "count_line": r"\*\*120 cas de test classiques\*\*.*?30 cas de graphiques\)",
            "count_replace": f"**{total_count} cas de test classiques** (dont 30 cas d'images intégrées, 30 cas de graphiques et 30 cas de style)",
            "table_excellent": (r"(\| 🟢 Excellent \| )\d+", f"\\g<1>{excellent_count}"),
            "table_acceptable": (r"(\| 🟡 Acceptable \| )\d+", f"\\g<1>{acceptable_count}"),
            "table_needs": (r"(\| 🔴 À améliorer \| )\d+", f"\\g<1>{needs_improve_count}"),
            "avg_score": (r"\*\*Score moyen global : [\d.]+%\*\*", f"**Score moyen global : {avg_score:.1f}%**"),
        },
    },
}
133+
134+
def get_display_name(name):
    """Return a human-readable display name for a test case name.

    Drops the leading ``classicNN_`` prefix, replaces underscores with
    spaces and capitalizes the first letter.  Names containing no
    underscore are returned unchanged.
    """
    _, sep, rest = name.partition("_")
    if not sep:
        return name
    return rest.replace("_", " ").capitalize()
140+
141+
def get_short_name(name):
    """Return the leading ``classicNN`` identifier (text before the first underscore)."""
    return name.partition("_")[0]
144+
145+
def generate_test_case_rows(name, img_prefix):
    """Render the two HTML ``<tr>`` rows for one test case.

    The first row carries the case id plus "label icon score%"; the second
    carries the MiniPdf-vs-reference image pair.  Returns an empty string
    when *name* has no entry in ``scores_map``.
    """
    entry = scores_map.get(name)
    if not entry:
        return ""
    case_id = get_short_name(name)
    title = get_display_name(name)
    badge = entry["icon"]
    percent = entry["score"]
    return f"""<tr>
<td><b>{case_id}</b></td>
<td>{title} {badge} {percent:.1f}%</td>
</tr>
<tr>
<td><img src="{img_prefix}{name}_p1_minipdf.png" width="320"/></td>
<td><img src="{img_prefix}{name}_p1_reference.png" width="320"/></td>
</tr>"""
162+
163+
164+
def update_scores_in_content(content):
    """Update all individual test case scores (icon + percentage) in *content*.

    Walks the content line by line looking for anchor cells of the form
    ``<td><b>classicNN</b></td>``, resolves the full test case name
    (``classicNN_description``) from ``report_data``, then rewrites the
    score cell found on the same line or one of the next two lines of the
    same ``<tr>``.  Lines with no matching report entry are left untouched.

    Returns the updated content string.

    Note: a dead inner helper (``replace_score``) from an abandoned
    regex-only approach was removed; behavior is unchanged.
    """
    lines = content.split("\n")
    i = 0
    while i < len(lines):
        # Anchor: <td><b>classicNN</b></td>
        m = re.search(r'<td><b>(classic\d+)</b></td>', lines[i])
        if m:
            short_name = m.group(1)
            # Resolve the full name in the report data by prefix match.
            full_name = None
            for item in report_data:
                if item["name"].startswith(short_name + "_"):
                    full_name = item["name"]
                    break
            if full_name and full_name in scores_map:
                info = scores_map[full_name]
                display = get_display_name(full_name)
                # The score cell sits on the same line or within the next
                # two lines; rewrite the first one that matches.
                for j in range(i, min(i + 3, len(lines))):
                    score_match = re.search(r'<td>(.+?)\s+[🟢🟡🔴]\s+[\d.]+%</td>', lines[j])
                    if score_match:
                        lines[j] = f' <td>{display} {info["icon"]} {info["score"]:.1f}%</td>'
                        break
        i += 1
    return "\n".join(lines)
200+
201+
202+
def process_file(file_key, file_config):
    """Process a single README file.

    Applies, in order: the summary count-line rewrite, the summary table
    count rewrites, the average-score rewrite, the per-case score updates,
    and (if absent) insertion of the classic121-150 rows before the
    closing </table>.  Writes the result back with LF newlines.
    """
    filepath = file_config["path"]
    img_prefix = file_config["img_prefix"]
    patterns = file_config["summary_patterns"]

    print(f"\nProcessing {file_key}...")

    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    # 1. Update summary count line
    if "count_line" in patterns:
        content = re.sub(patterns["count_line"], patterns["count_replace"], content)

    # 2. Update summary table counts
    for key in ["table_excellent", "table_acceptable", "table_needs"]:
        if key in patterns:
            pattern, replacement = patterns[key]
            content = re.sub(pattern, replacement, content)

    # 3. Update average score
    if "avg_score" in patterns:
        pattern, replacement = patterns["avg_score"]
        content = re.sub(pattern, replacement, content)

    # 4. Update individual test case scores
    content = update_scores_in_content(content)

    # 5. Add missing test cases (classic121-150) if not present
    if "classic121" not in content:
        # Find the </table> tag and insert new entries before it
        # Get sorted list of cases from classic121 onwards
        # NOTE(review): startswith("classic1") assumes names are zero-padded
        # (classic01..classic99), so only classic1xx survive the filter;
        # a name like "classic1_x" would make int("") raise — confirm naming.
        new_cases = sorted(
            [item["name"] for item in report_data if item["name"].startswith("classic1") and int(item["name"].split("_")[0][7:]) >= 121],
            key=lambda x: int(x.split("_")[0][7:])
        )
        new_rows = []
        for name in new_cases:
            row = generate_test_case_rows(name, img_prefix)
            if row:
                new_rows.append(row)

        if new_rows:
            insert_text = "\n".join(new_rows) + "\n"
            # NOTE(review): str.replace hits EVERY "</table>" occurrence —
            # presumably each README has exactly one benchmark table; verify.
            content = content.replace("</table>", insert_text + "</table>")
            print(f" Added {len(new_rows)} new test case entries (classic121-150)")

    # newline="\n" keeps LF endings regardless of platform.
    with open(filepath, "w", encoding="utf-8", newline="\n") as f:
        f.write(content)

    print(f" Updated {file_key} successfully")
254+
255+
256+
# Drive the update across every configured README, skipping any that are
# missing on disk (e.g. translations not yet added).
for readme_key, readme_cfg in FILES.items():
    if not os.path.exists(readme_cfg["path"]):
        print(f" Skipping {readme_key}: file not found at {readme_cfg['path']}")
        continue
    process_file(readme_key, readme_cfg)

print("\nAll README files updated!")

0 commit comments

Comments
 (0)