Skip to content

Commit 39cf6ef

Browse files
committed
Add versification warnings for invalid chapter or verse numbers in USFM
1 parent 611fbf2 commit 39cf6ef

1 file changed

Lines changed: 75 additions & 3 deletions

File tree

src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ public enum UsfmVersificationErrorType
1212
ExtraVerse,
1313
InvalidVerseRange,
1414
MissingVerseSegment,
15-
ExtraVerseSegment
15+
ExtraVerseSegment,
16+
InvalidChapterNumber,
17+
InvalidVerseNumber
1618
}
1719

1820
public class UsfmVersificationError
@@ -22,6 +24,7 @@ public class UsfmVersificationError
2224
private readonly int _expectedVerse;
2325
private readonly int _actualChapter;
2426
private readonly int _actualVerse;
27+
private readonly string _actualValue;
2528
private VerseRef? _verseRef = null;
2629

2730
public UsfmVersificationError(
@@ -43,6 +46,21 @@ public UsfmVersificationError(
4346
ProjectName = projectName;
4447
}
4548

49+
/// <summary>
/// Creates an error that records the raw text of a chapter or verse number instead of
/// parsed actual chapter/verse integers.
/// </summary>
/// <param name="bookNum">Book number stored for this error.</param>
/// <param name="expectedChapter">Chapter number stored as the expected chapter.</param>
/// <param name="actualValue">
/// Raw text kept as the actual value; <c>ActualVerseRef</c> renders it in place of the chapter for
/// <c>InvalidChapterNumber</c> and in place of the verse for <c>InvalidVerseNumber</c>.
/// </param>
/// <param name="projectName">Value assigned to <see cref="ProjectName"/>.</param>
/// <param name="type">Value assigned to <see cref="Type"/>.</param>
public UsfmVersificationError(
    int bookNum,
    int expectedChapter,
    string actualValue,
    string projectName,
    UsfmVersificationErrorType type
)
{
    // Classification and project first, then the location data.
    Type = type;
    ProjectName = projectName;

    _actualValue = actualValue;
    _expectedChapter = expectedChapter;
    _bookNum = bookNum;
}
63+
4664
public string ProjectName { get; private set; }
4765

4866
public UsfmVersificationErrorType Type { get; private set; }
@@ -104,8 +122,14 @@ public string ExpectedVerseRef
104122
{
105123
get
106124
{
107-
if (Type == UsfmVersificationErrorType.ExtraVerse)
125+
if (
126+
Type == UsfmVersificationErrorType.ExtraVerse
127+
|| Type == UsfmVersificationErrorType.InvalidChapterNumber
128+
|| Type == UsfmVersificationErrorType.InvalidVerseNumber
129+
)
130+
{
108131
return "";
132+
}
109133

110134
// We do not want to throw an exception here, and the VerseRef constructor can throw
111135
// an exception with certain invalid verse data; use TryParse instead.
@@ -154,11 +178,20 @@ out VerseRef correctedVerseRangeRef
154178
return defaultVerseRef.ToString();
155179
}
156180
}
181+
157182
public string ActualVerseRef
158183
{
159184
get
160185
{
161-
if (_verseRef != null)
186+
if (Type == UsfmVersificationErrorType.InvalidChapterNumber)
187+
{
188+
return $"{Canon.BookNumberToId(_bookNum)} {_actualValue}";
189+
}
190+
else if (Type == UsfmVersificationErrorType.InvalidVerseNumber)
191+
{
192+
return $"{Canon.BookNumberToId(_bookNum)} {_expectedChapter}:{_actualValue}";
193+
}
194+
else if (_verseRef != null)
162195
{
163196
return _verseRef.ToString();
164197
}
@@ -254,6 +287,22 @@ string pubNumber
254287

255288
_currentChapter = state.VerseRef.ChapterNum;
256289
_currentVerse = new VerseRef();
290+
291+
// See whether the chapter number is invalid
292+
VerseRef verseRef = state.VerseRef.Clone();
293+
verseRef.Chapter = number;
294+
if (verseRef.ChapterNum == -1)
295+
{
296+
_errors.Add(
297+
new UsfmVersificationError(
298+
_currentBook,
299+
_currentChapter,
300+
number,
301+
_projectName,
302+
UsfmVersificationErrorType.InvalidChapterNumber
303+
)
304+
);
305+
}
257306
}
258307

259308
public override void Verse(
@@ -264,6 +313,7 @@ public override void Verse(
264313
string pubNumber
265314
)
266315
{
316+
bool verseInError = false;
267317
_currentVerse = state.VerseRef;
268318
if (_currentBook > 0 && Canon.IsCanonical(_currentBook) && _currentChapter > 0)
269319
{
@@ -277,7 +327,29 @@ string pubNumber
277327
_currentVerse
278328
);
279329
if (versificationError.CheckError())
330+
{
280331
_errors.Add(versificationError);
332+
verseInError = true;
333+
}
334+
}
335+
336+
if (!verseInError)
337+
{
338+
// See whether the verse number is invalid
339+
VerseRef verseRef = _currentVerse.Clone();
340+
verseRef.Verse = number;
341+
if (verseRef.VerseNum == -1)
342+
{
343+
_errors.Add(
344+
new UsfmVersificationError(
345+
_currentBook,
346+
_currentChapter,
347+
number,
348+
_projectName,
349+
UsfmVersificationErrorType.InvalidVerseNumber
350+
)
351+
);
352+
}
281353
}
282354
}
283355
}

0 commit comments

Comments
 (0)