Skip to content

Commit b30d59c

Browse files
authored
Merge pull request #180 from Codeuctivity/main
OpenXmlRegex should ignore lastRenderedPageBreak (temporary layout marker does not represent any document text)
2 parents 7970000 + eff5701 commit b30d59c

File tree

8 files changed

+82
-3
lines changed

8 files changed

+82
-3
lines changed

.github/workflows/dotnet.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ jobs:
5151
steps:
5252
- uses: actions/checkout@v6
5353
- name: Download build artifacts
54-
uses: actions/download-artifact@v6
54+
uses: actions/download-artifact@v7
5555
with:
5656
name: build-artifacts
5757
- name: NugetPush
@@ -75,7 +75,7 @@ jobs:
7575
steps:
7676
- uses: actions/checkout@v6
7777
- name: Download build artifacts
78-
uses: actions/download-artifact@v6
78+
uses: actions/download-artifact@v7
7979
with:
8080
name: build-artifacts
8181
- name: NugetPush

OpenXmlPowerTools.Tests/DocumentAssemblerTests.cs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -176,6 +176,7 @@ public void DA102_Throws(string name, string data)
176176

177177
[Theory]
178178
[InlineData("DA025-TemplateDocument.docx", "DA-Data.xml", false)]
179+
[InlineData("DA-lastRenderedPageBreak.docx", "DA-lastRenderedPageBreak.xml", false)]
179180
public void DA103_UseXmlDocument(string name, string data, bool err)
180181
{
181182
var sourceDir = new DirectoryInfo("../../../../TestFiles/");

OpenXmlPowerTools.Tests/OpenXmlRegexTests.cs

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,21 @@ public class OpenXmlRegexTests
197197
</w:body>
198198
</w:document>";
199199

200+
private const string LastRenderedPageBreakXmlString =
201+
@"<w:document xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
202+
<w:body>
203+
<w:p>
204+
<w:r>
205+
<w:t>ThisIsAParagraphContainingNoNaturalLi</w:t>
206+
</w:r>
207+
<w:r>
208+
<w:lastRenderedPageBreak/>
209+
<w:t>neBreaksSoTheLineBreakIsForced.</w:t>
210+
</w:r>
211+
</w:p>
212+
</w:body>
213+
</w:document>";
214+
200215
private static string InnerText(XContainer e)
201216
{
202217
return e.Descendants(W.r)
@@ -365,5 +380,30 @@ public void CanReplaceTextWithFields()
365380
Assert.Equal(1, count);
366381
Assert.Equal("As stated in Article {__1} and this Section {__1.1}, this is described in Exhibit 4.", innerText);
367382
}
383+
384+
[Fact]
385+
public void CanMatchDespiteInvisibleLayoutMarkers()
386+
{
387+
XDocument partDocument = XDocument.Parse(LastRenderedPageBreakXmlString);
388+
XElement p = partDocument.Descendants(W.p).Last();
389+
390+
using (var stream = new MemoryStream())
391+
using (WordprocessingDocument wordDocument = WordprocessingDocument.Create(stream, DocumentType))
392+
{
393+
MainDocumentPart part = wordDocument.AddMainDocumentPart();
394+
part.PutXDocument(partDocument);
395+
396+
var content = partDocument.Descendants(W.p);
397+
var regex = new Regex(@"LineBreak");
398+
int count = OpenXmlRegex.Replace(content, regex, "LB", null);
399+
400+
p = partDocument.Descendants(W.p).Last();
401+
string innerText = InnerText(p);
402+
403+
Assert.Equal(2, count);
404+
Assert.Equal("ThisIsAParagraphContainingNoNaturalLBsSoTheLBIsForced.", innerText);
405+
}
406+
}
407+
368408
}
369409
}

OpenXmlPowerTools.Tests/UnicodeMapperTests.cs

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -124,5 +124,34 @@ public void CanStringifySymbols()
124124
Assert.Equal(symFromChar1.ToString(SaveOptions.None), symFromChar2.ToString(SaveOptions.None));
125125
Assert.Equal(symFromChar1.ToString(SaveOptions.None), symFromChar3.ToString(SaveOptions.None));
126126
}
127+
128+
private const string LastRenderedPageBreakXmlString =
129+
@"<w:document xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
130+
<w:body>
131+
<w:p>
132+
<w:r>
133+
<w:t>ThisIsAParagraphContainingNoNaturalLi</w:t>
134+
</w:r>
135+
<w:r>
136+
<w:lastRenderedPageBreak/>
137+
<w:t>neBreaksSoTheLineBreakIsForced.</w:t>
138+
</w:r>
139+
</w:p>
140+
</w:body>
141+
</w:document>";
142+
143+
[Fact]
144+
public void IgnoresTemporaryLayoutMarkers()
145+
{
146+
XDocument partDocument = XDocument.Parse(LastRenderedPageBreakXmlString);
147+
XElement p = partDocument.Descendants(W.p).Last();
148+
string actual = p.Descendants(W.r)
149+
.Select(UnicodeMapper.RunToString)
150+
.StringConcatenate();
151+
// p.Value is "the concatenated text content of this element", which
152+
// (in THIS test case, which does not feature any symbols or special
153+
// characters) should exactly match the output of UnicodeMapper:
154+
Assert.Equal(p.Value, actual);
155+
}
127156
}
128157
}

OpenXmlPowerTools/OpenXmlRegex.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -446,7 +446,7 @@ private static object WmlSearchAndReplaceTransform(XNode node, Regex regex, stri
446446
if (element.Name == W.r)
447447
{
448448
return element.Elements()
449-
.Where(e => e.Name != W.rPr)
449+
.Where(e => e.Name != W.rPr && e.Name != W.lastRenderedPageBreak)
450450
.Select(e => e.Name == W.t
451451
? ((string)e).Select(c =>
452452
new XElement(W.r,

OpenXmlPowerTools/UnicodeMapper.cs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,11 @@ public static string RunToString(XElement element)
9999
{
100100
return HorizontalTabulation.ToString();
101101
}
102+
// Ignore temporary layout markers that are not actual document content
103+
if (element.Name == W.lastRenderedPageBreak)
104+
{
105+
return string.Empty;
106+
}
102107

103108
if (element.Name == W.fldChar)
104109
{
13.4 KB
Binary file not shown.
Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
<TestCase>
2+
<noun1>test</noun1>
3+
<noun2>emergency</noun2>
4+
</TestCase>

0 commit comments

Comments
 (0)