Skip to content

Commit 71e3f75

Browse files
committed
Add support for parsing generic ResourceSync documents, and provide particular implementations for SitemapIndex and ChangeListArchive
1 parent a34c664 commit 71e3f75

File tree

12 files changed

+473
-45
lines changed

12 files changed

+473
-45
lines changed

src/main/java/org/openarchives/resourcesync/CapabilityList.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ public CapabilityList()
1515

1616
public CapabilityList(String describedBy, Date lastModified)
1717
{
18-
super();
19-
this.capability = ResourceSync.CAPABILITY_CAPABILITYLIST;
18+
super(ResourceSync.CAPABILITY_CAPABILITYLIST);
2019

2120
this.allowedCapabilities.add(ResourceSync.CAPABILITY_RESOURCELIST);
2221
this.allowedCapabilities.add(ResourceSync.CAPABILITY_RESOURCEDUMP);

src/main/java/org/openarchives/resourcesync/ChangeList.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ public ChangeList(String capabilityList)
1616

1717
public ChangeList(Date lastMod, String capabilityList)
1818
{
19-
super();
20-
this.capability = ResourceSync.CAPABILITY_CHANGELIST;
19+
super(ResourceSync.CAPABILITY_CHANGELIST);
2120
this.setLastModified(lastMod);
2221

2322
if (capabilityList != null)

src/main/java/org/openarchives/resourcesync/ChangeListArchive.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.openarchives.resourcesync;
22

3+
import java.io.InputStream;
34
import java.util.Date;
45

56
public class ChangeListArchive extends SitemapIndex
@@ -16,8 +17,7 @@ public ChangeListArchive(Date lastMod)
1617

1718
public ChangeListArchive(Date lastMod, String capabilityList)
1819
{
19-
super();
20-
this.capability = ResourceSync.CAPABILITY_CHANGELIST;
20+
super(ResourceSync.CAPABILITY_CHANGELIST);
2121

2222
if (lastMod != null)
2323
{
@@ -34,6 +34,11 @@ public ChangeListArchive(Date lastMod, String capabilityList)
3434
}
3535
}
3636

37+
/**
 * Create a change list archive from a serialised document.
 *
 * Delegates to the parent constructor, passing the change list capability
 * constant together with the stream to read from.
 *
 * @param in stream containing the document to read; forwarded unchanged to the parent
 */
public ChangeListArchive(InputStream in)
38+
{
39+
// NOTE(review): the capability passed here is CAPABILITY_CHANGELIST, the same value the
// other constructors of this class use - confirm that is the intended capability for an archive
super(ResourceSync.CAPABILITY_CHANGELIST, in);
40+
}
41+
3742
public void addChangeList(Sitemap sitemap)
3843
{
3944
this.addSitemap(sitemap);

src/main/java/org/openarchives/resourcesync/ResourceList.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ public ResourceList(Date lastModified)
1616

1717
public ResourceList(Date lastMod, String capabilityList)
1818
{
19-
super();
20-
this.capability = ResourceSync.CAPABILITY_RESOURCELIST;
19+
super(ResourceSync.CAPABILITY_RESOURCELIST);
2120

2221
if (lastMod == null)
2322
{

src/main/java/org/openarchives/resourcesync/ResourceSyncDocument.java

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,15 @@
22

33
import org.jdom2.Document;
44
import org.jdom2.Element;
5+
import org.jdom2.JDOMException;
6+
import org.jdom2.input.SAXBuilder;
57
import org.jdom2.output.Format;
68
import org.jdom2.output.XMLOutputter;
79

810
import java.io.IOException;
11+
import java.io.InputStream;
912
import java.io.OutputStream;
13+
import java.text.ParseException;
1014
import java.util.ArrayList;
1115
import java.util.Date;
1216
import java.util.HashMap;
@@ -16,7 +20,7 @@
1620

1721
public abstract class ResourceSyncDocument
1822
{
19-
// these options should be provided by the extending class
23+
// these options should be provided by the extending class through the constructor overrides
2024
protected String capability;
2125
protected String root;
2226

@@ -26,6 +30,33 @@ public abstract class ResourceSyncDocument
2630
protected TreeMap<Date, List<ResourceSyncEntry>> orderedEntries = new TreeMap<Date, List<ResourceSyncEntry>>();
2731
protected List<ResourceSyncLn> lns = new ArrayList<ResourceSyncLn>();
2832

33+
/**
 * Create a document with the given root element name and capability, optionally
 * populating it from a serialised form.
 *
 * When {@code in} is non-null it is parsed and the result is handed to
 * {@link #populateDocument(Element)}. Any failure while reading or parsing is
 * currently swallowed, so a caller cannot distinguish an empty document from
 * one that failed to load.
 *
 * @param root       name of the root element used by this document type
 * @param capability capability value for this document type
 * @param in         stream to read an existing document from, or {@code null} to start empty
 */
public ResourceSyncDocument(String root, String capability, InputStream in)
{
    this.root = root;
    this.capability = capability;

    // nothing to load: leave the document empty
    if (in == null)
    {
        return;
    }

    // NOTE(review): populateDocument ends by calling the abstract populateEntries, so
    // subclass code runs from inside this constructor
    try
    {
        this.populateDocument(this.parse(in));
    }
    catch (IOException e)
    {
        // do nothing, at least for the time being
    }
    catch (JDOMException e)
    {
        // do nothing, at least for the time being
    }
    catch (ParseException e)
    {
        // do nothing, at least for the time being
    }
}
59+
2960
public ResourceSyncLn addLn(String rel, String href)
3061
{
3162
// rs:ln elements are repeatable and can have multiple ones with the same rel
@@ -80,6 +111,45 @@ public String getCapability()
80111
return capability;
81112
}
82113

114+
protected void populateDocument(Element element)
115+
throws ParseException
116+
{
117+
// metadata element
118+
Element mdElement = element.getChild("md", ResourceSync.NS_RS);
119+
120+
// - capability
121+
String capability = mdElement.getAttributeValue("capability", ResourceSync.NS_RS);
122+
if (!"".equals(capability))
123+
{
124+
this.capability = capability;
125+
}
126+
127+
// - modified
128+
String modified = mdElement.getAttributeValue("modified", ResourceSync.NS_ATOM);
129+
if (modified != null && !"".equals(modified))
130+
{
131+
Date lastMod = ResourceSync.DATE_FORMAT.parse(modified);
132+
this.setLastModified(lastMod);
133+
}
134+
135+
// rs:ln elements
136+
List<Element> lns = element.getChildren("ln", ResourceSync.NS_RS);
137+
for (Element ln : lns)
138+
{
139+
String rel = ln.getAttributeValue("rel", ResourceSync.NS_ATOM);
140+
String href = ln.getAttributeValue("href", ResourceSync.NS_ATOM);
141+
if (rel != null && !"".equals(rel) && href != null && !"".equals(href))
142+
{
143+
this.addLn(rel, href);
144+
}
145+
}
146+
147+
// each of the entries
148+
this.populateEntries(element);
149+
}
150+
151+
protected abstract void populateEntries(Element element) throws ParseException;
152+
83153
public Element getElement()
84154
{
85155
Element root = new Element(this.root, ResourceSync.NS_SITEMAP);
@@ -133,4 +203,14 @@ public void serialise(OutputStream out)
133203
XMLOutputter xmlOutputter = new XMLOutputter(Format.getPrettyFormat());
134204
xmlOutputter.output(doc, out);
135205
}
206+
207+
protected Element parse(InputStream in)
208+
throws IOException, JDOMException
209+
{
210+
SAXBuilder sax = new SAXBuilder();
211+
Document doc = sax.build(in);
212+
Element element = doc.getRootElement();
213+
element.detach();
214+
return element;
215+
}
136216
}

src/main/java/org/openarchives/resourcesync/ResourceSyncEntry.java

Lines changed: 158 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import org.jdom2.Element;
44

55
import java.io.InputStream;
6+
import java.text.ParseException;
67
import java.util.ArrayList;
78
import java.util.Date;
89
import java.util.HashMap;
@@ -152,6 +153,138 @@ public List<ResourceSyncLn> getLns()
152153
return lns;
153154
}
154155

156+
public void populateObject(Element element)
157+
throws ParseException
158+
{
159+
// loc
160+
Element locEl = element.getChild("loc", ResourceSync.NS_SITEMAP);
161+
if (locEl != null)
162+
{
163+
this.setLoc(locEl.getText().trim());
164+
}
165+
166+
// lastmod
167+
Element lmEl = element.getChild("lastmod", ResourceSync.NS_SITEMAP);
168+
if (lmEl != null)
169+
{
170+
Date lm = ResourceSync.DATE_FORMAT.parse(lmEl.getText().trim());
171+
this.setLastModified(lm);
172+
}
173+
174+
// changefreq
175+
Element cfEl = element.getChild("changefreq", ResourceSync.NS_SITEMAP);
176+
if (cfEl != null)
177+
{
178+
this.setChangeFreq(cfEl.getText().trim());
179+
}
180+
181+
// the metadata element
182+
Element mdElement = element.getChild("md", ResourceSync.NS_RS);
183+
184+
// - capability
185+
String capability = mdElement.getAttributeValue("capability", ResourceSync.NS_RS);
186+
if (capability != null && !"".equals(capability))
187+
{
188+
this.setCapability(capability);
189+
}
190+
191+
// - change
192+
String change = mdElement.getAttributeValue("change", ResourceSync.NS_RS);
193+
if (change != null && !"".equals(change))
194+
{
195+
this.setChange(change);
196+
}
197+
198+
// - hash
199+
String hashAttr = mdElement.getAttributeValue("hash", ResourceSync.NS_ATOM);
200+
if (hashAttr != null && !"".equals(hashAttr))
201+
{
202+
this.addHashesFromAttr(hashAttr);
203+
}
204+
205+
// - length
206+
String length = mdElement.getAttributeValue("length", ResourceSync.NS_ATOM);
207+
if (length != null && !"".equals(length))
208+
{
209+
long l = Long.parseLong(length);
210+
this.setLength(l);
211+
}
212+
213+
// - path
214+
String path = mdElement.getAttributeValue("path", ResourceSync.NS_RS);
215+
if (path != null && !"".equals(path))
216+
{
217+
this.setPath(path);
218+
}
219+
220+
// - type
221+
String type = mdElement.getAttributeValue("type", ResourceSync.NS_ATOM);
222+
if (type != null && !"".equals(type))
223+
{
224+
this.setType(type);
225+
}
226+
227+
// all the rs:ln elements
228+
List<Element> lns = element.getChildren("ln", ResourceSync.NS_RS);
229+
for (Element ln : lns)
230+
{
231+
String rel = ln.getAttributeValue("rel", ResourceSync.NS_ATOM);
232+
String href = ln.getAttributeValue("href", ResourceSync.NS_ATOM);
233+
if (rel != null && !"".equals(rel) && href != null && !"".equals(href))
234+
{
235+
ResourceSyncLn link = this.addLn(rel, href);
236+
237+
// hash
238+
String lnHashAttr = ln.getAttributeValue("hash", ResourceSync.NS_ATOM);
239+
if (lnHashAttr != null && !"".equals(lnHashAttr))
240+
{
241+
Map<String, String> hashMap = this.getHashesFromAttr(lnHashAttr);
242+
for (String key : hashMap.keySet())
243+
{
244+
link.addHash(key, hashMap.get(key));
245+
}
246+
}
247+
248+
// length
249+
String lnLength = ln.getAttributeValue("length", ResourceSync.NS_ATOM);
250+
if (lnLength != null && !"".equals(length))
251+
{
252+
long lnl = Long.parseLong(lnLength);
253+
link.setLength(lnl);
254+
}
255+
256+
// modified
257+
String modified = ln.getAttributeValue("modified", ResourceSync.NS_ATOM);
258+
if (modified != null && !"".equals(modified))
259+
{
260+
Date modDate = ResourceSync.DATE_FORMAT.parse(modified);
261+
link.setModified(modDate);
262+
}
263+
264+
// path
265+
String lnPath = ln.getAttributeValue("path", ResourceSync.NS_RS);
266+
if (lnPath != null && !"".equals(lnPath))
267+
{
268+
link.setPath(lnPath);
269+
}
270+
271+
// pri
272+
String pri = ln.getAttributeValue("pri"); // FIXME: namespace?
273+
if (pri != null && !"".equals(pri))
274+
{
275+
link.setPri(Integer.parseInt(pri));
276+
}
277+
278+
// type
279+
String lnType = ln.getAttributeValue("type", ResourceSync.NS_ATOM);
280+
if (lnType != null && !"".equals(lnType))
281+
{
282+
link.setType(lnType);
283+
}
284+
}
285+
}
286+
}
287+
155288
public Element getElement()
156289
{
157290
Element root = new Element(this.root, ResourceSync.NS_SITEMAP);
@@ -234,7 +367,7 @@ public Element getElement()
234367
}
235368
if (ln.getLength() > -1)
236369
{
237-
link.setAttribute("length", Integer.toString(ln.getLength()), ResourceSync.NS_ATOM);
370+
link.setAttribute("length", Long.toString(ln.getLength()), ResourceSync.NS_ATOM);
238371
trip = true;
239372
}
240373
if (ln.getModified() != null)
@@ -282,4 +415,28 @@ private String getHashAttr(Map<String, String> hashes)
282415
String attr = sb.toString().trim();
283416
return attr;
284417
}
418+
419+
protected void addHashesFromAttr(String hashAttr)
420+
{
421+
Map<String, String> hashMap = this.getHashesFromAttr(hashAttr);
422+
for (String key : hashMap.keySet())
423+
{
424+
this.addHash(key, hashMap.get(key));
425+
}
426+
}
427+
428+
protected Map<String, String> getHashesFromAttr(String hashAttr)
429+
{
430+
Map<String, String> map = new HashMap<String, String>();
431+
String[] bits = hashAttr.split(" ");
432+
for (String bit : bits)
433+
{
434+
String[] parts = bit.split(":");
435+
if (parts.length == 2)
436+
{
437+
map.put(parts[0], parts[1]);
438+
}
439+
}
440+
return map;
441+
}
285442
}

src/main/java/org/openarchives/resourcesync/ResourceSyncLn.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ public class ResourceSyncLn
99
{
1010
protected Map<String, String> hashes = new HashMap<String, String>();
1111
protected String href = null;
12-
protected int length = -1;
12+
protected long length = -1;
1313
protected Date modified = null;
1414
protected String path = null;
1515
protected String rel = null;
@@ -41,12 +41,12 @@ public void setHref(String href)
4141
this.href = href;
4242
}
4343

44-
public int getLength()
44+
public long getLength()
4545
{
4646
return length;
4747
}
4848

49-
public void setLength(int length)
49+
public void setLength(long length)
5050
{
5151
this.length = length;
5252
}

0 commit comments

Comments
 (0)