Skip to content

Commit 358434c

Browse files
author
ehanson8
committed
updates
1 parent 928eac7 commit 358434c

File tree

1 file changed

+25
-7
lines changed

1 file changed

+25
-7
lines changed

eadToCsv.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,29 @@
11
import csv
22
from bs4 import BeautifulSoup
33

4-
5-
64
def extractValuesFromComponentLevel (componentLevel):
75
level = componentLevel.name
86
componentLevelLabel = componentLevel['level']
97
unittitle = componentLevel.find('did').find('unittitle').text.replace('\n','').encode('utf-8')
108
try:
11-
unitdate = componentLevel.find('did').find('unitdate').text.encode('utf-8')
9+
unitdate = componentLevel.find('did').find('unitdate')
10+
dateExpression = unitdate.text.encode('utf-8').replace('\n','').replace(' ',' ').replace(' ',' ').encode('utf-8')
11+
try:
12+
dateType = unitdate['type']
13+
except:
14+
dateType = ''
15+
try:
16+
dateNormal = unitdate['normal']
17+
beginDate = dateNormal[:dateNormal.index('/')]
18+
endDate = dateNormal[dateNormal.index('/')+1:]
19+
except:
20+
beginDate = ''
21+
endDate = ''
1222
except:
13-
unitdate = ''
23+
dateExpression = ''
24+
dateType = ''
25+
beginDate = ''
26+
endDate = ''
1427
try:
1528
scopecontentElement = componentLevel.find('scopecontent').find_all('p')
1629
scopecontent = ''
@@ -45,14 +58,19 @@ def extractValuesFromComponentLevel (componentLevel):
4558
containerType2 = ''
4659
global sortOrder
4760
sortOrder += 1
48-
f.writerow([sortOrder]+[level]+[componentLevelLabel]+[unittitle]+[unitdate]+[scopecontent]+[containerType1]+[container1]+[containerId1]+[containerType2]+[container2]+[containerId2])
61+
62+
f.writerow([sortOrder]+[level]+[componentLevelLabel]+[containerType1]+[container1]+[containerType2]+[container2]+[unittitle]+[dateExpression]+[dateType]+[beginDate]+[endDate]+[scopecontent]+[containerId1]+[containerId2])
4963

5064
filepath = raw_input('Enter file path: ')
5165
fileName = raw_input('Enter file name: ')
66+
67+
filepath = 'C:/Users/ehanson8/Documents/GitHub/archivesspaceAPI/'
68+
fileName = 'MS.0037.xml'
69+
5270
xml = open(filepath+fileName)
5371

5472
f=csv.writer(open(filepath+'eadFields.csv', 'wb'))
55-
f.writerow(['sortOrder']+['<co?>']+['<co?> level']+['<unittitle>']+['<unitdate>']+['<scopecontent>']+['containerType1']+['container1']+['containerId1']+['containerType2']+['container2']+['containerId2'])
73+
f.writerow(['sortOrder']+['hierarchy']+['level']+['containerType1']+['container1']+['containerType2']+['container2']+['unittitle']+['dateexpression']+['datetype']+['begindate']+['enddate']+['scopecontent']+['containerId1']+['containerId2'])
5674
upperComponentLevels = BeautifulSoup(xml, 'lxml').find('dsc').find_all('c01')
5775
sortOrder = 0
5876
for upperComponentLevel in upperComponentLevels:
@@ -67,7 +85,7 @@ def extractValuesFromComponentLevel (componentLevel):
6785
except:
6886
scopecontent = ''
6987
sortOrder += 1
70-
f.writerow([sortOrder]+['c01']+[componentLevelLabel]+[unittitle]+['']+[scopecontent]+['']+['']+['']+['']+['']+[''])
88+
f.writerow([sortOrder]+['c01']+[componentLevelLabel]+['']+['']+['']+['']+[unittitle]+['']+['']+['']+['']+[scopecontent]+['']+[''])
7189

7290
componentLevelArray = upperComponentLevel.find_all('c02')
7391
for componentLevel in componentLevelArray:

0 commit comments

Comments
 (0)