Skip to content

Commit 4447a55

Browse files
committed
updates
1 parent 46e4a35 commit 4447a55

File tree

1 file changed

+20
-5
lines changed

1 file changed

+20
-5
lines changed

eadToCsv.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@ def extractValuesFromComponentLevel (componentLevel):
55
level = componentLevel.name
66
componentLevelLabel = componentLevel['level']
77
unittitle = componentLevel.find('did').find('unittitle').text.replace('\n','').encode('utf-8')
8+
controlAccess = []
9+
originationList = []
810
try:
911
unitdate = componentLevel.find('did').find('unitdate')
1012
dateExpression = unitdate.text.encode('utf-8').replace('\n','').replace(' ',' ').replace(' ',' ').encode('utf-8')
@@ -32,6 +34,12 @@ def extractValuesFromComponentLevel (componentLevel):
3234
scopecontent = scopecontent + paragraphText
3335
except:
3436
scopecontent = ''
37+
try:
38+
subjects = componentLevel.find('controlaccess').find_all()
39+
for subject in subjects:
40+
controlAccess.append(subject.text.encode('utf-8'))
41+
except:
42+
subjects = ''
3543
try:
3644
container1 = componentLevel.find('did').find_all('container')[0].text.encode('utf-8')
3745
except:
@@ -56,17 +64,24 @@ def extractValuesFromComponentLevel (componentLevel):
5664
containerType2 = componentLevel.find('did').find_all('container')[1]['type']
5765
except:
5866
containerType2 = ''
67+
try:
68+
originations = componentLevel.find('did').find_all('origination')
69+
for origination in originations:
70+
if origination.find()['role'] == 'spn':
71+
originationList.append(origination.text.encode('utf-8'))
72+
except:
73+
originationList = ''
5974
global sortOrder
6075
sortOrder += 1
76+
f.writerow([sortOrder]+[level]+[componentLevelLabel]+[containerType1]+[container1]+[containerType2]+[container2]+[unittitle]+[dateExpression]+[dateType]+[beginDate]+[endDate]+[scopecontent]+[controlAccess]+[originationList]+[containerId1]+[containerId2])
6177

62-
f.writerow([sortOrder]+[level]+[componentLevelLabel]+[containerType1]+[container1]+[containerType2]+[container2]+[unittitle]+[dateExpression]+[dateType]+[beginDate]+[endDate]+[scopecontent]+[containerId1]+[containerId2])
63-
64-
filepath = raw_input('Enter file path: ')
78+
filepath = '/home/mjanowi3/archivesspace-api'
6579
filepath = ''
66-
fileName = raw_input('Enter file name: ')
6780
fileName = 'Coll.011.xml'
6881
xml = open(filepath+fileName)
6982

83+
84+
7085
f=csv.writer(open(filepath+'eadFields.csv', 'wb'))
7186
f.writerow(['sortOrder']+['hierarchy']+['level']+['containerType1']+['container1']+['containerType2']+['container2']+['unittitle']+['dateexpression']+['datetype']+['begindate']+['enddate']+['scopecontent']+['controlAccess']+['origination']+['containerId1']+['containerId2'])
7287
upperComponentLevels = BeautifulSoup(xml, 'lxml').find('dsc').find_all('c01')
@@ -102,7 +117,7 @@ def extractValuesFromComponentLevel (componentLevel):
102117
except:
103118
scopecontent = ''
104119
sortOrder += 1
105-
f.writerow([sortOrder]+['c01']+[componentLevelLabel]+['']+['']+['']+['']+[unittitle]+[dateExpression]+[dateType]+[beginDate]+[endDate]+[scopecontent]+['']+[''])
120+
f.writerow([sortOrder]+['c01']+[componentLevelLabel]+['']+['']+['']+['']+[unittitle]+[dateExpression]+[dateType]+[beginDate]+[endDate]+[scopecontent]+['']+['']+['']+[''])
106121

107122
componentLevelArray = upperComponentLevel.find_all('c02')
108123
for componentLevel in componentLevelArray:

0 commit comments

Comments
 (0)