@@ -5,6 +5,8 @@ def extractValuesFromComponentLevel (componentLevel):
55 level = componentLevel .name
66 componentLevelLabel = componentLevel ['level' ]
77 unittitle = componentLevel .find ('did' ).find ('unittitle' ).text .replace ('\n ' ,'' ).encode ('utf-8' )
8+ controlAccess = []
9+ originationList = []
810 try :
911 unitdate = componentLevel .find ('did' ).find ('unitdate' )
1012 dateExpression = unitdate .text .encode ('utf-8' ).replace ('\n ' ,'' ).replace (' ' ,' ' ).replace (' ' ,' ' ).encode ('utf-8' )
@@ -32,6 +34,12 @@ def extractValuesFromComponentLevel (componentLevel):
3234 scopecontent = scopecontent + paragraphText
3335 except :
3436 scopecontent = ''
37+ try :
38+ subjects = componentLevel .find ('controlaccess' ).find_all ()
39+ for subject in subjects :
40+ controlAccess .append (subject .text .encode ('utf-8' ))
41+ except :
42+ subjects = ''
3543 try :
3644 container1 = componentLevel .find ('did' ).find_all ('container' )[0 ].text .encode ('utf-8' )
3745 except :
@@ -56,17 +64,24 @@ def extractValuesFromComponentLevel (componentLevel):
5664 containerType2 = componentLevel .find ('did' ).find_all ('container' )[1 ]['type' ]
5765 except :
5866 containerType2 = ''
67+ try :
68+ originations = componentLevel .find ('did' ).find_all ('origination' )
69+ for origination in originations :
70+ if origination .find ()['role' ] == 'spn' :
71+ originationList .append (origination .text .encode ('utf-8' ))
72+ except :
73+ originationList = ''
5974 global sortOrder
6075 sortOrder += 1
76+ f .writerow ([sortOrder ]+ [level ]+ [componentLevelLabel ]+ [containerType1 ]+ [container1 ]+ [containerType2 ]+ [container2 ]+ [unittitle ]+ [dateExpression ]+ [dateType ]+ [beginDate ]+ [endDate ]+ [scopecontent ]+ [controlAccess ]+ [originationList ]+ [containerId1 ]+ [containerId2 ])
6177
62- f .writerow ([sortOrder ]+ [level ]+ [componentLevelLabel ]+ [containerType1 ]+ [container1 ]+ [containerType2 ]+ [container2 ]+ [unittitle ]+ [dateExpression ]+ [dateType ]+ [beginDate ]+ [endDate ]+ [scopecontent ]+ [containerId1 ]+ [containerId2 ])
63-
64- filepath = raw_input ('Enter file path: ' )
78+ filepath = '/home/mjanowi3/archivesspace-api'
6579filepath = ''
66- fileName = raw_input ('Enter file name: ' )
6780fileName = 'Coll.011.xml'
6881xml = open (filepath + fileName )
6982
83+
84+
7085f = csv .writer (open (filepath + 'eadFields.csv' , 'wb' ))
7186f .writerow (['sortOrder' ]+ ['hierarchy' ]+ ['level' ]+ ['containerType1' ]+ ['container1' ]+ ['containerType2' ]+ ['container2' ]+ ['unittitle' ]+ ['dateexpression' ]+ ['datetype' ]+ ['begindate' ]+ ['enddate' ]+ ['scopecontent' ]+ ['controlAccess' ]+ ['origination' ]+ ['containerId1' ]+ ['containerId2' ])
7287upperComponentLevels = BeautifulSoup (xml , 'lxml' ).find ('dsc' ).find_all ('c01' )
@@ -102,7 +117,7 @@ def extractValuesFromComponentLevel (componentLevel):
102117 except :
103118 scopecontent = ''
104119 sortOrder += 1
105- f .writerow ([sortOrder ]+ ['c01' ]+ [componentLevelLabel ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ [unittitle ]+ [dateExpression ]+ [dateType ]+ [beginDate ]+ [endDate ]+ [scopecontent ]+ ['' ]+ ['' ])
120+ f .writerow ([sortOrder ]+ ['c01' ]+ [componentLevelLabel ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ [unittitle ]+ [dateExpression ]+ [dateType ]+ [beginDate ]+ [endDate ]+ [scopecontent ]+ ['' ]+ ['' ]+ [ '' ] + [ '' ] )
106121
107122 componentLevelArray = upperComponentLevel .find_all ('c02' )
108123 for componentLevel in componentLevelArray :
0 commit comments