11import csv
22from bs4 import BeautifulSoup
33
4-
5-
64def extractValuesFromComponentLevel (componentLevel ):
75 level = componentLevel .name
86 componentLevelLabel = componentLevel ['level' ]
97 unittitle = componentLevel .find ('did' ).find ('unittitle' ).text .replace ('\n ' ,'' ).encode ('utf-8' )
108 try :
11- unitdate = componentLevel .find ('did' ).find ('unitdate' ).text .encode ('utf-8' )
9+ unitdate = componentLevel .find ('did' ).find ('unitdate' )
10+ dateExpression = unitdate .text .encode ('utf-8' ).replace ('\n ' ,'' ).replace (' ' ,' ' ).replace (' ' ,' ' ).encode ('utf-8' )
11+ try :
12+ dateType = unitdate ['type' ]
13+ except :
14+ dateType = ''
15+ try :
16+ dateNormal = unitdate ['normal' ]
17+ beginDate = dateNormal [:dateNormal .index ('/' )]
18+ endDate = dateNormal [dateNormal .index ('/' )+ 1 :]
19+ except :
20+ beginDate = ''
21+ endDate = ''
1222 except :
13- unitdate = ''
23+ dateExpression = ''
24+ dateType = ''
25+ beginDate = ''
26+ endDate = ''
1427 try :
1528 scopecontentElement = componentLevel .find ('scopecontent' ).find_all ('p' )
1629 scopecontent = ''
@@ -45,14 +58,19 @@ def extractValuesFromComponentLevel (componentLevel):
4558 containerType2 = ''
4659 global sortOrder
4760 sortOrder += 1
48- f .writerow ([sortOrder ]+ [level ]+ [componentLevelLabel ]+ [unittitle ]+ [unitdate ]+ [scopecontent ]+ [containerType1 ]+ [container1 ]+ [containerId1 ]+ [containerType2 ]+ [container2 ]+ [containerId2 ])
61+
62+ f .writerow ([sortOrder ]+ [level ]+ [componentLevelLabel ]+ [containerType1 ]+ [container1 ]+ [containerType2 ]+ [container2 ]+ [unittitle ]+ [dateExpression ]+ [dateType ]+ [beginDate ]+ [endDate ]+ [scopecontent ]+ [containerId1 ]+ [containerId2 ])
4963
# Ask where the EAD XML file lives. A blank answer falls back to the values
# that used to be hard-coded right after the prompts (and silently overwrote
# whatever had been typed).
# NOTE(review): the fallback directory is specific to one workstation --
# confirm whether it should remain the default.
filepath = raw_input('Enter file path: ') or 'C:/Users/ehanson8/Documents/GitHub/archivesspaceAPI/'
fileName = raw_input('Enter file name: ') or 'MS.0037.xml'

# Parse the finding aid and collect every top-level <c01> component under
# <dsc>. The tree is built while the file is open, so the handle can be
# released as soon as parsing is done instead of leaking for the whole run.
with open(filepath + fileName) as xml:
    upperComponentLevels = BeautifulSoup(xml, 'lxml').find('dsc').find_all('c01')

# The CSV is written next to the input file. Its handle stays open on purpose:
# rows are still appended after this setup section. 'wb' is the mode the
# Python 2 csv module expects.
f = csv.writer(open(filepath + 'eadFields.csv', 'wb'))

# Header row; column order must match the data rows written later.
# NOTE(review): several names carry a trailing space as found in the source;
# they are kept unchanged here -- confirm whether that is intended.
f.writerow([
    'sortOrder',
    'hierarchy ',
    'level',
    'containerType1 ',
    'container1 ',
    'containerType2 ',
    'container2 ',
    'unittitle ',
    'dateexpression ',
    'datetype ',
    'begindate',
    'enddate',
    'scopecontent',
    'containerId1 ',
    'containerId2',
])

# Running row counter; incremented before each data row is written.
sortOrder = 0
5876for upperComponentLevel in upperComponentLevels :
@@ -67,7 +85,7 @@ def extractValuesFromComponentLevel (componentLevel):
6785 except :
6886 scopecontent = ''
6987 sortOrder += 1
70- f .writerow ([sortOrder ]+ ['c01' ]+ [componentLevelLabel ]+ [unittitle ]+ ['' ]+ [scopecontent ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ ['' ])
88+ f .writerow ([sortOrder ]+ ['c01' ]+ [componentLevelLabel ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ [unittitle ] + [ '' ]+ ['' ]+ ['' ] + [ '' ] + [ scopecontent ]+ ['' ]+ ['' ])
7189
7290 componentLevelArray = upperComponentLevel .find_all ('c02' )
7391 for componentLevel in componentLevelArray :
0 commit comments