11import csv
22from bs4 import BeautifulSoup
33
4+
5+
46def extractValuesFromComponentLevel (componentLevel ):
57 level = componentLevel .name
68 componentLevelLabel = componentLevel ['level' ]
@@ -13,9 +15,8 @@ def extractValuesFromComponentLevel (componentLevel):
1315 scopecontentElement = componentLevel .find ('scopecontent' ).find_all ('p' )
1416 scopecontent = ''
1517 for paragraph in scopecontentElement :
16- paragraphText = paragraph .text .replace ('\\ n' ,'' ).replace (' ' ,' ' ).replace (' ' ,' ' ).encode ('utf-8' )
18+ paragraphText = paragraph .text .replace ('\n ' ,'' ).replace (' ' ,' ' ).replace (' ' ,' ' ).encode ('utf-8' )
1719 scopecontent = scopecontent + paragraphText
18- print scopecontent
1920 except :
2021 scopecontent = ''
2122 try :
@@ -42,16 +43,18 @@ def extractValuesFromComponentLevel (componentLevel):
4243 containerType2 = componentLevel .find ('did' ).find_all ('container' )[1 ]['type' ]
4344 except :
4445 containerType2 = ''
45-
46- f .writerow ([level ]+ [componentLevelLabel ]+ [unittitle ]+ [unitdate ]+ [scopecontent ]+ [containerType1 ]+ [container1 ]+ [containerId1 ]+ [containerType2 ]+ [container2 ]+ [containerId2 ])
46+ global sortOrder
47+ sortOrder += 1
48+ f .writerow ([sortOrder ]+ [level ]+ [componentLevelLabel ]+ [unittitle ]+ [unitdate ]+ [scopecontent ]+ [containerType1 ]+ [container1 ]+ [containerId1 ]+ [containerType2 ]+ [container2 ]+ [containerId2 ])
4749
4850filepath = raw_input ('Enter file path: ' )
4951fileName = raw_input ('Enter file name: ' )
5052xml = open (filepath + fileName )
5153
5254f = csv .writer (open (filepath + 'eadFields.csv' , 'wb' ))
53- f .writerow (['<co?>' ]+ ['<co?> level' ]+ ['<unittitle>' ]+ ['<unitdate>' ]+ ['<scopecontent>' ]+ ['containerType1' ]+ ['container1' ]+ ['containerId1' ]+ ['containerType2' ]+ ['container2' ]+ ['containerId2' ])
55+ f .writerow (['sortOrder' ] + [ ' <co?>' ]+ ['<co?> level' ]+ ['<unittitle>' ]+ ['<unitdate>' ]+ ['<scopecontent>' ]+ ['containerType1' ]+ ['container1' ]+ ['containerId1' ]+ ['containerType2' ]+ ['container2' ]+ ['containerId2' ])
5456upperComponentLevels = BeautifulSoup (xml , 'lxml' ).find ('dsc' ).find_all ('c01' )
57+ sortOrder = 0
5558for upperComponentLevel in upperComponentLevels :
5659 componentLevelLabel = upperComponentLevel ['level' ]
5760 unittitle = upperComponentLevel .find ('did' ).find ('unittitle' ).text .encode ('utf-8' )
@@ -61,10 +64,10 @@ def extractValuesFromComponentLevel (componentLevel):
6164 for paragraph in scopecontentElement :
6265 paragraphText = paragraph .text .replace ('\\ n' ,'' ).replace (' ' ,' ' ).replace (' ' ,' ' ).encode ('utf-8' )
6366 scopecontent = scopecontent + paragraphText
64- print scopecontent
6567 except :
6668 scopecontent = ''
67- f .writerow (['c01' ]+ [componentLevelLabel ]+ [unittitle ]+ ['' ]+ [scopecontent ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ ['' ])
69+ sortOrder += 1
70+ f .writerow ([sortOrder ]+ ['c01' ]+ [componentLevelLabel ]+ [unittitle ]+ ['' ]+ [scopecontent ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ ['' ])
6871
6972 componentLevelArray = upperComponentLevel .find_all ('c02' )
7073 for componentLevel in componentLevelArray :
0 commit comments