@@ -10,11 +10,16 @@ def extractValuesFromComponentLevel (componentLevel):
1010 except :
1111 unitdate = ''
1212 try :
13- scopecontent = componentLevel .find ('scopecontent' ).content .replace ('<head>' ,'' ).replace ('</head>' ,'' ).replace ('<p>' ,'' ).replace ('</p>' ,' ' ).encode ('utf-8' )
13+ scopecontentElement = componentLevel .find ('scopecontent' ).find_all ('p' )
14+ scopecontent = ''
15+ for paragraph in scopecontentElement :
16+ paragraphText = paragraph .text .replace ('\\ n' ,'' ).replace (' ' ,' ' ).encode ('utf-8' )
17+ scopecontent = scopecontent + paragraphText
18+ print scopecontent
1419 except :
1520 scopecontent = ''
1621 try :
17- container1 = componentLevel .find ('did' ).find_all ('container' )[0 ].text .encode ( 'utf-8 ' )
22+ container1 = componentLevel .find ('did' ).find_all ('container' )[0 ].textparagraph . text .replace ( ' \\ n' , ' ' )
1823 except :
1924 container1 = ''
2025 try :
@@ -50,7 +55,17 @@ def extractValuesFromComponentLevel (componentLevel):
5055for upperComponentLevel in upperComponentLevels :
5156 componentLevelLabel = upperComponentLevel ['level' ]
5257 unittitle = upperComponentLevel .find ('did' ).find ('unittitle' ).text .encode ('utf-8' )
53- f .writerow (['c01' ]+ [componentLevelLabel ]+ [unittitle ]+ ['' ]+ ['' ]+ ['' ])
58+ try :
59+ scopecontentElement = upperComponentLevel .find ('scopecontent' ).find_all ('p' )
60+ scopecontent = ''
61+ for paragraph in scopecontentElement :
62+ paragraphText = paragraph .text .replace ('\\ n' ,'' ).replace (' ' ,' ' ).encode ('utf-8' )
63+ scopecontent = scopecontent + paragraphText
64+ print scopecontent
65+ except :
66+ scopecontent = ''
67+ f .writerow (['c01' ]+ [componentLevelLabel ]+ [unittitle ]+ ['' ]+ [scopecontent ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ ['' ]+ ['' ])
68+
5469 componentLevelArray = upperComponentLevel .find_all ('c02' )
5570 for componentLevel in componentLevelArray :
5671 extractValuesFromComponentLevel (componentLevel )
0 commit comments