Skip to content

Commit d6738bc

Browse files
author
ehanson8
committed
updates
1 parent ab3fe81 commit d6738bc

File tree

1 file changed

+18
-3
lines changed

1 file changed

+18
-3
lines changed

eadToCsv.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,11 +10,16 @@ def extractValuesFromComponentLevel (componentLevel):
1010
except:
1111
unitdate = ''
1212
try:
13-
scopecontent = componentLevel.find('scopecontent').content.replace('<head>','').replace('</head>','').replace('<p>','').replace('</p>',' ').encode('utf-8')
13+
scopecontentElement = componentLevel.find('scopecontent').find_all('p')
14+
scopecontent = ''
15+
for paragraph in scopecontentElement:
16+
paragraphText = paragraph.text.replace('\\n','').replace(' ',' ').encode('utf-8')
17+
scopecontent = scopecontent + paragraphText
18+
print scopecontent
1419
except:
1520
scopecontent = ''
1621
try:
17-
container1 = componentLevel.find('did').find_all('container')[0].text.encode('utf-8')
22+
container1 = componentLevel.find('did').find_all('container')[0].textparagraph.text.replace('\\n','')
1823
except:
1924
container1 = ''
2025
try:
@@ -50,7 +55,17 @@ def extractValuesFromComponentLevel (componentLevel):
5055
for upperComponentLevel in upperComponentLevels:
5156
componentLevelLabel = upperComponentLevel['level']
5257
unittitle = upperComponentLevel.find('did').find('unittitle').text.encode('utf-8')
53-
f.writerow(['c01']+[componentLevelLabel]+[unittitle]+['']+['']+[''])
58+
try:
59+
scopecontentElement = upperComponentLevel.find('scopecontent').find_all('p')
60+
scopecontent = ''
61+
for paragraph in scopecontentElement:
62+
paragraphText = paragraph.text.replace('\\n','').replace(' ',' ').encode('utf-8')
63+
scopecontent = scopecontent + paragraphText
64+
print scopecontent
65+
except:
66+
scopecontent = ''
67+
f.writerow(['c01']+[componentLevelLabel]+[unittitle]+['']+[scopecontent]+['']+['']+['']+['']+['']+[''])
68+
5469
componentLevelArray = upperComponentLevel.find_all('c02')
5570
for componentLevel in componentLevelArray:
5671
extractValuesFromComponentLevel(componentLevel)

0 commit comments

Comments
 (0)