Skip to content

Commit c90246f

Browse files
author
ehanson8
committed
updates
1 parent 867365a commit c90246f

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

eadToCsv.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import csv
22
from bs4 import BeautifulSoup
33

4+
5+
46
def extractValuesFromComponentLevel (componentLevel):
57
level = componentLevel.name
68
componentLevelLabel = componentLevel['level']
@@ -13,9 +15,8 @@ def extractValuesFromComponentLevel (componentLevel):
1315
scopecontentElement = componentLevel.find('scopecontent').find_all('p')
1416
scopecontent = ''
1517
for paragraph in scopecontentElement:
16-
paragraphText = paragraph.text.replace('\\n','').replace(' ',' ').replace(' ',' ').encode('utf-8')
18+
paragraphText = paragraph.text.replace('\n','').replace(' ',' ').replace(' ',' ').encode('utf-8')
1719
scopecontent = scopecontent + paragraphText
18-
print scopecontent
1920
except:
2021
scopecontent = ''
2122
try:
@@ -42,16 +43,18 @@ def extractValuesFromComponentLevel (componentLevel):
4243
containerType2 = componentLevel.find('did').find_all('container')[1]['type']
4344
except:
4445
containerType2 = ''
45-
46-
f.writerow([level]+[componentLevelLabel]+[unittitle]+[unitdate]+[scopecontent]+[containerType1]+[container1]+[containerId1]+[containerType2]+[container2]+[containerId2])
46+
global sortOrder
47+
sortOrder += 1
48+
f.writerow([sortOrder]+[level]+[componentLevelLabel]+[unittitle]+[unitdate]+[scopecontent]+[containerType1]+[container1]+[containerId1]+[containerType2]+[container2]+[containerId2])
4749

4850
filepath = raw_input('Enter file path: ')
4951
fileName = raw_input('Enter file name: ')
5052
xml = open(filepath+fileName)
5153

5254
f=csv.writer(open(filepath+'eadFields.csv', 'wb'))
53-
f.writerow(['<co?>']+['<co?> level']+['<unittitle>']+['<unitdate>']+['<scopecontent>']+['containerType1']+['container1']+['containerId1']+['containerType2']+['container2']+['containerId2'])
55+
f.writerow(['sortOrder']+['<co?>']+['<co?> level']+['<unittitle>']+['<unitdate>']+['<scopecontent>']+['containerType1']+['container1']+['containerId1']+['containerType2']+['container2']+['containerId2'])
5456
upperComponentLevels = BeautifulSoup(xml, 'lxml').find('dsc').find_all('c01')
57+
sortOrder = 0
5558
for upperComponentLevel in upperComponentLevels:
5659
componentLevelLabel = upperComponentLevel['level']
5760
unittitle = upperComponentLevel.find('did').find('unittitle').text.encode('utf-8')
@@ -61,10 +64,10 @@ def extractValuesFromComponentLevel (componentLevel):
6164
for paragraph in scopecontentElement:
6265
paragraphText = paragraph.text.replace('\\n','').replace(' ',' ').replace(' ',' ').encode('utf-8')
6366
scopecontent = scopecontent + paragraphText
64-
print scopecontent
6567
except:
6668
scopecontent = ''
67-
f.writerow(['c01']+[componentLevelLabel]+[unittitle]+['']+[scopecontent]+['']+['']+['']+['']+['']+[''])
69+
sortOrder += 1
70+
f.writerow([sortOrder]+['c01']+[componentLevelLabel]+[unittitle]+['']+[scopecontent]+['']+['']+['']+['']+['']+[''])
6871

6972
componentLevelArray = upperComponentLevel.find_all('c02')
7073
for componentLevel in componentLevelArray:

0 commit comments

Comments
 (0)