Skip to content

Commit 46e4a35

Browse files
committed
updates
1 parent dd62072 commit 46e4a35

File tree

7 files changed

+153
-82
lines changed

7 files changed

+153
-82
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,6 @@ data/*
5151
!data/.keep
5252
.profile
5353
*.csv
54-
*.json
54+
*.json
55+
local/*
56+
*.xml

eadToCsv.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,18 +62,20 @@ def extractValuesFromComponentLevel (componentLevel):
6262
f.writerow([sortOrder]+[level]+[componentLevelLabel]+[containerType1]+[container1]+[containerType2]+[container2]+[unittitle]+[dateExpression]+[dateType]+[beginDate]+[endDate]+[scopecontent]+[containerId1]+[containerId2])
6363

6464
filepath = raw_input('Enter file path: ')
65+
filepath = ''
6566
fileName = raw_input('Enter file name: ')
67+
fileName = 'Coll.011.xml'
6668
xml = open(filepath+fileName)
6769

6870
f=csv.writer(open(filepath+'eadFields.csv', 'wb'))
69-
f.writerow(['sortOrder']+['hierarchy']+['level']+['containerType1']+['container1']+['containerType2']+['container2']+['unittitle']+['dateexpression']+['datetype']+['begindate']+['enddate']+['scopecontent']+['containerId1']+['containerId2'])
71+
f.writerow(['sortOrder']+['hierarchy']+['level']+['containerType1']+['container1']+['containerType2']+['container2']+['unittitle']+['dateexpression']+['datetype']+['begindate']+['enddate']+['scopecontent']+['controlAccess']+['origination']+['containerId1']+['containerId2'])
7072
upperComponentLevels = BeautifulSoup(xml, 'lxml').find('dsc').find_all('c01')
7173
sortOrder = 0
7274
for upperComponentLevel in upperComponentLevels:
7375
componentLevelLabel = upperComponentLevel['level']
7476
unittitle = upperComponentLevel.find('did').find('unittitle').text.encode('utf-8')
7577
try:
76-
unitdate = componentLevel.find('did').find('unitdate')
78+
unitdate = upperComponentLevel.find('did').find('unitdate')
7779
dateExpression = unitdate.text.encode('utf-8').replace('\n','').replace(' ',' ').replace(' ',' ').encode('utf-8')
7880
try:
7981
dateType = unitdate['type']

extractMarcFields.py

Lines changed: 0 additions & 75 deletions
This file was deleted.

getSingleRecord.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
import json
22
import requests
33
import secrets
4+
import argparse
5+
6+
parser = argparse.ArgumentParser()
7+
parser.add_argument('-u', '--uri', help='URI of the object to retreive. optional - if not provided, the script will ask for input')
8+
9+
args = parser.parse_args()
10+
11+
if args.uri:
12+
uri = args.uri
13+
else:
14+
uri = raw_input('Enter handle: ')
415

516
baseURL = secrets.baseURL
617
user = secrets.user
@@ -10,9 +21,8 @@
1021
session = auth["session"]
1122
headers = {'X-ArchivesSpace-Session':session, 'Content_Type':'application/json'}
1223

13-
endpoint = '/repositories/3/resources/1126'
14-
15-
output = requests.get(baseURL + endpoint, headers=headers).json()
16-
f=open('ASrecord.json', 'w')
24+
output = requests.get(baseURL + uri, headers=headers).json()
25+
uri = uri.replace('/repositories/3/','').replace('/','-')
26+
f=open(uri+'.json', 'w')
1727
results=(json.dump(output, f))
1828
f.close()

getTopContainers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
endpoint = '/repositories/3/top_containers?all_ids=true'
1818

1919
ids = requests.get(baseURL + endpoint, headers=headers).json()
20+
print len(ids)
2021

2122
records = []
2223
for id in ids:
24+
print id
2325
endpoint = '/repositories/3/top_containers/'+str(id)
2426
output = requests.get(baseURL + endpoint, headers=headers).json()
2527
records.append(output)

postContainersToRecords.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import json
2+
import requests
3+
import secrets
4+
import csv
5+
6+
targetFile = raw_input('Enter file name: ')
7+
targetRecord = raw_input('Enter record type and id (e.g. \'accessions/2049\'): ')
8+
9+
baseURL = secrets.baseURL
10+
user = secrets.user
11+
password = secrets.password
12+
13+
auth = requests.post(baseURL + '/users/'+user+'/login?password='+password).json()
14+
session = auth["session"]
15+
headers = {'X-ArchivesSpace-Session':session, 'Content_Type':'application/json'}
16+
17+
csv = csv.DictReader(open(targetFile))
18+
19+
asRecord = requests.get(baseURL+'/repositories/3/'+targetRecord, headers=headers).json()
20+
print baseURL+'/repositories/3/'+targetRecord
21+
f=open(targetRecord+'asRecordBackup.json', 'w')
22+
json.dump(asRecord, f)
23+
instanceArray = asRecord['instances']
24+
25+
for row in csv:
26+
uri = row['uri']
27+
print uri
28+
top_container = {}
29+
top_container['ref'] = uri
30+
sub_container = {}
31+
sub_container['top_container'] = top_container
32+
instance = {}
33+
instance['sub_container'] = sub_container
34+
instance['instance_type'] = 'mixed_materials'
35+
instanceArray.append(instance)
36+
asRecord['instances'] = instanceArray
37+
f2=open(targetRecord+'asRecordModified.json', 'w')
38+
json.dump(asRecord, f2)
39+
asRecord = json.dumps(asRecord)
40+
post = requests.post(baseURL+'/repositories/3/'+targetRecord, headers=headers, data=asRecord).json()
41+
print post
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
from datetime import datetime
7+
8+
def addUriLink (key, valueSource):
    """Add one subject or agent link to the resource named in the current CSV row.

    Relies on module-level names set by the surrounding script: ``row`` (the
    current CSV row as a dict), ``baseURL`` and ``headers`` (API access) and
    ``f`` (the csv writer used as a result log).

    Arguments:
    key -- 'subjects' or 'linked_agents'; the list on the resource to extend.
    valueSource -- name of the column in ``row`` that holds the URI to link.

    Exactly one row is written to ``f`` per call: the resource uri followed by
    either the updated list and the API response, or one of the statuses
    'no update' (link already present), 'tag error' (row['tag'] does not start
    with 1, 6 or 7) or 'error' (unknown key). Returns None.
    """
    uri = '/repositories/3/resources/'+row['ResourceUri']
    value = row[valueSource]
    print(value)

    # Build the link before calling the API so that a bad key or tag is
    # reported without a wasted request and without posting a broken link.
    isAgent = key == 'linked_agents'
    if key == 'subjects':
        link = {'ref': value}
    elif isAgent:
        link = {'terms': [], 'ref': value}
        # NOTE(review): 'tag' is presumably a MARC field tag (1xx/7xx/6xx) - confirm.
        tag = row['tag']
        if tag.startswith(('1', '7')):
            link['role'] = 'creator'
        elif tag.startswith('6'):
            link['role'] = 'subject'
        else:
            print('error')
            f.writerow([uri, 'tag error'])
            return
    else:
        print('error')
        f.writerow([uri, 'error'])
        return

    asRecord = requests.get(baseURL+uri, headers=headers).json()
    links = asRecord[key]
    if isAgent:
        print('originalAgents')
        print(links)

    if link in links:
        print('no update')
        if isAgent:
            print(link)
        f.writerow([uri, 'no update'])
        return

    # links is the list object held by asRecord, so appending updates the record.
    links.append(link)
    if isAgent:
        print('updatedAgents')
    print(links)
    print(baseURL + uri)
    post = requests.post(baseURL + uri, headers=headers, data=json.dumps(asRecord)).json()
    print(post)
    f.writerow([uri, links, post])
65+
66+
# Driver: log in, then run addUriLink once for every row of a user-supplied
# CSV, recording each outcome in a timestamped result CSV next to the input.
startTime = time.time()

baseURL = secrets.baseURL
user = secrets.user
password = secrets.password

# Hand the password to requests as a query parameter so it is URL-encoded;
# concatenating it into the URL breaks on characters such as '&' or '#'.
auth = requests.post(baseURL + '/users/' + user + '/login', params={'password': password}).json()
session = auth["session"]
# 'Content-Type' (hyphen) is the real header name; the previous 'Content_Type'
# was an unrelated custom header.
headers = {'X-ArchivesSpace-Session': session, 'Content-Type': 'application/json'}

filename = raw_input('Enter filename (including \'.csv\'): ')

logName = filename+'Post'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv'
logFile = open(logName, 'wb')
try:
    # addUriLink writes its results through the module-level name f.
    f = csv.writer(logFile)
    f.writerow(['uri', 'links', 'post'])

    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile)
        # addUriLink reads the current row from the module-level name row.
        for row in reader:
            addUriLink('linked_agents', 'agentUri')
            # To link subjects instead, call: addUriLink('subjects', 'SubjectUri')
finally:
    # Close the log even if a request fails so rows already written are flushed.
    logFile.close()

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time:  ' + '%d:%02d:%02d' % (h, m, s))

0 commit comments

Comments
 (0)