Skip to content

Commit b2f7f57

Browse files
author
ehanson8
committed
updates
1 parent 436924c commit b2f7f57

File tree

2 files changed

+174
-0
lines changed

2 files changed

+174
-0
lines changed

getTopContainerCountByResource.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
7+
# Count the top containers linked to each resource in repository 3 of an
# ArchivesSpace instance, using the /search endpoint with the resource URI
# passed as `root_record`.
#
# Files written to the working directory:
#   topContainerCountByResource.csv - one row per resource with its count
#   topContainersLinks.csv          - one row per resource/top container pair
#   uniqueTopContainers.csv         - each distinct top container URI once
#
# NOTE: this is a Python 2 script (csv files are opened in 'wb' mode and the
# title is written as a UTF-8 byte string).
startTime = time.time()

baseURL = secrets.baseURL
user = secrets.user
password = secrets.password

# Hand the password to requests via `params` so it is URL-encoded; gluing it
# onto the URL by hand breaks for passwords containing '&', '#', '+', etc.
auth = requests.post(baseURL + '/users/' + user + '/login',
                     params={'password': password}).json()
session = auth["session"]
# NOTE(review): 'Content_Type' looks like a typo for 'Content-Type'; it is
# left untouched because only body-less GET requests use these headers.
headers = {'X-ArchivesSpace-Session': session, 'Content_Type': 'application/json'}

endpoint = '/repositories/3/resources?all_ids=true'

ids = requests.get(baseURL + endpoint, headers=headers).json()

# The lists keep first-seen order for the output files; the companion sets
# make the "already recorded?" checks O(1) instead of scanning the lists.
topContainerLinks = []
seenLinks = set()
uniqueTopContainers = []
seenTopContainers = set()

with open('topContainerCountByResource.csv', 'wb') as countFile, \
        open('topContainersLinks.csv', 'wb') as linkFile:
    f = csv.writer(countFile)
    f.writerow(['title', 'uri', 'id_0', 'id_1', 'id_2', 'id_3', 'tcCount'])

    f2 = csv.writer(linkFile)
    # NOTE(review): the first column is headed 'resourceUri' but holds the
    # numeric resource id - confirm which one consumers expect.
    f2.writerow(['resourceUri', 'topContainerUri'])

    for resourceId in ids:
        print('id %s' % resourceId)
        endpoint = '/repositories/3/resources/' + str(resourceId)
        output = requests.get(baseURL + endpoint, headers=headers).json()
        title = output['title'].encode('utf-8')
        uri = output['uri']
        id0 = output['id_0']
        # id_1..id_3 may be missing from the record; default to blank.
        id1 = output.get('id_1', '')
        id2 = output.get('id_2', '')
        id3 = output.get('id_3', '')

        # Page through the search results until an empty page comes back.
        results = []
        page = 1
        while True:
            payload = {'page': page, 'page_size': '3000', 'root_record': endpoint}
            search = requests.get(baseURL + '/search', headers=headers,
                                  params=payload).json()
            resultsPage = search['results']
            if not resultsPage:
                break
            results.extend(resultsPage)
            page += 1

        resourceTopContainers = set()
        for result in results:
            # Results without the top container field contribute nothing.
            for topContainer in result.get('top_container_uri_u_sstr', []):
                resourceTopContainers.add(topContainer)
                if topContainer not in seenTopContainers:
                    seenTopContainers.add(topContainer)
                    uniqueTopContainers.append(topContainer)
                link = (str(resourceId), topContainer)
                if link not in seenLinks:
                    seenLinks.add(link)
                    topContainerLinks.append(link)

        topContainerCount = len(resourceTopContainers)
        print('top containers %s' % topContainerCount)
        f.writerow([title, uri, id0, id1, id2, id3, topContainerCount])

    f2.writerows(topContainerLinks)

with open('uniqueTopContainers.csv', 'wb') as uniqueFile:
    f3 = csv.writer(uniqueFile)
    f3.writerow(['topContainer'])
    for topContainer in uniqueTopContainers:
        f3.writerow([topContainer])

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time:  %d:%02d:%02d' % (h, m, s))
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
7+
# Count the top containers referenced by the instances attached directly to
# each resource record in repository 3 of an ArchivesSpace instance.
# (The "NoAOs" in the output names presumably means archival objects are not
# consulted - only the resource JSON itself is read here.)
#
# Files written to the working directory:
#   topContainerCountByResourceNoAOs.csv - one row per resource with its count
#   topContainersLinksNoAOs.csv          - one row per resource/top container pair
#   uniqueTopContainersNoAOs.csv         - each distinct top container URI once
#
# NOTE: this is a Python 2 script (csv files are opened in 'wb' mode and the
# title is written as a UTF-8 byte string).
startTime = time.time()

baseURL = secrets.baseURL
user = secrets.user
password = secrets.password

# Hand the password to requests via `params` so it is URL-encoded; gluing it
# onto the URL by hand breaks for passwords containing '&', '#', '+', etc.
auth = requests.post(baseURL + '/users/' + user + '/login',
                     params={'password': password}).json()
session = auth["session"]
# NOTE(review): 'Content_Type' looks like a typo for 'Content-Type'; it is
# left untouched because only body-less GET requests use these headers.
headers = {'X-ArchivesSpace-Session': session, 'Content_Type': 'application/json'}

endpoint = '/repositories/3/resources?all_ids=true'

ids = requests.get(baseURL + endpoint, headers=headers).json()

# The lists keep first-seen order for the output files; the companion sets
# make the "already recorded?" checks O(1) instead of scanning the lists.
uniqueTopContainers = []
seenTopContainers = set()
topContainerLinks = []
seenLinks = set()

with open('topContainerCountByResourceNoAOs.csv', 'wb') as countFile, \
        open('topContainersLinksNoAOs.csv', 'wb') as linkFile:
    f = csv.writer(countFile)
    f.writerow(['title', 'uri', 'id_0', 'id_1', 'id_2', 'id_3', 'topContainerCount'])

    f2 = csv.writer(linkFile)
    # NOTE(review): the first column is headed 'resourceUri' but holds the
    # numeric resource id - confirm which one consumers expect.
    f2.writerow(['resourceUri', 'topContainerUri'])

    for resourceId in ids:
        endpoint = '/repositories/3/resources/' + str(resourceId)
        output = requests.get(baseURL + endpoint, headers=headers).json()
        title = output['title'].encode('utf-8')
        uri = output['uri']
        id0 = output['id_0']
        # id_1..id_3 may be missing from the record; default to blank.
        id1 = output.get('id_1', '')
        id2 = output.get('id_2', '')
        id3 = output.get('id_3', '')

        # Distinct top container refs for this resource, in first-seen order.
        # Duplicates are dropped so the count matches the rows written to the
        # links file rather than the number of instances.
        topContainersByResource = []
        for instance in output.get('instances') or []:
            try:
                topContainer = instance['sub_container']['top_container']['ref']
            except (KeyError, TypeError):
                # Instance carries no sub_container/top_container reference.
                print('%s No top containers' % resourceId)
                continue
            if topContainer not in topContainersByResource:
                topContainersByResource.append(topContainer)

        for topContainer in topContainersByResource:
            link = (str(resourceId), topContainer)
            if link not in seenLinks:
                seenLinks.add(link)
                topContainerLinks.append(link)
            if topContainer not in seenTopContainers:
                seenTopContainers.add(topContainer)
                uniqueTopContainers.append(topContainer)

        topContainerCountByResource = len(topContainersByResource)
        f.writerow([title, uri, id0, id1, id2, id3, topContainerCountByResource])
        # Progress line: resource id and running total of distinct containers.
        print('%s %s' % (resourceId, len(uniqueTopContainers)))

    f2.writerows(topContainerLinks)

with open('uniqueTopContainersNoAOs.csv', 'wb') as uniqueFile:
    f3 = csv.writer(uniqueFile)
    f3.writerow(['topContainer'])
    for topContainer in uniqueTopContainers:
        f3.writerow([topContainer])

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time:  %d:%02d:%02d' % (h, m, s))

0 commit comments

Comments
 (0)