Skip to content

Commit ab99e1d

Browse files
author
ehanson8
committed
updates
1 parent 72b530e commit ab99e1d

11 files changed: +177 additions, -74 deletions (lines changed)

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,8 @@ Network Trash Folder
4646
Temporary Items
4747
.apdisk
4848
secrets.py
49+
secretsProd.py
4950
*.pyc
5051
data/*
5152
!data/.keep
52-
.profile
53+
.profile

compareTwoKeysInCommunity.py

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,21 @@
44
import csv
55
import time
66

7+
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
8+
if secretsVersion != '':
9+
try:
10+
secrets = __import__(secretsVersion)
11+
print 'Editing Production'
12+
except ImportError:
13+
print 'Editing Stage'
14+
715
baseURL = secrets.baseURL
816
email = secrets.email
917
password = secrets.password
1018
filePath = secrets.filePath
19+
verify = secrets.verify
20+
21+
requests.packages.urllib3.disable_warnings()
1122

1223
communityID = raw_input('Enter community ID: ')
1324
key = raw_input('Enter first key: ')
@@ -16,23 +27,23 @@
1627
startTime = time.time()
1728
data = json.dumps({'email':email,'password':password})
1829
header = {'content-type':'application/json','accept':'application/json'}
19-
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
30+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content
2031
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
2132
print 'authenticated'
2233

2334
itemList = []
2435
endpoint = baseURL+'/rest/communities'
25-
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth).json()
36+
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth, verify=verify).json()
2637
for j in range (0, len (collections)):
2738
collectionID = collections[j]['id']
2839
if collectionID != 24:
2940
offset = 0
3041
items = ''
3142
while items != []:
32-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
43+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
3344
while items.status_code != 200:
3445
time.sleep(5)
35-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
46+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
3647
items = items.json()
3748
for k in range (0, len (items)):
3849
itemID = items[k]['id']
@@ -47,7 +58,7 @@
4758
for number, itemID in enumerate(itemList):
4859
itemsRemaining = len(itemList) - number
4960
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
50-
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth).json()
61+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json()
5162
itemTuple = (itemID,)
5263
tupleValue1 = ''
5364
tupleValue2 = ''
@@ -73,7 +84,7 @@
7384
for i in range (0, len (valueList)):
7485
f.writerow([valueList[i][0]]+[valueList[i][1]]+[valueList[i][2]])
7586

76-
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth)
87+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth, verify=verify)
7788

7889
elapsedTime = time.time() - startTime
7990
m, s = divmod(elapsedTime, 60)

findBogusUris.py

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,35 +4,46 @@
44
import csv
55
import time
66

7+
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
8+
if secretsVersion != '':
9+
try:
10+
secrets = __import__(secretsVersion)
11+
print 'Editing Production'
12+
except ImportError:
13+
print 'Editing Stage'
14+
715
baseURL = secrets.baseURL
816
email = secrets.email
917
password = secrets.password
1018
filePath = secrets.filePath
1119
handlePrefix = secrets.handlePrefix
20+
verify = secrets.verify
21+
22+
requests.packages.urllib3.disable_warnings()
1223

1324
startTime = time.time()
1425
data = json.dumps({'email':email,'password':password})
1526
header = {'content-type':'application/json','accept':'application/json'}
16-
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
27+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content
1728
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
1829
print 'authenticated'
1930

2031
itemList = []
2132
endpoint = baseURL+'/rest/communities'
22-
communities = requests.get(endpoint, headers=headerAuth).json()
33+
communities = requests.get(endpoint, headers=headerAuth, verify=verify).json()
2334
for i in range (0, len (communities)):
2435
communityID = communities[i]['id']
25-
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth).json()
36+
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth, verify=verify).json()
2637
for j in range (0, len (collections)):
2738
collectionID = collections[j]['id']
2839
if collectionID != 24:
2940
offset = 0
3041
items = ''
3142
while items != []:
32-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
43+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
3344
while items.status_code != 200:
3445
time.sleep(5)
35-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
46+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
3647
items = items.json()
3748
for k in range (0, len (items)):
3849
itemID = items[k]['id']
@@ -49,14 +60,14 @@
4960
for number, itemID in enumerate(itemList):
5061
itemsRemaining = len(itemList) - number
5162
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
52-
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth).json()
63+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json()
5364
for l in range (0, len (metadata)):
5465
if metadata[l]['key'] == 'dc.identifier.uri':
5566
uri = str(metadata[l]['value'])
5667
if uri.startswith(handlePrefix) == False:
5768
f.writerow([itemID]+[uri])
5869

59-
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth)
70+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth, verify=verify)
6071

6172
elapsedTime = time.time() - startTime
6273
m, s = divmod(elapsedTime, 60)

findDuplicateKeys.py

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,35 +4,46 @@
44
import time
55
import csv
66

7+
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
8+
if secretsVersion != '':
9+
try:
10+
secrets = __import__(secretsVersion)
11+
print 'Editing Production'
12+
except ImportError:
13+
print 'Editing Stage'
14+
715
baseURL = secrets.baseURL
816
email = secrets.email
917
password = secrets.password
1018
filePath = secrets.filePath
19+
verify = secrets.verify
20+
21+
requests.packages.urllib3.disable_warnings()
1122

1223
key = raw_input('Enter key: ')
1324
searchString = "\""+key+"\""
1425

1526
startTime = time.time()
1627
data = json.dumps({'email':email,'password':password})
1728
header = {'content-type':'application/json','accept':'application/json'}
18-
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
29+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content
1930
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
2031
print 'authenticated'
2132

2233

2334
itemList = []
2435
endpoint = baseURL+'/rest/communities'
25-
communities = requests.get(endpoint, headers=headerAuth).json()
36+
communities = requests.get(endpoint, headers=headerAuth, verify=verify).json()
2637
for i in range (0, len (communities)):
2738
communityID = communities[i]['id']
28-
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth).json()
39+
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth, verify=verify).json()
2940
for j in range (0, len (collections)):
3041
collectionID = collections[j]['id']
3142
if collectionID != 24:
32-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=100000', headers=headerAuth)
43+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=100000', headers=headerAuth, verify=verify)
3344
while items.status_code != 200:
3445
time.sleep(5)
35-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=100000', headers=headerAuth)
46+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=100000', headers=headerAuth, verify=verify)
3647
items = items.json()
3748
for k in range (0, len (items)):
3849
itemID = items[k]['id']
@@ -47,12 +58,12 @@
4758
for number, itemID in enumerate(itemList):
4859
itemsRemaining = len(itemList) - number
4960
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
50-
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth).json()
61+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json()
5162
metadata = json.dumps(metadata)
5263
if metadata.find(searchString) != metadata.rfind(searchString):
5364
f.writerow([itemID])
5465

55-
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth)
66+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth, verify=verify)
5667

5768
elapsedTime = time.time() - startTime
5869
m, s = divmod(elapsedTime, 60)

getCollectionMetadataJson.py

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,26 +3,37 @@
33
import secrets
44
import time
55

6+
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
7+
if secretsVersion != '':
8+
try:
9+
secrets = __import__(secretsVersion)
10+
print 'Editing Production'
11+
except ImportError:
12+
print 'Editing Stage'
13+
614
baseURL = secrets.baseURL
715
email = secrets.email
816
password = secrets.password
917
filePath = secrets.filePath
18+
verify = secrets.verify
19+
20+
requests.packages.urllib3.disable_warnings()
1021

1122
handle = raw_input('Enter handle: ')
1223

1324
data = json.dumps({'email':email,'password':password})
1425
header = {'content-type':'application/json','accept':'application/json'}
15-
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
26+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content
1627
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
1728
print 'authenticated'
1829
startTime = time.time()
1930

2031
endpoint = baseURL+'/rest/handle/'+handle
21-
collection = requests.get(endpoint, headers=headerAuth).json()
32+
collection = requests.get(endpoint, headers=headerAuth, verify=verify).json()
2233
collectionID = collection['id']
23-
collectionTitle = requests.get(endpoint, headers=headerAuth).json()
34+
collectionTitle = requests.get(endpoint, headers=headerAuth, verify=verify).json()
2435
endpoint = baseURL+'/rest/collections/'+str(collectionID)+'/items'
25-
output = requests.get(endpoint, headers=headerAuth).json()
36+
output = requests.get(endpoint, headers=headerAuth, verify=verify).json()
2637

2738
itemList = []
2839
for i in range (0, len (output)):
@@ -33,11 +44,11 @@
3344
f=open(filePath+handle.replace('/','-')+'.json', 'w')
3445
metadataGroup = []
3546
for itemID in itemList:
36-
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth).json()
47+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json()
3748
metadataGroup.append(metadata)
3849
json.dump(metadataGroup, f)
3950

40-
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth)
51+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth, verify=verify)
4152

4253
elapsedTime = time.time() - startTime
4354
m, s = divmod(elapsedTime, 60)

getCompleteAndUniqueValuesForAllKeys.py

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,38 +5,50 @@
55
import time
66
import os.path
77
from collections import Counter
8+
from datetime import datetime
9+
10+
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
11+
if secretsVersion != '':
12+
try:
13+
secrets = __import__(secretsVersion)
14+
print 'Editing Production'
15+
except ImportError:
16+
print 'Editing Stage'
817

918
baseURL = secrets.baseURL
1019
email = secrets.email
1120
password = secrets.password
1221
filePath = secrets.filePath
22+
verify = secrets.verify
23+
24+
requests.packages.urllib3.disable_warnings()
1325

14-
filePathComplete = filePath+'completeValueLists/'
15-
filePathUnique = filePath+'uniqueValueLists/'
26+
filePathComplete = filePath+'completeValueLists'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'/'
27+
filePathUnique = filePath+'uniqueValueLists'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'/'
1628

1729
startTime = time.time()
1830
data = json.dumps({'email':email,'password':password})
1931
header = {'content-type':'application/json','accept':'application/json'}
20-
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
32+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content
2133
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
2234
print 'authenticated'
2335

2436
itemList = []
2537
endpoint = baseURL+'/rest/communities'
26-
communities = requests.get(endpoint, headers=headerAuth).json()
38+
communities = requests.get(endpoint, headers=headerAuth, verify=verify).json()
2739
for i in range (0, len (communities)):
2840
communityID = communities[i]['id']
29-
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth).json()
41+
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth, verify=verify).json()
3042
for j in range (0, len (collections)):
3143
collectionID = collections[j]['id']
3244
if collectionID != 24:
3345
offset = 0
3446
items = ''
3547
while items != []:
36-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
48+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
3749
while items.status_code != 200:
3850
time.sleep(5)
39-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
51+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
4052
items = items.json()
4153
for k in range (0, len (items)):
4254
itemID = items[k]['id']
@@ -47,10 +59,12 @@
4759
h, m = divmod(m, 60)
4860
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
4961

62+
os.mkdir(filePathComplete)
63+
os.mkdir(filePathUnique)
5064
for number, itemID in enumerate(itemList):
5165
itemsRemaining = len(itemList) - number
5266
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
53-
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth).json()
67+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json()
5468
for l in range (0, len (metadata)):
5569
if metadata[l]['key'] != 'dc.description.provenance':
5670
key = metadata[l]['key']
@@ -79,7 +93,7 @@
7993
for key, value in valueListCount.items():
8094
f.writerow([key]+[str(value).zfill(6)])
8195

82-
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth)
96+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth, verify=verify)
8397

8498
elapsedTime = time.time() - startTime
8599
m, s = divmod(elapsedTime, 60)

0 commit comments

Comments
 (0)