Skip to content

Commit 24b0f47

Browse files
author
ehanson8
committed
add scripts
1 parent 06215c4 commit 24b0f47

10 files changed

+654
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@ $RECYCLE.BIN/
4545
Network Trash Folder
4646
Temporary Items
4747
.apdisk
48+
secrets.py

addKeyValuePairOnHandleCSV.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
from datetime import datetime
7+
8+
baseURL = secrets.baseURL
9+
email = secrets.email
10+
password = secrets.password
11+
filePath = secrets.filePath
12+
13+
data = json.dumps({'email':email,'password':password})
14+
header = {'content-type':'application/json','accept':'application/json'}
15+
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
16+
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
17+
18+
filename = filePath+raw_input('Enter filename (including \'.csv\'): ')
19+
addedKey = raw_input('Enter key: ')
20+
startTime = time.time()
21+
22+
f=csv.writer(open(filePath+'addKeyValuePair'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
23+
f.writerow(['itemID']+['addedKey']+['addedValue']+['delete']+['post'])
24+
25+
with open(filename) as csvfile:
26+
reader = csv.DictReader(csvfile)
27+
for row in reader:
28+
addedValue = row['value'].decode('utf-8')
29+
handle = row['handle'].strip()
30+
addedMetadataElement = {}
31+
addedMetadataElement['key'] = addedKey
32+
addedMetadataElement['value'] = unicode(addedValue)
33+
addedMetadataElement['language'] = 'en_us'
34+
endpoint = baseURL+'/rest/handle/'+handle
35+
item = requests.get(endpoint, headers=header).json()
36+
itemID = item['id']
37+
itemMetadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth).json()
38+
itemMetadata.append(addedMetadataElement)
39+
itemMetadataProcessed = itemMetadata
40+
41+
provNote = '\''+addedKey+': '+addedValue+'\' was added through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
42+
provNoteElement = {}
43+
provNoteElement['key'] = 'dc.description.provenance'
44+
provNoteElement['value'] = unicode(provNote)
45+
provNoteElement['language'] = 'en_US'
46+
itemMetadataProcessed.append(provNoteElement)
47+
48+
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
49+
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth)
50+
print delete
51+
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, data=itemMetadataProcessed)
52+
print post
53+
f.writerow([itemID]+[addedMetadataElement['key']]+[addedMetadataElement['value'].encode('utf-8')]+[delete]+[post])

checkCommunityForKey.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import json
2+
import requests
3+
import secrets
4+
import csv
5+
import time
6+
7+
baseURL = secrets.baseURL
8+
email = secrets.email
9+
password = secrets.password
10+
filePath = secrets.filePath
11+
12+
communityID = raw_input('Enter community ID: ')
13+
14+
key = raw_input('Enter key: ')
15+
16+
startTime = time.time()
17+
data = json.dumps({'email':email,'password':password})
18+
header = {'content-type':'application/json','accept':'application/json'}
19+
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
20+
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
21+
print 'authenticated'
22+
23+
itemList = []
24+
endpoint = baseURL+'/rest/communities'
25+
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth).json()
26+
for j in range (0, len (collections)):
27+
collectionID = collections[j]['id']
28+
if collectionID != 24:
29+
offset = 0
30+
items = ''
31+
while items != []:
32+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
33+
while items.status_code != 200:
34+
time.sleep(5)
35+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
36+
items = items.json()
37+
for k in range (0, len (items)):
38+
itemID = items[k]['id']
39+
itemList.append(itemID)
40+
offset = offset + 1000
41+
elapsedTime = time.time() - startTime
42+
m, s = divmod(elapsedTime, 60)
43+
h, m = divmod(m, 60)
44+
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
45+
46+
valueList = {}
47+
for number, itemID in enumerate(itemList):
48+
itemsRemaining = len(itemList) - number
49+
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
50+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth).json()
51+
valueList[itemID] = ''
52+
for l in range (0, len (metadata)):
53+
if metadata[l]['key'] == key:
54+
metadataValue = metadata[l]['value'].encode('utf-8')
55+
valueList[itemID] = metadataValue
56+
57+
elapsedTime = time.time() - startTime
58+
m, s = divmod(elapsedTime, 60)
59+
h, m = divmod(m, 60)
60+
print 'Value list creation time: ','%d:%02d:%02d' % (h, m, s)
61+
62+
f=csv.writer(open(filePath+key+'Values.csv', 'wb'))
63+
f.writerow(['itemID']+[key])
64+
for k, v in valueList.iteritems():
65+
f.writerow([k]+[v])
66+
67+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth)
68+
69+
elapsedTime = time.time() - startTime
70+
m, s = divmod(elapsedTime, 60)
71+
h, m = divmod(m, 60)
72+
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)

postItem.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import json
2+
import requests
3+
import secrets
4+
5+
baseURL = secrets.baseURL
6+
email = secrets.email
7+
password = secrets.password
8+
filePath = secrets.filePath
9+
10+
data = json.dumps({'email':email,'password':password})
11+
header = {'content-type':'application/json','accept':'application/json'}
12+
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
13+
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
14+
headerAuthFileUpload = {'accept':'application/json', 'rest-dspace-token':session}
15+
print 'authenticated'
16+
17+
#Post community
18+
communityName = 'Test Community'
19+
community = json.dumps({'name': communityName})
20+
post = requests.post(baseURL+'/rest/communities', headers=headerAuth, data=community).json()
21+
print post
22+
communityID = post['link']
23+
print communityID
24+
25+
# #Post collection
26+
collectionName = 'Test Collection'
27+
collection = json.dumps({'name': collectionName})
28+
post = requests.post(baseURL+communityID+'/collections', headers=headerAuth, data=collection).json()
29+
print post
30+
collectionID = post['link']
31+
32+
#Post item
33+
item = json.dumps({'metadata': [{'key': 'dc.title', 'language': 'en_US', 'value': 'testing123'}]})
34+
post = requests.post(baseURL+collectionID+'/items', headers=headerAuth, data=item).json()
35+
print post
36+
itemID = post['link']
37+
38+
#Post bitstream
39+
#bitstream = filePath+'test.txt'
40+
bitstream = filePath+'testImage.jpg'
41+
fileName = bitstream[bitstream.rfind('/')+1:]
42+
files = {'file': open(bitstream, 'rb')}
43+
data = json.dumps({'name': fileName, 'sequenceId': 1})
44+
post = requests.post(baseURL+itemID+'/bitstreams', headers=headerAuthFileUpload, files=files).json()
45+
print post
46+
bitstreamID = '/rest/bitstreams/'+str(post['id'])
47+
post = requests.put(baseURL+bitstreamID, headers=headerAuth, data=data)
48+
print post
49+
50+
# print baseURL+itemID+'/metadata'
51+
# item = json.dumps([{'key': 'dc.title', 'language': 'en_US', 'value': 'testing123'}])
52+
# post = requests.post(baseURL+itemID+'/metadata', headers=headerAuth, data=item).json()
53+
# print post
54+
55+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth)
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
from datetime import datetime
7+
8+
baseURL = secrets.baseURL
9+
email = secrets.email
10+
password = secrets.password
11+
filePath = secrets.filePath
12+
13+
startTime = time.time()
14+
data = json.dumps({'email':email,'password':password})
15+
header = {'content-type':'application/json','accept':'application/json'}
16+
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
17+
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
18+
print 'authenticated'
19+
20+
itemList = []
21+
endpoint = baseURL+'/rest/communities'
22+
communities = requests.get(endpoint, headers=headerAuth).json()
23+
for i in range (0, len (communities)):
24+
communityID = communities[i]['id']
25+
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth).json()
26+
for j in range (0, len (collections)):
27+
collectionID = collections[j]['id']
28+
if collectionID != 24:
29+
offset = 0
30+
items = ''
31+
while items != []:
32+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
33+
while items.status_code != 200:
34+
time.sleep(5)
35+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
36+
items = items.json()
37+
for k in range (0, len (items)):
38+
itemID = items[k]['id']
39+
itemList.append(itemID)
40+
offset = offset + 1000
41+
elapsedTime = time.time() - startTime
42+
m, s = divmod(elapsedTime, 60)
43+
h, m = divmod(m, 60)
44+
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
45+
46+
f=csv.writer(open(filePath+'removeDuplicatesRecordsEdited'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
47+
f.writerow(['itemID'])
48+
for number, itemID in enumerate(itemList):
49+
itemMetadataProcessed = []
50+
itemsRemaining = len(itemList) - number
51+
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
52+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth).json()
53+
for l in range (0, len (metadata)):
54+
if metadata[l] not in itemMetadataProcessed:
55+
itemMetadataProcessed.append(metadata[l])
56+
else:
57+
if metadata[l]['key'] == 'dc.description.provenance':
58+
itemMetadataProcessed.append(metadata[l])
59+
else:
60+
provNote = 'A duplicate element, \''+metadata[l]['key']+': '+metadata[l]['value']+',\' was removed through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
61+
provNoteElement = {}
62+
provNoteElement['key'] = 'dc.description.provenance'
63+
provNoteElement['value'] = unicode(provNote)
64+
provNoteElement['language'] = 'en_US'
65+
itemMetadataProcessed.append(provNoteElement)
66+
if itemMetadataProcessed != metadata:
67+
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
68+
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth)
69+
print delete
70+
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, data=itemMetadataProcessed)
71+
print post
72+
f.writerow([itemID])
73+
74+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth)
75+
76+
elapsedTime = time.time() - startTime
77+
m, s = divmod(elapsedTime, 60)
78+
h, m = divmod(m, 60)
79+
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)

replaceKey.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
from datetime import datetime
7+
8+
baseURL = secrets.baseURL
9+
email = secrets.email
10+
password = secrets.password
11+
filePath = secrets.filePath
12+
13+
oldKey = raw_input('Enter old key: ')
14+
newKey = raw_input('Enter new key: ')
15+
16+
startTime = time.time()
17+
data = json.dumps({'email':email,'password':password})
18+
header = {'content-type':'application/json','accept':'application/json'}
19+
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
20+
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
21+
print 'authenticated'
22+
23+
itemList = []
24+
endpoint = baseURL+'/rest/communities'
25+
communities = requests.get(endpoint, headers=headerAuth).json()
26+
for i in range (0, len (communities)):
27+
communityID = communities[i]['id']
28+
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=headerAuth).json()
29+
for j in range (0, len (collections)):
30+
collectionID = collections[j]['id']
31+
if collectionID != 24:
32+
offset = 0
33+
items = ''
34+
while items != []:
35+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
36+
while items.status_code != 200:
37+
time.sleep(5)
38+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth)
39+
items = items.json()
40+
for k in range (0, len (items)):
41+
itemID = items[k]['id']
42+
itemList.append(itemID)
43+
offset = offset + 1000
44+
elapsedTime = time.time() - startTime
45+
m, s = divmod(elapsedTime, 60)
46+
h, m = divmod(m, 60)
47+
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
48+
49+
recordsEdited = 0
50+
elementsEdited = 0
51+
f=csv.writer(open(filePath+'replaceKey'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
52+
f.writerow(['itemID']+['replacedKey']+['replacedValue']+['delete']+['post'])
53+
for number, itemID in enumerate(itemList):
54+
replacedElement = ''
55+
itemMetadataProcessed = []
56+
itemsRemaining = len(itemList) - number
57+
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
58+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth).json()
59+
for l in range (0, len (metadata)):
60+
if metadata[l]['key'] == oldKey:
61+
replacedElement = metadata[l]
62+
updatedMetadataElement = {}
63+
updatedMetadataElement['key'] = newKey
64+
updatedMetadataElement['value'] = unicode(replacedElement['value'])
65+
updatedMetadataElement['language'] = unicode(replacedElement['language'])
66+
print updatedMetadataElement
67+
itemMetadataProcessed.append(updatedMetadataElement)
68+
provNote = '\''+oldKey+'\' was replaced by \''+newKey+'\' through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
69+
provNoteElement = {}
70+
provNoteElement['key'] = 'dc.description.provenance'
71+
provNoteElement['value'] = unicode(provNote)
72+
provNoteElement['language'] = 'en_US'
73+
itemMetadataProcessed.append(provNoteElement)
74+
elementsEdited = elementsEdited + 1
75+
else:
76+
if metadata[l] not in itemMetadataProcessed:
77+
itemMetadataProcessed.append(metadata[l])
78+
if replacedElement != '':
79+
recordsEdited = recordsEdited + 1
80+
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
81+
print 'updated', itemID, recordsEdited, elementsEdited
82+
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth)
83+
print delete
84+
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, data=itemMetadataProcessed)
85+
print post
86+
f.writerow([itemID]+[replacedElement['key']]+[replacedElement['value'].encode('utf-8')]+[delete]+[post])
87+
else:
88+
print 'not updated', itemID
89+
90+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth)
91+
92+
elapsedTime = time.time() - startTime
93+
m, s = divmod(elapsedTime, 60)
94+
h, m = divmod(m, 60)
95+
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)

0 commit comments

Comments
 (0)