Skip to content

Commit b4713a5

Browse files
author
ehanson8
committed
updates
1 parent 28b0900 commit b4713a5

22 files changed

+926
-293
lines changed

README.md

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# dspace-editing
22

3+
**Note**: These scripts were updated in 05/2018 for the new authentication method used by DSpace 6.x.
4+
35
All of these scripts require a secrets.py file in the same directory that must contain the following text:
46
```
57
baseURL='https://dspace.myuni.edu'
@@ -13,27 +15,23 @@ The 'filePath' is directory into which output files will be written and 'handleP
1315

1416
If you are using both a development server and a production server, you can create a separate secrets.py file with a different name (e.g. secretsProd.py) that contains the production server information. When running each of these scripts, you will be prompted to enter the file name (e.g. 'secretsProd' without '.py') of an alternate secrets file. If you skip the prompt or incorrectly type the file name, the scripts will default to the information in the secrets.py file. This ensures that you will only edit the production server if you really intend to.
1517

16-
The command 'requests.packages.urllib3.disable_warnings()' is used to disable the excessive warnings that will be produced if the 'verify' variable is set to False, which is necessary if you are using an SSH tunnel to connect to the DSpace API.
17-
18-
19-
20-
**Note**: All of these scripts skip collection '24' for local reasons. To change this, edit the following portion of the script (typically between lines 27-39).
18+
**Note**: All of these scripts skip collection '4dccec82-4cfb-4583-a728-2cb823b15ef0' for local reasons. To change this, edit the following portion of the script (typically between lines 27-39).
2119

2220

23-
Skips collection 24:
21+
Skips collection 4dccec82-4cfb-4583-a728-2cb823b15ef0:
2422

2523
for j in range (0, len (collections)):
26-
collectionID = collections[j]['id']
27-
if collectionID != 24:
28-
offset = 0
24+
collectionID = collections[j]['uuid']
25+
if collectionID != '4dccec82-4cfb-4583-a728-2cb823b15ef0':
26+
offset = 0
2927

3028

3129
No collections skipped:
3230

3331
for j in range (0, len (collections)):
34-
collectionID = collections[j]['id']
32+
collectionID = collections[j]['uuid']
3533
if collectionID != 0:
36-
offset = 0
34+
offset = 0
3735

3836
#### [addKeyValuePairOnHandleCSV.py](addKeyValuePairOnHandleCSV.py)
3937
Based on user input, this script reads a specified CSV file containing DSpace item handles and, for each handle, the value to be added to that item under the specified key. A CSV log is written with all of the changes made, and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.

addKeyValuePairOnHandleCSV.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
import time
55
import csv
66
from datetime import datetime
7+
import urllib3
8+
9+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
710

811
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
912
if secretsVersion != '':
@@ -14,19 +17,22 @@
1417
print 'Editing Stage'
1518
else:
1619
print 'Editing Stage'
17-
20+
1821
baseURL = secrets.baseURL
1922
email = secrets.email
2023
password = secrets.password
2124
filePath = secrets.filePath
2225
verify = secrets.verify
2326

24-
requests.packages.urllib3.disable_warnings()
25-
26-
data = json.dumps({'email':email,'password':password})
27+
startTime = time.time()
28+
data = {'email':email,'password':password}
2729
header = {'content-type':'application/json','accept':'application/json'}
28-
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content
29-
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
30+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data).cookies['JSESSIONID']
31+
cookies = {'JSESSIONID': session}
32+
headerFileUpload = {'accept':'application/json'}
33+
cookiesFileUpload = cookies
34+
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json()
35+
print 'authenticated'
3036

3137
filename = filePath+raw_input('Enter filename (including \'.csv\'): ')
3238
addedKey = raw_input('Enter key: ')
@@ -45,9 +51,9 @@
4551
addedMetadataElement['value'] = unicode(addedValue)
4652
addedMetadataElement['language'] = 'en_us'
4753
endpoint = baseURL+'/rest/handle/'+handle
48-
item = requests.get(endpoint, headers=headerAuth, verify=verify).json()
49-
itemID = item['id']
50-
itemMetadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json()
54+
item = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
55+
itemID = item['uuid']
56+
itemMetadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
5157
itemMetadata.append(addedMetadataElement)
5258
itemMetadataProcessed = itemMetadata
5359

@@ -59,8 +65,8 @@
5965
itemMetadataProcessed.append(provNoteElement)
6066

6167
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
62-
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify)
68+
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify)
6369
print delete
64-
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify, data=itemMetadataProcessed)
70+
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed)
6571
print post
6672
f.writerow([itemID]+[addedMetadataElement['key']]+[addedMetadataElement['value'].encode('utf-8')]+[delete]+[post])

addKeyValuePairToCollection.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
import time
55
import csv
66
from datetime import datetime
7+
import urllib3
8+
9+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
710

811
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
912
if secretsVersion != '':
@@ -14,44 +17,45 @@
1417
print 'Editing Stage'
1518
else:
1619
print 'Editing Stage'
17-
20+
1821
baseURL = secrets.baseURL
1922
email = secrets.email
2023
password = secrets.password
2124
filePath = secrets.filePath
2225
verify = secrets.verify
2326

24-
requests.packages.urllib3.disable_warnings()
25-
2627
collectionHandle = raw_input('Enter collection handle: ')
2728
addedKey = raw_input('Enter key: ')
2829
addedValue = raw_input('Enter value: ')
2930
addedLanguage = raw_input('Enter language: ')
3031
confirm = raw_input('Hit enter to proceed')
3132

3233
startTime = time.time()
33-
data = json.dumps({'email':email,'password':password})
34+
data = {'email':email,'password':password}
3435
header = {'content-type':'application/json','accept':'application/json'}
35-
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content
36-
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
36+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data).cookies['JSESSIONID']
37+
cookies = {'JSESSIONID': session}
38+
headerFileUpload = {'accept':'application/json'}
39+
cookiesFileUpload = cookies
40+
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json()
3741
print 'authenticated'
3842

3943
itemList = []
4044
endpoint = baseURL+'/rest/handle/'+collectionHandle
41-
collection = requests.get(endpoint, headers=header, verify=verify).json()
42-
collectionID = collection['id']
45+
collection = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
46+
collectionID = collection['uuid']
4347
offset = 0
4448
items = ''
4549
while items != []:
46-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
50+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
4751
while items.status_code != 200:
4852
time.sleep(5)
49-
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
53+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
5054
items = items.json()
5155
for k in range (0, len (items)):
52-
itemID = items[k]['id']
56+
itemID = items[k]['uuid']
5357
itemList.append(itemID)
54-
offset = offset + 1000
58+
offset = offset + 200
5559
elapsedTime = time.time() - startTime
5660
m, s = divmod(elapsedTime, 60)
5761
h, m = divmod(m, 60)
@@ -63,7 +67,7 @@
6367
for number, itemID in enumerate(itemList):
6468
itemsRemaining = len(itemList) - number
6569
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
66-
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json()
70+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
6771
itemMetadataProcessed = metadata
6872
addedMetadataElement = {}
6973
addedMetadataElement['key'] = addedKey
@@ -79,13 +83,13 @@
7983
recordsEdited = recordsEdited + 1
8084
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
8185
print 'updated', itemID, recordsEdited
82-
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify)
86+
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify)
8387
print delete
84-
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify, data=itemMetadataProcessed)
88+
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed)
8589
print post
8690
f.writerow([itemID]+[addedKey]+[addedValue]+[delete]+[post])
8791

88-
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth, verify=verify)
92+
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)
8993

9094
elapsedTime = time.time() - startTime
9195
m, s = divmod(elapsedTime, 60)

addKeyValuePairToCommunity.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
from datetime import datetime
7+
import urllib3
8+
9+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
10+
11+
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
12+
if secretsVersion != '':
13+
try:
14+
secrets = __import__(secretsVersion)
15+
print 'Editing Production'
16+
except ImportError:
17+
print 'Editing Stage'
18+
else:
19+
print 'Editing Stage'
20+
21+
baseURL = secrets.baseURL
22+
email = secrets.email
23+
password = secrets.password
24+
filePath = secrets.filePath
25+
verify = secrets.verify
26+
27+
handle = raw_input('Enter community handle: ')
28+
addedKey = raw_input('Enter key: ')
29+
addedValue = raw_input('Enter value: ')
30+
addedLanguage = raw_input('Enter language: ')
31+
confirm = raw_input('Hit enter to proceed')
32+
33+
startTime = time.time()
34+
data = {'email':email,'password':password}
35+
header = {'content-type':'application/json','accept':'application/json'}
36+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, params=data).cookies['JSESSIONID']
37+
cookies = {'JSESSIONID': session}
38+
headerFileUpload = {'accept':'application/json'}
39+
cookiesFileUpload = cookies
40+
status = requests.get(baseURL+'/rest/status', headers=header, cookies=cookies, verify=verify).json()
41+
print 'authenticated'
42+
43+
itemList = []
44+
endpoint = baseURL+'/rest/handle/'+handle
45+
community = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
46+
communityID = community['uuid']
47+
48+
collections = requests.get(baseURL+'/rest/communities/'+str(communityID)+'/collections', headers=header, cookies=cookies, verify=verify).json()
49+
for j in range (0, len (collections)):
50+
collectionID = collections[j]['uuid']
51+
if collectionID != '4dccec82-4cfb-4583-a728-2cb823b15ef0':
52+
offset = 0
53+
items = ''
54+
while items != []:
55+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
56+
while items.status_code != 200:
57+
time.sleep(5)
58+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=200&offset='+str(offset), headers=header, cookies=cookies, verify=verify)
59+
items = items.json()
60+
for k in range (0, len (items)):
61+
itemID = items[k]['uuid']
62+
itemList.append(itemID)
63+
offset = offset + 200
64+
elapsedTime = time.time() - startTime
65+
m, s = divmod(elapsedTime, 60)
66+
h, m = divmod(m, 60)
67+
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
68+
69+
recordsEdited = 0
70+
f=csv.writer(open(filePath+'addKeyValuePair'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
71+
f.writerow(['itemID']+['addedKey']+['addedValue']+['delete']+['post'])
72+
for number, itemID in enumerate(itemList):
73+
itemsRemaining = len(itemList) - number
74+
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
75+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify).json()
76+
itemMetadataProcessed = metadata
77+
addedMetadataElement = {}
78+
addedMetadataElement['key'] = addedKey
79+
addedMetadataElement['value'] = unicode(addedValue)
80+
addedMetadataElement['language'] = unicode(addedLanguage)
81+
itemMetadataProcessed.append(addedMetadataElement)
82+
provNote = '\''+addedKey+': '+addedValue+'\' was added through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
83+
provNoteElement = {}
84+
provNoteElement['key'] = 'dc.description.provenance'
85+
provNoteElement['value'] = unicode(provNote)
86+
provNoteElement['language'] = 'en_US'
87+
itemMetadataProcessed.append(provNoteElement)
88+
recordsEdited = recordsEdited + 1
89+
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
90+
print 'updated', itemID, recordsEdited
91+
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify)
92+
print delete
93+
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=header, cookies=cookies, verify=verify, data=itemMetadataProcessed)
94+
print post
95+
f.writerow([itemID]+[addedKey]+[addedValue]+[delete]+[post])
96+
97+
logout = requests.post(baseURL+'/rest/logout', headers=header, cookies=cookies, verify=verify)
98+
99+
elapsedTime = time.time() - startTime
100+
m, s = divmod(elapsedTime, 60)
101+
h, m = divmod(m, 60)
102+
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)

0 commit comments

Comments
 (0)