Skip to content

Commit f779c14

Browse files
author
ehanson8
committed
updates
1 parent 5cde470 commit f779c14

14 files changed

+379
-88
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ Network Trash Folder
4646
Temporary Items
4747
.apdisk
4848
secrets.py
49+
secretsProd.py
4950
*.pyc
5051
data/*
5152
!data/.keep

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,16 @@ No collections skipped:
3131
#### [addKeyValuePairOnHandleCSV.py](addKeyValuePairOnHandleCSV.py)
3232
Based on user input, this script uses a specified CSV file of DSpace item handles and the value to be added to that item using the specified key. A CSV log is written with all of the changes made and a 'dc.description.provenance' note describing the change is added to the metadata of each item that is updated.
3333

34+
#### [addKeyValuePairToCollection.py](addKeyValuePairToCollection.py)
35+
3436
#### [createItemMetadataFromCSV.py](createItemMetadataFromCSV.py)
3537
Based on user input, this script creates a JSON file of metadata that can be added to a DSpace item from the specified CSV file. The 'createMetadataElement' function in the script is used to create the desired metadata elements based on three variables:
3638
'key' - The Dublin Core property to be used for the element.
3739
'value' - The column in the CSV that contains the data for the element.
3840
'language' - The desired language value for the element
3941

42+
#### [deleteKeyFromCollection.py](deleteKeyFromCollection.py)
43+
4044
#### [postCollection.py](postCollection.py)
4145
Based on user input, this script creates a community with a specified name and collection with a specified name within that community. In the specified directory (within the filePath set by the secrets.py file), the script creates items and associated metadata based on a 'collectionMetadata.json' file in the directory. Based on the specified file extension, the script then posts each file in the directory with that extension as a bitstream for the appropriate item, which is determined by having the file name (minus the file extension) in a 'dc.identifier.other' field in the item metadata record.
4246

addKeyValuePairOnHandleCSV.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,25 @@
55
import csv
66
from datetime import datetime
77

8+
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
9+
if secretsVersion != '':
10+
try:
11+
secrets = __import__(secretsVersion)
12+
print 'Editing Production'
13+
except ImportError:
14+
print 'Editing Stage'
15+
816
baseURL = secrets.baseURL
917
email = secrets.email
1018
password = secrets.password
1119
filePath = secrets.filePath
20+
verify = secrets.verify
21+
22+
requests.packages.urllib3.disable_warnings()
1223

1324
data = json.dumps({'email':email,'password':password})
1425
header = {'content-type':'application/json','accept':'application/json'}
15-
session = requests.post(baseURL+'/rest/login', headers=header, data=data).content
26+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content
1627
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
1728

1829
filename = filePath+raw_input('Enter filename (including \'.csv\'): ')
@@ -32,9 +43,9 @@
3243
addedMetadataElement['value'] = unicode(addedValue)
3344
addedMetadataElement['language'] = 'en_us'
3445
endpoint = baseURL+'/rest/handle/'+handle
35-
item = requests.get(endpoint, headers=header).json()
46+
item = requests.get(endpoint, headers=headerAuth, verify=verify).json()
3647
itemID = item['id']
37-
itemMetadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth).json()
48+
itemMetadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json()
3849
itemMetadata.append(addedMetadataElement)
3950
itemMetadataProcessed = itemMetadata
4051

@@ -46,8 +57,8 @@
4657
itemMetadataProcessed.append(provNoteElement)
4758

4859
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
49-
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth)
60+
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify)
5061
print delete
51-
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, data=itemMetadataProcessed)
62+
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify, data=itemMetadataProcessed)
5263
print post
5364
f.writerow([itemID]+[addedMetadataElement['key']]+[addedMetadataElement['value'].encode('utf-8')]+[delete]+[post])

addKeyValuePairToCollection.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
from datetime import datetime
7+
8+
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
9+
if secretsVersion != '':
10+
try:
11+
secrets = __import__(secretsVersion)
12+
print 'Editing Production'
13+
except ImportError:
14+
print 'Editing Stage'
15+
16+
baseURL = secrets.baseURL
17+
email = secrets.email
18+
password = secrets.password
19+
filePath = secrets.filePath
20+
verify = secrets.verify
21+
22+
requests.packages.urllib3.disable_warnings()
23+
24+
collectionHandle = raw_input('Enter collection handle: ')
25+
addedKey = raw_input('Enter key: ')
26+
addedValue = raw_input('Enter value: ')
27+
addedLanguage = raw_input('Enter language: ')
28+
confirm = raw_input('Hit enter to proceed')
29+
30+
startTime = time.time()
31+
data = json.dumps({'email':email,'password':password})
32+
header = {'content-type':'application/json','accept':'application/json'}
33+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content
34+
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
35+
print 'authenticated'
36+
37+
itemList = []
38+
endpoint = baseURL+'/rest/handle/'+collectionHandle
39+
collection = requests.get(endpoint, headers=header, verify=verify).json()
40+
collectionID = collection['id']
41+
offset = 0
42+
items = ''
43+
while items != []:
44+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
45+
while items.status_code != 200:
46+
time.sleep(5)
47+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
48+
items = items.json()
49+
for k in range (0, len (items)):
50+
itemID = items[k]['id']
51+
itemList.append(itemID)
52+
offset = offset + 1000
53+
elapsedTime = time.time() - startTime
54+
m, s = divmod(elapsedTime, 60)
55+
h, m = divmod(m, 60)
56+
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
57+
58+
recordsEdited = 0
59+
f=csv.writer(open(filePath+'addKeyValuePair'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
60+
f.writerow(['itemID']+['addedKey']+['addedValue']+['delete']+['post'])
61+
for number, itemID in enumerate(itemList):
62+
itemsRemaining = len(itemList) - number
63+
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
64+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json()
65+
itemMetadataProcessed = metadata
66+
addedMetadataElement = {}
67+
addedMetadataElement['key'] = addedKey
68+
addedMetadataElement['value'] = unicode(addedValue)
69+
addedMetadataElement['language'] = unicode(addedLanguage)
70+
itemMetadataProcessed.append(addedMetadataElement)
71+
provNote = '\''+addedKey+': '+addedValue+'\' was added through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
72+
provNoteElement = {}
73+
provNoteElement['key'] = 'dc.description.provenance'
74+
provNoteElement['value'] = unicode(provNote)
75+
provNoteElement['language'] = 'en_US'
76+
itemMetadataProcessed.append(provNoteElement)
77+
recordsEdited = recordsEdited + 1
78+
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
79+
print 'updated', itemID, recordsEdited
80+
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify)
81+
print delete
82+
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify, data=itemMetadataProcessed)
83+
print post
84+
f.writerow([itemID]+[addedKey]+[addedValue]+[delete]+[post])
85+
86+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth, verify=verify)
87+
88+
elapsedTime = time.time() - startTime
89+
m, s = divmod(elapsedTime, 60)
90+
h, m = divmod(m, 60)
91+
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)

createItemMetadataFromCSV.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
import json
33
import csv
44

5-
def createMetadataElement (key, value, language):
6-
value = row[value]
5+
def createMetadataElementCSV (key, valueSource, language):
6+
value = row[valueSource]
77
if value != '':
88
if language != '':
99
metadataElement = {'key': key, 'language': language, 'value': value}
@@ -14,6 +14,14 @@ def createMetadataElement (key, value, language):
1414
else:
1515
pass
1616

17+
def createMetadataElementDirect (key, value, language):
    """Append a metadata element with a literal value to the 'metadata' list.

    key -- the metadata key to use for the element.
    value -- the value stored in the element as given.
    language -- the language code; when it is '' the element is written
        without a 'language' entry.

    The element is appended to the module-level 'metadata' list; nothing is
    returned.
    """
    if language == '':
        element = {'key': key, 'value': value}
    else:
        element = {'key': key, 'language': language, 'value': value}
    metadata.append(element)
24+
1725
filename = raw_input('Enter filename (including \'.csv\'): ')
1826

1927
with open(filename) as csvfile:
@@ -22,18 +30,18 @@ def createMetadataElement (key, value, language):
2230
metadataGroup = []
2331
for row in reader:
2432
metadata = []
25-
createMetadataElement('dc.creator', 'creator', '')
26-
createMetadataElement('dc.date', 'date', '')
27-
createMetadataElement('dc.subject', 'decade', 'en_US')
28-
createMetadataElement('dc.description.abstract', 'description', 'en_US')
29-
createMetadataElement('dc.description', 'notes', 'en_US')
30-
createMetadataElement('dc.subject', 'photographType', 'en_US')
31-
createMetadataElement('dc.relation.ispartof', 'location', 'en_US')
32-
createMetadataElement('dc.identifier', 'identifier', '')
33-
createMetadataElement('dc.format.extent', 'size', '')
34-
createMetadataElement('dc.format.medium', 'medium', 'en_US')
35-
createMetadataElement('dc.title', 'title', 'en_US')
36-
createMetadataElement('dc.subject', 'subjectType', 'en_US')
33+
createMetadataElementCSV('dc.creator', 'creator', '')
34+
createMetadataElementCSV('dc.date', 'date', '')
35+
createMetadataElementCSV('dc.subject', 'decade', 'en_US')
36+
createMetadataElementCSV('dc.description.abstract', 'description', 'en_US')
37+
createMetadataElementCSV('dc.description', 'notes', 'en_US')
38+
createMetadataElementCSV('dc.subject', 'photographType', 'en_US')
39+
createMetadataElementCSV('dc.relation.ispartof', 'location', 'en_US')
40+
createMetadataElementCSV('dc.identifier', 'identifier', '')
41+
createMetadataElementCSV('dc.format.extent', 'size', '')
42+
createMetadataElementCSV('dc.format.medium', 'medium', 'en_US')
43+
createMetadataElementCSV('dc.title', 'title', 'en_US')
44+
createMetadataElementCSV('dc.subject', 'subjectType', 'en_US')
3745

3846
item = {'metadata': metadata}
3947
metadataGroup.append(item)

deleteKeyFromCollection.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
from datetime import datetime
7+
8+
secretsVersion = raw_input('To edit production server, enter the name of the secrets file: ')
9+
if secretsVersion != '':
10+
try:
11+
secrets = __import__(secretsVersion)
12+
print 'Editing Production'
13+
except ImportError:
14+
print 'Editing Stage'
15+
16+
baseURL = secrets.baseURL
17+
email = secrets.email
18+
password = secrets.password
19+
filePath = secrets.filePath
20+
verify = secrets.verify
21+
22+
requests.packages.urllib3.disable_warnings()
23+
24+
collectionHandle = raw_input('Enter collection handle: ')
25+
deletedKey = raw_input('Enter key to be deleted: ')
26+
27+
28+
startTime = time.time()
29+
data = json.dumps({'email':email,'password':password})
30+
header = {'content-type':'application/json','accept':'application/json'}
31+
session = requests.post(baseURL+'/rest/login', headers=header, verify=verify, data=data).content
32+
headerAuth = {'content-type':'application/json','accept':'application/json', 'rest-dspace-token':session}
33+
print 'authenticated'
34+
35+
itemList = []
36+
endpoint = baseURL+'/rest/handle/'+collectionHandle
37+
collection = requests.get(endpoint, headers=headerAuth, verify=verify).json()
38+
collectionID = collection['id']
39+
offset = 0
40+
items = ''
41+
while items != []:
42+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
43+
while items.status_code != 200:
44+
time.sleep(5)
45+
items = requests.get(baseURL+'/rest/collections/'+str(collectionID)+'/items?limit=1000&offset='+str(offset), headers=headerAuth, verify=verify)
46+
items = items.json()
47+
for k in range (0, len (items)):
48+
itemID = items[k]['id']
49+
itemList.append(itemID)
50+
offset = offset + 1000
51+
elapsedTime = time.time() - startTime
52+
m, s = divmod(elapsedTime, 60)
53+
h, m = divmod(m, 60)
54+
print 'Item list creation time: ','%d:%02d:%02d' % (h, m, s)
55+
56+
recordsEdited = 0
57+
f=csv.writer(open(filePath+'deletedKey'+datetime.now().strftime('%Y-%m-%d %H.%M.%S')+'.csv', 'wb'))
58+
f.writerow(['itemID']+['deletedKey']+['delete']+['post'])
59+
for number, itemID in enumerate(itemList):
60+
itemsRemaining = len(itemList) - number
61+
print 'Items remaining: ', itemsRemaining, 'ItemID: ', itemID
62+
metadata = requests.get(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify).json()
63+
itemMetadataProcessed = []
64+
for l in range (0, len (metadata)):
65+
if metadata[l]['key'] == deletedKey:
66+
provNote = '\''+deletedKey+'\' was deleted through a batch process on '+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+'.'
67+
provNoteElement = {}
68+
provNoteElement['key'] = 'dc.description.provenance'
69+
provNoteElement['value'] = unicode(provNote)
70+
provNoteElement['language'] = 'en_US'
71+
itemMetadataProcessed.append(provNoteElement)
72+
else:
73+
itemMetadataProcessed.append(metadata[l])
74+
75+
if itemMetadataProcessed != metadata:
76+
recordsEdited = recordsEdited + 1
77+
itemMetadataProcessed = json.dumps(itemMetadataProcessed)
78+
print 'updated', itemID, recordsEdited
79+
delete = requests.delete(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify)
80+
print delete
81+
post = requests.put(baseURL+'/rest/items/'+str(itemID)+'/metadata', headers=headerAuth, verify=verify, data=itemMetadataProcessed)
82+
print post
83+
f.writerow([itemID]+[deletedKey]+[delete]+[post])
84+
85+
logout = requests.post(baseURL+'/rest/logout', headers=headerAuth, verify=verify)
86+
87+
elapsedTime = time.time() - startTime
88+
m, s = divmod(elapsedTime, 60)
89+
h, m = divmod(m, 60)
90+
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)

0 commit comments

Comments
 (0)