|
| 1 | +import json |
| 2 | +import requests |
| 3 | +import secrets |
| 4 | +import csv |
| 5 | +import time |
| 6 | +from datetime import datetime |
| 7 | + |
# Collapses redundant spaces in the advisor / committee-member metadata of
# every item found in the collections of one DSpace community, and logs each
# replaced value to a timestamped CSV file.
#
# Written for Python 2 (raw_input, binary-mode csv); the helpers below are
# plain string/list code with no interpreter-specific behaviour.

# Metadata keys whose values are cleaned up.
TARGET_KEYS = ('dc.contributor.advisor', 'dc.contributor.committeeMember')


def collapseSpaces(text):
    """Return text with runs of spaces reduced to one and ' ,' turned into ','."""
    # Repeat until stable so runs of any length end up as a single space.
    while '  ' in text:
        text = text.replace('  ', ' ')
    return text.replace(' ,', ',')


def cleanMetadata(metadata):
    """Clean the values of the TARGET_KEYS entries in an item's metadata list.

    Returns a tuple (cleaned, replaced):
      cleaned  -- new list; unchanged elements are the original dicts,
                  changed elements are copies carrying the tidied value.
      replaced -- list of (key, originalValue) for every changed element.
    The input list and its dicts are not modified.
    """
    cleaned = []
    replaced = []
    for element in metadata:
        # Falsy values (None, '') have nothing to collapse.
        if element['key'] in TARGET_KEYS and element['value']:
            tidy = collapseSpaces(element['value'])
            if tidy != element['value']:
                updatedElement = dict(element)
                updatedElement['value'] = tidy
                cleaned.append(updatedElement)
                replaced.append((element['key'], element['value']))
                continue
        cleaned.append(element)
    return cleaned, replaced


def fetchItemIDs(baseURL, collectionID, headers, pageSize=5000):
    """Return the ids of all items in a collection, paging through the listing.

    A page shorter than pageSize is taken as the last one, so a collection
    with fewer than pageSize items costs a single request.
    """
    itemIDs = []
    offset = 0
    while True:
        url = (baseURL + '/rest/collections/' + str(collectionID)
               + '/items?limit=' + str(pageSize) + '&offset=' + str(offset))
        page = requests.get(url, headers=headers).json()
        itemIDs.extend(item['id'] for item in page)
        if len(page) < pageSize:
            return itemIDs
        offset += len(page)


def main():
    """Log in, clean every item under the prompted community handle, log out."""
    baseURL = secrets.baseURL
    email = secrets.email
    password = secrets.password
    filePath = secrets.filePath

    handle = raw_input('Enter handle: ')

    data = json.dumps({'email': email, 'password': password})
    header = {'content-type': 'application/json', 'accept': 'application/json'}
    session = requests.post(baseURL + '/rest/login', headers=header, data=data).content
    # Authenticated requests reuse the JSON headers plus the login token.
    headerAuth = dict(header)
    headerAuth['rest-dspace-token'] = session
    print('authenticated')
    startTime = time.time()

    community = requests.get(baseURL + '/rest/handle/' + handle, headers=headerAuth).json()
    collections = requests.get(
        baseURL + '/rest/communities/' + str(community['id']) + '/collections',
        headers=headerAuth).json()

    itemList = []
    for collection in collections:
        itemList.extend(fetchItemIDs(baseURL, collection['id'], headerAuth))

    csvName = (filePath + 'removeUnnecessarySpaces'
               + datetime.now().strftime('%Y-%m-%d %H.%M.%S') + '.csv')
    # 'wb' is the mode the Python 2 csv module expects; the with-block
    # guarantees the log is flushed and closed even if a request raises.
    with open(csvName, 'wb') as csvFile:
        f = csv.writer(csvFile)
        f.writerow(['itemID', 'replacedKey', 'replacedValue', 'delete', 'post'])
        for itemID in itemList:
            metadataURL = baseURL + '/rest/items/' + str(itemID) + '/metadata'
            metadata = requests.get(metadataURL, headers=headerAuth).json()
            cleaned, replaced = cleanMetadata(metadata)
            if not replaced:
                print('not updated %s' % itemID)
                continue
            print('updated %s' % itemID)
            # NOTE(review): the metadata is cleared before it is re-sent; if
            # the PUT fails the item is left without metadata -- confirm
            # whether the DELETE is actually required by the server.
            delete = requests.delete(metadataURL, headers=headerAuth)
            print(delete)
            post = requests.put(metadataURL, headers=headerAuth, data=json.dumps(cleaned))
            print(post)
            # Rows are written after the requests so the 'delete' and 'post'
            # columns announced in the header are actually filled in.
            for key, value in replaced:
                # The Python 2 csv writer cannot take non-ASCII unicode.
                f.writerow([itemID, key, value.encode('utf-8'), delete, post])

    requests.post(baseURL + '/rest/logout', headers=headerAuth)

    elapsedTime = time.time() - startTime
    m, s = divmod(elapsedTime, 60)
    h, m = divmod(m, 60)
    print('Total script run time:  %d:%02d:%02d' % (h, m, s))


if __name__ == '__main__':
    main()
0 commit comments