|
6 | 6 | import os |
7 | 7 | import csv |
8 | 8 | import urllib3 |
| 9 | +import collections |
| 10 | +import argparse |
| 11 | + |
# Command-line arguments.  Each value is optional on the command line; any
# value not supplied is prompted for interactively below.
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--directory', help='the directory of files to be ingested. optional - if not provided, the script will ask for input')
parser.add_argument('-e', '--fileExtension', help='the extension of files to be ingested. optional - if not provided, the script will ask for input')
# No short '-h' option here: argparse reserves '-h' for the auto-generated
# help flag, and registering it again raises ArgumentError at startup.
parser.add_argument('--handle', help='handle of the object to retrieve. optional - if not provided, the script will ask for input')
args = parser.parse_args()

# Fall back to interactive prompts for anything missing.  (Previously the
# first two checks tested the nonexistent 'args.uri', which raised
# AttributeError before any prompt could appear.)
if args.directory:
    directory = args.directory
else:
    directory = raw_input('Enter directory name: ')
if args.fileExtension:
    # NOTE(review): a value passed via --fileExtension is used as-is, while
    # a prompted value gets a leading '.' added -- pass e.g. '.tif' on the
    # command line. Confirm whether the CLI path should also prepend '.'.
    fileExtension = args.fileExtension
else:
    fileExtension = '.'+raw_input('Enter file extension: ')
if args.handle:
    handle = args.handle
else:
    handle = raw_input('Enter handle: ')
9 | 30 |
|
# Silence InsecureRequestWarning noise -- requests in this script are made
# with certificate verification controlled by the 'verify' setting below.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
11 | 32 |
|
|
# Local configuration pulled from the project's secrets module (imported in
# a part of the file not visible in this chunk).
# NOTE(review): 'secrets' shadows the Python 3 stdlib module of the same
# name -- harmless under Python 2, worth renaming if the script is ported.
filePath = secrets.filePath
verify = secrets.verify
27 | 48 |
|
28 | | -directory = raw_input('Enter directory name: ') |
29 | | -fileExtension = '.'+raw_input('Enter file extension: ') |
30 | | -collectionHandle = raw_input('Enter collection handle: ') |
31 | | - |
# Wall-clock start used for the elapsed-time report after the file walk.
startTime = time.time()
33 | 50 |
|
34 | 51 | #create file list and export csv |
|
37 | 54 | for file in files: |
38 | 55 | if file.endswith(fileExtension): |
39 | 56 | fileList[file[:file.index('.')]] = os.path.join(root, file).replace('\\','/') |
40 | | - print file |
# Report how long building the file list took, formatted H:MM:SS.
elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print 'File list creation time: ','%d:%02d:%02d' % (h, m, s)
45 | 61 |
|
# CSV log of the item IDs created during this run, named after the target
# handle (slashes replaced so the handle makes a valid file name).
# NOTE(review): the underlying file object is never closed explicitly, so
# rows may not be flushed until interpreter exit -- consider keeping a
# reference to it and closing when done. 'wb' is the Python 2 csv idiom.
f=csv.writer(open(handle.replace('/','-')+'addedFilesList.csv', 'wb'))
f.writerow(['itemID'])
48 | 64 |
|
49 | 65 | for k,v in fileList.items(): |
|
62 | 78 | print 'authenticated' |
63 | 79 |
|
#Get collection ID: resolve the user-supplied handle to the collection's
#internal UUID via the DSpace REST API.
endpoint = baseURL+'/rest/handle/'+handle
collection = requests.get(endpoint, headers=header, cookies=cookies, verify=verify).json()
collectionID = str(collection['uuid'])
print collectionID
68 | 85 |
|
# Post items: load the per-item metadata prepared for the new files.
# 'with' guarantees the JSON file handle is closed (the previous
# json.load(open(...)) form leaked it).
with open(directory+'/'+'metadataNewFiles.json') as metadataFile:
    collectionMetadata = json.load(metadataFile)
71 | 88 | for itemMetadata in collectionMetadata: |
72 | 89 | counter = counter - 1 |
73 | 90 | print 'Items remaining: ', counter |
| 91 | + fileExists = '' |
74 | 92 | updatedItemMetadata = {} |
75 | 93 | updatedItemMetadataList = [] |
76 | 94 | for element in itemMetadata['metadata']: |
|
80 | 98 | updatedItemMetadataList.append(element) |
81 | 99 | updatedItemMetadata['metadata'] = updatedItemMetadataList |
82 | 100 | updatedItemMetadata = json.dumps(updatedItemMetadata) |
83 | | - post = requests.post(baseURL+'/rest/collections/'+collectionID+'/items', headers=header, cookies=cookies, verify=verify, data=updatedItemMetadata).json() |
84 | | - itemID = post['link'] |
85 | | - |
86 | | - #Post bitstream |
87 | | - bitstream = fileList[fileIdentifier] |
88 | | - fileName = bitstream[bitstream.rfind('/')+1:] |
89 | | - data = open(bitstream, 'rb') |
90 | | - files = {'file': open(bitstream, 'rb')} |
91 | | - post = requests.post(baseURL+itemID+'/bitstreams?name='+fileName, headers=headerFileUpload, verify=verify, data=data).json() |
| 101 | + for k in fileList: |
| 102 | + if fileIdentifier in k: |
| 103 | + fileExists = True |
| 104 | + if fileExists == True: |
| 105 | + print fileIdentifier |
| 106 | + post = requests.post(baseURL+'/rest/collections/'+collectionID+'/items', headers=header, cookies=cookies, verify=verify, data=updatedItemMetadata).json() |
| 107 | + print json.dumps(post) |
| 108 | + itemID = post['link'] |
| 109 | + |
| 110 | + # #Post bitstream - front and back |
| 111 | + # for k,v in fileList.items(): |
| 112 | + # if k == fileIdentifier + '-Front': |
| 113 | + # bitstream = fileList[k] |
| 114 | + # fileName = bitstream[bitstream.rfind('/')+1:] |
| 115 | + # data = open(bitstream, 'rb') |
| 116 | + # post = requests.post(baseURL+itemID+'/bitstreams?name='+fileName, headers=headerFileUpload, cookies=cookies, verify=verify, data=data).json() |
| 117 | + # print post |
| 118 | + # |
| 119 | + # for k,v in fileList.items(): |
| 120 | + # if k == fileIdentifier + '-Back': |
| 121 | + # bitstream = fileList[k] |
| 122 | + # fileName = bitstream[bitstream.rfind('/')+1:] |
| 123 | + # data = open(bitstream, 'rb') |
| 124 | + # post = requests.post(baseURL+itemID+'/bitstreams?name='+fileName, headers=headerFileUpload, cookies=cookies, verify=verify, data=data).json() |
| 125 | + # print post |
| 126 | + |
| 127 | + #Post bitstream - starts with file identifier |
| 128 | + orderedFileList = collections.OrderedDict(sorted(fileList.items())) |
| 129 | + for k,v in orderedFileList.items(): |
| 130 | + if k.startswith(fileIdentifier): |
| 131 | + bitstream = orderedFileList[k] |
| 132 | + fileName = bitstream[bitstream.rfind('/')+1:] |
| 133 | + print fileName |
| 134 | + data = open(bitstream, 'rb') |
| 135 | + post = requests.post(baseURL+itemID+'/bitstreams?name='+fileName, headers=headerFileUpload, cookies=cookies, verify=verify, data=data).json() |
| 136 | + print post |
92 | 137 |
|
93 | 138 | #Create provenance notes |
94 | 139 | provNote = {} |
|
110 | 155 | provNote2 = {} |
111 | 156 | provNote2['key'] = 'dc.description.provenance' |
112 | 157 | provNote2['language'] = 'en_US' |
113 | | - |
114 | 158 | provNote2Value = 'Made available in DSpace on '+utcTime+' (GMT). No. of bitstreams: '+str(bitstreamCount) |
115 | 159 | for bitstream in bitstreams: |
116 | 160 | fileName = bitstream['name'] |
|
0 commit comments