Commit 33fcf70: Merge pull request #1 from ehanson8/master ("Updates")
2 parents: 81c69fc + d50efd4

19 files changed: +958, -4 lines

.gitignore

Lines changed: 3 additions & 0 deletions

```diff
@@ -49,3 +49,6 @@ secrets.py
 *.pyc
 data/*
 !data/.keep
+.profile
+*.csv
+*.json
```

README.md

Lines changed: 48 additions & 1 deletion

```diff
@@ -10,6 +10,27 @@ All of these scripts require a secrets.py file in the same directory that must c
 
 ## Scripts
 
+#### [addBibNumbersAndPost.py](/addBibNumbersAndPost.py)
+Based on a specified CSV file with URIs and bib numbers, this script posts the specified bib number to the ['user_defined']['real_1'] field for the record specified by the URI.
+
+#### [dateCheck.py](/dateCheck.py)
+Retrieves 'begin,' 'end,' 'expression,' and 'date_type' for all dates associated with all resources in a repository.
+
+#### [eadToCsv.py](/eadToCsv.py)
+Based on a specified file name and file path, this script extracts selected elements from an EAD XML file and prints them to a CSV file.
+
+#### [getAccessionUDFs.py](/getAccessionUDFs.py)
+This GET script retrieves all of the user-defined fields from all of the accessions in the specified repository.
+
+#### [getAccessions.py](/getAccessions.py)
+This GET script retrieves all of the accessions from a particular repository into a JSON file.
+
+#### [getAllArchivalObjectTitles.py](/getAllArchivalObjectTitles.py)
+Retrieves titles from all archival objects in a repository. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
+
+#### [getArchivalObjectCountByResource.py](/getArchivalObjectCountByResource.py)
+Retrieves a count of archival objects associated with a particular resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
+
 #### [getArchivalObjectsByResource.py](/getArchivalObjectsByResource.py)
 A GET script to extract all of the archival objects associated with a particular resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
 
@@ -18,17 +39,43 @@ This GET script retrieves specific properties, including properties that have a
 
 #### [getPropertiesFromAgentsPeopleCSV.py](/getPropertiesFromAgentsPeopleCSV.py)
 This GET script retrieves specific properties from the JSON of ArchivesSpace agent_people records into a CSV file, which is specified in variable 'f' on line 17. In this example, the script retrieves the 'uri,' 'sort_name,' 'authority_id,' and 'names' properties from the JSON records by iterating through them with 'for i in range (...)' on line 19. The f.writerow(...) call on line 20 specifies which properties are retrieved from the JSON, and the f.writerow(...) on line 18 specifies the header row of the CSV file.
+
 #### [getResources.py](/getResources.py)
 This GET script retrieves all of the resources from a particular repository into a JSON file, which is specified in variable 'f' on line 16. It can be adapted to other record types by editing the 'endpoint' variable on line 13 (e.g. 'repositories/[repo ID]/accessions' or 'agents/corporate_entities').
 
 #### [getSingleRecord.py](/getSingleRecord.py)
 This GET script retrieves a single ArchivesSpace record based on the record's 'uri,' which is specified in the 'endpoint' variable on line 13.
 
+#### [getTopContainerCountByResource.py](/getTopContainerCountByResource.py)
+Retrieves a count of top containers associated with archival objects associated with a particular resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
+
+#### [getTopContainerCountByResourceNoAOs.py](/getTopContainerCountByResourceNoAOs.py)
+Retrieves a count of top containers directly associated (not through an archival object) with a particular resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
+
+#### [getTopContainers.py](/getTopContainers.py)
+This GET script retrieves all of the top containers from a particular repository into a JSON file.
+
+#### [getUrisAndIds.py](getUrisAndIds.py)
+For the specified record type, this script retrieves the URI and the 'id_0,' 'id_1,' 'id_2,' and 'id_3' fields, plus a concatenated version of all the 'id' fields.
+
 #### [postContainersFromCSV.py](/postContainersFromCSV.py)
 This script creates instances (consisting of top_containers) from a separate CSV file. The CSV file should have two columns, indicator and barcode. The directory where this file is stored must match the directory in the filePath variable. The script will prompt you first for the exact name of the CSV file, and then for the exact resource or accession to attach the containers to.
 
 #### [postNew.py](/postNew.py)
 This POST script will post new records to a generic API endpoint based on the record type, 'agents/people' in this example. It can be modified to accommodate other record types (e.g. 'repositories/[repo ID]/resources' or 'agents/corporate_entities'). It requires a properly formatted JSON file (specified where [JSON File] appears in the 'records' variable on line 13) for the particular ArchivesSpace record type you are trying to post.
 
 #### [postOverwrite.py](/postOverwrite.py)
-This POST script will overwrite existing ArchivesSpace records based on the 'uri' and can be used with any ArchivesSpace record type (e.g. resource, accession, subject, agent_people, agent_corporate_entity, archival_object, etc.). It requires a properly formatted JSON file (specified where [JSON File] appears in the 'records' variable on line 13) for the particular ArchivesSpace record type you are trying to post.
+This POST script will overwrite existing ArchivesSpace records based on the 'uri' and can be used with any ArchivesSpace record type (e.g. resource, accession, subject, agent_people, agent_corporate_entity, archival_object, etc.). It requires a properly formatted JSON file (specified where [JSON File] appears in the 'records' variable on line 13) for the particular ArchivesSpace record type you are trying to post.
+
+#### [resourcesWithNoBibNum.py](/resourcesWithNoBibNum.py)
+Prints to a CSV file the URIs of all resources in a repository without a bib number stored in the ['user_defined']['real_1'] field.
+
+#### [searchForUnassociatedContainers.py](/searchForUnassociatedContainers.py)
+Prints to a CSV file the URIs of all top containers that are not associated with a resource or archival object.
+
+#### [unpublishArchivalObjectsByResource.py](/unpublishArchivalObjectsByResource.py)
+This script unpublishes all archival objects associated with the specified resource. Upon running the script, you will be prompted to enter the resource ID (just the number, not the full URI).
+
+#### [updateFindingAidData.py](/updateFindingAidData.py)
+
+#### [updateResourceWithCSV.py](/updateResourceWithCSV.py)
```
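The README's opening (referenced in the first hunk header) says every script expects a secrets.py alongside it. A minimal sketch of that file, under the assumption that only the three attributes the scripts import are needed; the values are placeholders (8089 is the default ArchivesSpace backend port, but your instance may differ):

```python
# Hypothetical secrets.py; the attribute names baseURL, user, and
# password are the ones the scripts import (secrets.baseURL, etc.).
# The values below are placeholders, not real credentials.
baseURL = 'http://localhost:8089'  # ArchivesSpace backend API, not the public UI
user = 'admin'
password = 'admin'
```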

addBibNumbersAndPost.py

Lines changed: 45 additions & 0 deletions

```python
import json
import requests
import secrets
import time
import csv

startTime = time.time()

baseURL = secrets.baseURL
user = secrets.user
password = secrets.password

auth = requests.post(baseURL + '/users/'+user+'/login?password='+password).json()
session = auth["session"]
headers = {'X-ArchivesSpace-Session': session, 'Content-Type': 'application/json'}

urisBibs = csv.DictReader(open(''))  # input CSV path left blank in the commit

f = csv.writer(open('bibNumberPush.csv', 'wb'))
f.writerow(['uri']+['existingValue']+['bibNum'])

for row in urisBibs:
    uri = row['asURI']
    bibNum = row['bibNum']
    print uri
    record = requests.get(baseURL + uri, headers=headers).json()
    try:
        # the record already has a user_defined object; update it in place
        print record['user_defined']
        record['user_defined']['real_1'] = bibNum
        existingValue = 'Y'
    except:
        # no usable user_defined object; create one
        value = {}
        value['real_1'] = row['bibNum']
        record['user_defined'] = value
        print value
        existingValue = 'N'
    record = json.dumps(record)
    post = requests.post(baseURL + uri, headers=headers, data=record)#.json()
    print post
    f.writerow([uri]+[existingValue]+[bibNum]+[post])

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)
```
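The input CSV path in the script above was committed blank, but the column names it reads (asURI and bibNum) are fixed. A sketch of a valid input file, written and read back with the stdlib csv module (the file name urisAndBibs.csv and the row values are made up for illustration; this sketch uses Python 3):

```python
import csv

# Hypothetical input file for addBibNumbersAndPost.py; only the
# asURI and bibNum column headers are required by the script.
with open('urisAndBibs.csv', 'w', newline='') as fh:
    writer = csv.DictWriter(fh, fieldnames=['asURI', 'bibNum'])
    writer.writeheader()
    writer.writerow({'asURI': '/repositories/3/resources/407',
                     'bibNum': '12345'})

# Read it back the same way the script does
rows = list(csv.DictReader(open('urisAndBibs.csv')))
```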

dateCheck.py

Lines changed: 53 additions & 0 deletions

```python
import json
import requests
import secrets
import time
import csv

startTime = time.time()

baseURL = secrets.baseURL
user = secrets.user
password = secrets.password

auth = requests.post(baseURL + '/users/'+user+'/login?password='+password).json()
session = auth["session"]
headers = {'X-ArchivesSpace-Session': session, 'Content-Type': 'application/json'}

endpoint = '/repositories/3/resources?all_ids=true'

ids = requests.get(baseURL + endpoint, headers=headers).json()

records = []
f = csv.writer(open('duplicateBeginEndDates.csv', 'wb'))
f2 = csv.writer(open('asDates.csv', 'wb'))
f.writerow(['uri']+['begin']+['end']+['expression']+['type'])
f2.writerow(['uri']+['begin']+['end']+['expression']+['type'])
counter = 0
for id in ids:
    endpoint = '/repositories/3/resources/'+str(id)
    output = requests.get(baseURL + endpoint, headers=headers).json()
    for date in output['dates']:
        counter = counter + 1
        print counter
        try:
            begin = date['begin']
        except:
            begin = ''
        try:
            end = date['end']
        except:
            end = ''
        try:
            expression = date['expression']
        except:
            expression = ''
        # flag dates where begin and end are both present and identical
        if begin == end and begin != '' and end != '':
            f.writerow([output['uri']]+[begin]+[end]+[expression]+[date['date_type']])
        else:
            f2.writerow([output['uri']]+[begin]+[end]+[expression]+[date['date_type']])

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)
```
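The harvesting-and-sorting rule in the loop above can be isolated as a small pure function (the function name is ours, not from the script): missing keys become '', and a date counts as a duplicate only when begin and end are both present and equal.

```python
def harvest_date(date):
    # Mirrors the try/except blocks in dateCheck.py: a missing key
    # becomes ''.  The third element is True when begin and end are
    # both non-empty and identical, i.e. the row would go to
    # duplicateBeginEndDates.csv rather than asDates.csv.
    begin = date.get('begin', '')
    end = date.get('end', '')
    return begin, end, (begin == end and begin != '' and end != '')
```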

eadToCsv.py

Lines changed: 98 additions & 0 deletions

```python
import csv
from bs4 import BeautifulSoup


def extractValuesFromComponentLevel(componentLevel):
    level = componentLevel.name
    componentLevelLabel = componentLevel['level']
    unittitle = componentLevel.find('did').find('unittitle').text.replace('\n', '').encode('utf-8')
    try:
        unitdate = componentLevel.find('did').find('unitdate').text.encode('utf-8')
    except:
        unitdate = ''
    try:
        scopecontentElement = componentLevel.find('scopecontent').find_all('p')
        scopecontent = ''
        for paragraph in scopecontentElement:
            # strip newlines and collapse runs of spaces left by the XML layout
            paragraphText = paragraph.text.replace('\n', '').replace('  ', ' ').replace('  ', ' ').encode('utf-8')
            scopecontent = scopecontent + paragraphText
    except:
        scopecontent = ''
    try:
        container1 = componentLevel.find('did').find_all('container')[0].text.encode('utf-8')
    except:
        container1 = ''
    try:
        containerId1 = componentLevel.find('did').find_all('container')[0]['id']
    except:
        containerId1 = ''
    try:
        containerType1 = componentLevel.find('did').find_all('container')[0]['type']
    except:
        containerType1 = ''
    try:
        container2 = componentLevel.find('did').find_all('container')[1].text.encode('utf-8')
    except:
        container2 = ''
    try:
        containerId2 = componentLevel.find('did').find_all('container')[1]['id']
    except:
        containerId2 = ''
    try:
        containerType2 = componentLevel.find('did').find_all('container')[1]['type']
    except:
        containerType2 = ''
    global sortOrder
    sortOrder += 1
    f.writerow([sortOrder]+[level]+[componentLevelLabel]+[unittitle]+[unitdate]+[scopecontent]+[containerType1]+[container1]+[containerId1]+[containerType2]+[container2]+[containerId2])


filepath = raw_input('Enter file path: ')
fileName = raw_input('Enter file name: ')
xml = open(filepath+fileName)

f = csv.writer(open(filepath+'eadFields.csv', 'wb'))
f.writerow(['sortOrder']+['<co?>']+['<co?> level']+['<unittitle>']+['<unitdate>']+['<scopecontent>']+['containerType1']+['container1']+['containerId1']+['containerType2']+['container2']+['containerId2'])
upperComponentLevels = BeautifulSoup(xml, 'lxml').find('dsc').find_all('c01')
sortOrder = 0
for upperComponentLevel in upperComponentLevels:
    componentLevelLabel = upperComponentLevel['level']
    unittitle = upperComponentLevel.find('did').find('unittitle').text.encode('utf-8')
    try:
        scopecontentElement = upperComponentLevel.find('scopecontent').find_all('p')
        scopecontent = ''
        for paragraph in scopecontentElement:
            paragraphText = paragraph.text.replace('\n', '').replace('  ', ' ').replace('  ', ' ').encode('utf-8')
            scopecontent = scopecontent + paragraphText
    except:
        scopecontent = ''
    sortOrder += 1
    f.writerow([sortOrder]+['c01']+[componentLevelLabel]+[unittitle]+['']+[scopecontent]+['']+['']+['']+['']+['']+[''])

    # descend level by level; each cN loop runs inside its parent component
    componentLevelArray = upperComponentLevel.find_all('c02')
    for componentLevel in componentLevelArray:
        extractValuesFromComponentLevel(componentLevel)
        componentLevelArray = componentLevel.find_all('c03')
        for componentLevel in componentLevelArray:
            extractValuesFromComponentLevel(componentLevel)
            componentLevelArray = componentLevel.find_all('c04')
            for componentLevel in componentLevelArray:
                extractValuesFromComponentLevel(componentLevel)
                componentLevelArray = componentLevel.find_all('c05')
                for componentLevel in componentLevelArray:
                    extractValuesFromComponentLevel(componentLevel)
                    componentLevelArray = componentLevel.find_all('c06')
                    for componentLevel in componentLevelArray:
                        extractValuesFromComponentLevel(componentLevel)
                        componentLevelArray = componentLevel.find_all('c07')
                        for componentLevel in componentLevelArray:
                            extractValuesFromComponentLevel(componentLevel)
                            componentLevelArray = componentLevel.find_all('c08')
                            for componentLevel in componentLevelArray:
                                extractValuesFromComponentLevel(componentLevel)
                                componentLevelArray = componentLevel.find_all('c09')
                                for componentLevel in componentLevelArray:
                                    extractValuesFromComponentLevel(componentLevel)
                                    componentLevelArray = componentLevel.find_all('c10')
                                    for componentLevel in componentLevelArray:
                                        extractValuesFromComponentLevel(componentLevel)
```
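The cascading c02..c10 loops above can be expressed as a single recursive walk. A sketch using stdlib xml.etree instead of BeautifulSoup so it stays self-contained; the function name, the tiny inline EAD fragment, and the (tag, title) row shape are ours for illustration:

```python
import xml.etree.ElementTree as ET

COMPONENT_TAGS = {'c%02d' % n for n in range(1, 11)}  # c01 .. c10

def walk_components(element, rows):
    # Depth-first walk: record each cN component, then search it for
    # deeper components, replacing the repeated per-level loops.
    for child in element:
        if child.tag in COMPONENT_TAGS:
            did = child.find('did')
            title = did.findtext('unittitle', '') if did is not None else ''
            rows.append((child.tag, title))
            walk_components(child, rows)

dsc = ET.fromstring(
    '<dsc><c01 level="series"><did><unittitle>Series 1</unittitle></did>'
    '<c02 level="file"><did><unittitle>Folder 1</unittitle></did></c02>'
    '</c01></dsc>')
rows = []
walk_components(dsc, rows)
```

This also removes the depth cap: components deeper than c10 would still be visited.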

getAccessionUDFs.py

Lines changed: 71 additions & 0 deletions

```python
import json
import requests
import secrets
import time
import csv

startTime = time.time()

def findKey(d, key):
    # recursively yield values for 'key' anywhere under nested 'children' lists
    # (defined here but not used below)
    if key in d:
        yield d[key]
    for k in d:
        if isinstance(d[k], list) and k == 'children':
            for i in d[k]:
                for j in findKey(i, key):
                    yield j

baseURL = secrets.baseURL
user = secrets.user
password = secrets.password

auth = requests.post(baseURL + '/users/'+user+'/login?password='+password).json()
session = auth["session"]
headers = {'X-ArchivesSpace-Session': session, 'Content-Type': 'application/json'}

endpoint = '/repositories/3/accessions?all_ids=true'

ids = requests.get(baseURL + endpoint, headers=headers).json()

# first pass: collect every user-defined field name in use
udfs = []
for id in ids:
    print id
    endpoint = '/repositories/3/accessions/'+str(id)
    output = requests.get(baseURL + endpoint, headers=headers).json()
    try:
        userDefined = output['user_defined']
        for k, v in userDefined.items():
            if k not in udfs:
                udfs.append(k)
    except:
        userDefined = ''
udfs.sort()
udfsHeader = ['title', 'uri'] + udfs
f = csv.writer(open('accessionsUdfs.csv', 'wb'))
f.writerow(udfsHeader)

# second pass: one row per accession, values aligned to the sorted header
for id in ids:
    print id
    endpoint = '/repositories/3/accessions/'+str(id)
    output = requests.get(baseURL + endpoint, headers=headers).json()
    title = output['title'].encode('utf-8')
    uri = output['uri']
    accessionUdfs = []
    for udf in udfs:
        try:
            keyValue = udf+'|'+output['user_defined'][udf].encode('utf-8')
        except:
            keyValue = udf+'|'
        accessionUdfs.append(keyValue)
    accessionUdfs.sort()
    accessionUdfsUpdated = []
    for accessionUdf in accessionUdfs:
        edited = accessionUdf[accessionUdf.index('|')+1:]
        accessionUdfsUpdated.append(edited)
    accessionUdfsRow = [title, uri] + accessionUdfsUpdated
    f.writerow(accessionUdfsRow)

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)
```
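The second pass above uses a 'name|value' tagging trick to keep each value in the same column as its sorted header entry, even when an accession is missing some fields. A condensed sketch of that trick as a pure function (the function name is ours):

```python
def align_udf_values(sorted_udfs, user_defined):
    # Tag each value with its field name, sort so the values fall in
    # the same order as the sorted header, then strip the 'name|' tag.
    # Missing fields become empty strings, as in getAccessionUDFs.py.
    tagged = [name + '|' + user_defined.get(name, '') for name in sorted_udfs]
    tagged.sort()
    return [t[t.index('|') + 1:] for t in tagged]
```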

getAccessions.py

Lines changed: 34 additions & 0 deletions

```python
import json
import requests
import secrets
import time

startTime = time.time()

baseURL = secrets.baseURL
user = secrets.user
password = secrets.password

auth = requests.post(baseURL + '/users/'+user+'/login?password='+password).json()
session = auth["session"]
headers = {'X-ArchivesSpace-Session': session, 'Content-Type': 'application/json'}
print 'authenticated'

endpoint = '/repositories/3/accessions?all_ids=true'

ids = requests.get(baseURL + endpoint, headers=headers).json()

records = []
for id in ids:
    endpoint = '/repositories/3/accessions/'+str(id)
    output = requests.get(baseURL + endpoint, headers=headers).json()
    records.append(output)

f = open('accessions.json', 'w')
json.dump(records, f)
f.close()

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)
```
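Several scripts in this commit share the same two-step fetch pattern: '?all_ids=true' returns a list of integer ids, then each record is fetched with its own GET. A sketch of that pattern with the HTTP call abstracted away (the function name and the stub store are ours; get_json stands in for something like lambda e: requests.get(baseURL + e, headers=headers).json()):

```python
def fetch_all(get_json, collection_endpoint):
    # Step 1: list of integer ids; step 2: one GET per record.
    ids = get_json(collection_endpoint + '?all_ids=true')
    return [get_json(collection_endpoint + '/' + str(i)) for i in ids]

# Stub store in place of a live ArchivesSpace backend:
store = {
    '/repositories/3/accessions?all_ids=true': [1, 2],
    '/repositories/3/accessions/1': {'uri': '/repositories/3/accessions/1'},
    '/repositories/3/accessions/2': {'uri': '/repositories/3/accessions/2'},
}
records = fetch_all(store.__getitem__, '/repositories/3/accessions')
```

Abstracting the fetch this way also makes the loop testable without a server.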
