Skip to content

Commit 272cb3c

Browse files
author
ehanson8
committed
updates
1 parent d8d36b2 commit 272cb3c

File tree

2 files changed

+170
-0
lines changed

2 files changed

+170
-0
lines changed

extractMarcFields.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import json
2+
import csv
3+
import time
4+
5+
def extractMarcField (tag):
    """Write one CSV row per data field of the current record whose tag equals *tag*.

    Reads the module-level ``record`` and writes through the module-level
    csv writer ``f``.  Each row is: bibnum, tag, indicator 1, indicator 2,
    field text (UTF-8 encoded).

    The bibnum comes from the record's 910 field: the subfield value
    itself when it is a single string, otherwise its first element.  If
    the record has several 910 fields the last one wins.

    tag -- the exact tag to extract, as a string such as '245'.
    """
    dataFields = record['record']['datafield']

    # Start from an empty bibnum so a record without a 910 field still
    # yields rows instead of raising UnboundLocalError at writerow time.
    bibnum = ''
    for dataField in dataFields:
        if dataField['tag'] == '910':
            bibnum = dataField['subfield']
            if not isinstance(bibnum, basestring):
                bibnum = bibnum[0]

    for dataField in dataFields:
        if dataField['tag'] != tag:
            continue
        value = dataField['subfield']
        indicator1 = dataField['ind1']
        indicator2 = dataField['ind2']
        if isinstance(value, basestring):
            tagData = value
        else:
            # Build the text from scratch for every field.  Accumulating
            # into a variable initialised once per call made a repeated
            # tag carry the previous field's text into the next row.
            # Each subfield keeps its trailing space, as before.
            tagData = ''.join(subfield + ' ' for subfield in value)
        f.writerow([bibnum, tag, indicator1, indicator2, tagData.encode('utf-8')])
26+
27+
def extractMarcFieldStartsWith (digit):
    """Write one CSV row per data field of the current record whose tag starts with *digit*.

    Reads the module-level ``record`` and writes through the module-level
    csv writer ``f``.  Each row is: bibnum, the field's own tag,
    indicator 1, indicator 2, field text (UTF-8 encoded).

    The bibnum comes from the record's 910 field: the subfield value
    itself when it is a single string, otherwise its first element.  If
    the record has several 910 fields the last one wins.

    When a field's subfield value is a list, only its string elements are
    used; each one is followed by '--' (so the text ends with '--').

    digit -- prefix to match against each tag with str.startswith,
             e.g. '6' for every 6xx field.
    """
    dataFields = record['record']['datafield']

    # Start from an empty bibnum so a record without a 910 field still
    # yields rows instead of raising UnboundLocalError at writerow time.
    bibnum = ''
    for dataField in dataFields:
        if dataField['tag'] == '910':
            bibnum = dataField['subfield']
            if not isinstance(bibnum, basestring):
                bibnum = bibnum[0]

    for dataField in dataFields:
        tagNumber = dataField['tag']
        if not tagNumber.startswith(digit):
            continue
        value = dataField['subfield']
        indicator1 = dataField['ind1']
        indicator2 = dataField['ind2']
        if isinstance(value, basestring):
            tagData = value
        else:
            # Non-string elements are skipped rather than concatenated.
            tagData = ''.join(
                subfield + '--'
                for subfield in value
                if isinstance(subfield, basestring)
            )
        f.writerow([bibnum, tagNumber, indicator1, indicator2, tagData.encode('utf-8')])
52+
53+
startTime = time.time()
54+
file = 'C:/Users/ehanson8/Downloads/combined.json'
55+
56+
records = json.load(open(file))
57+
58+
f=csv.writer(open('marcFields.csv', 'wb'))
59+
f.writerow(['bibnum']+['tag']+['indicator1']+['indicator2']+['value'])
60+
61+
for record in records:
62+
extractMarcFieldStartsWith('1')
63+
extractMarcField('245')
64+
extractMarcField('520')
65+
extractMarcField('540')
66+
extractMarcField('545')
67+
extractMarcField('561')
68+
extractMarcFieldStartsWith('6')
69+
extractMarcFieldStartsWith('7')
70+
71+
elapsedTime = time.time() - startTime
72+
m, s = divmod(elapsedTime, 60)
73+
h, m = divmod(m, 60)
74+
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)

getPropertiesFromResources.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import json
2+
import requests
3+
import secrets
4+
import time
5+
import csv
6+
7+
startTime = time.time()
8+
9+
baseURL = secrets.baseURL
10+
user = secrets.user
11+
password = secrets.password
12+
13+
auth = requests.post(baseURL + '/users/'+user+'/login?password='+password).json()
14+
session = auth["session"]
15+
headers = {'X-ArchivesSpace-Session':session, 'Content_Type':'application/json'}
16+
17+
endpoint = '/repositories/3/resources?all_ids=true'
18+
19+
ids = requests.get(baseURL + endpoint, headers=headers).json()
20+
21+
f=csv.writer(open('resourceProperties.csv', 'wb'))
22+
f.writerow(['title']+['uri']+['bibnum']+['type']+['value'])
23+
24+
total = len(ids)
25+
for id in ids:
26+
print 'id', id, total, 'records remaining'
27+
total = total - 1
28+
endpoint = '/repositories/3/resources/'+str(id)
29+
output = requests.get(baseURL + endpoint, headers=headers).json()
30+
31+
title = output['title'].encode('utf-8')
32+
uri = output['uri']
33+
try:
34+
bibnum = output['user_defined']['real_1']
35+
except:
36+
bibnum = ''
37+
try:
38+
agents = output['linked_agents']
39+
for agent in agents:
40+
agentUri = agent['ref']
41+
agentOutput = requests.get(baseURL + agentUri, headers=headers).json()
42+
agentName = agentOutput['title']
43+
f.writerow([title]+[uri]+[bibnum]+['name']+[agentName])
44+
except:
45+
pass
46+
try:
47+
subjects = output['subjects']
48+
for subject in subjects:
49+
subjectUri = subject['ref']
50+
subjectOutput = requests.get(baseURL + subjectUri, headers=headers).json()
51+
subjectName = subjectOutput['title']
52+
f.writerow([title]+[uri]+[bibnum]+['subject']+[subjectName])
53+
except:
54+
pass
55+
for note in output['notes']:
56+
abstract = ''
57+
scopecontent = ''
58+
acqinfo = ''
59+
custodhist = ''
60+
try:
61+
if note['type'] == 'abstract':
62+
abstract = note['content'][0].encode('utf-8')
63+
64+
f.writerow([title]+[uri]+[bibnum]+['abstract']+[abstract])
65+
if note['type'] == 'scopecontent':
66+
scopecontentSubnotes = note['subnotes']
67+
for subnote in scopecontentSubnotes:
68+
scopecontent = scopecontent + subnote['content'].encode('utf-8') + ' '
69+
f.writerow([title]+[uri]+[bibnum]+['scopecontent']+[scopecontent])
70+
if note['type'] == 'acqinfo':
71+
acqinfoSubnotes = note['subnotes']
72+
for subnote in acqinfoSubnotes:
73+
acqinfo = acqinfo + subnote['content'].encode('utf-8') + ' '
74+
f.writerow([title]+[uri]+[bibnum]+['acqinfo']+[acqinfo])
75+
if note['type'] == 'custodhist':
76+
custodhistSubnotes = note['subnotes']
77+
for subnote in custodhistSubnotes:
78+
custodhist = custodhist + subnote['content'].encode('utf-8') + ' '
79+
f.writerow([title]+[uri]+[bibnum]+['custodhist']+[custodhist])
80+
if note['type'] == 'bioghist':
81+
custodhistSubnotes = note['subnotes']
82+
for subnote in custodhistSubnotes:
83+
custodhist = custodhist + subnote['content'].encode('utf-8') + ' '
84+
f.writerow([title]+[uri]+[bibnum]+['bioghist']+[custodhist])
85+
if note['type'] == 'accessrestrict':
86+
custodhistSubnotes = note['subnotes']
87+
for subnote in custodhistSubnotes:
88+
custodhist = custodhist + subnote['content'].encode('utf-8') + ' '
89+
f.writerow([title]+[uri]+[bibnum]+['accessrestrict']+[custodhist])
90+
except:
91+
f.writerow([title]+[uri]+[bibnum]+['']+[custodhist])
92+
93+
elapsedTime = time.time() - startTime
94+
m, s = divmod(elapsedTime, 60)
95+
h, m = divmod(m, 60)
96+
print 'Total script run time: ', '%d:%02d:%02d' % (h, m, s)

0 commit comments

Comments
 (0)