@@ -19,18 +19,25 @@
 ids = requests.get(baseURL + endpoint, headers=headers).json()
 
 f=csv.writer(open('topContainerCountByResource.csv', 'wb'))
-f.writerow(['title']+['uri']+['id_0']+['id_1']+['id_2']+['id_3']+['tcCount'])
+f.writerow(['title']+['uri']+['id_0']+['id_1']+['id_2']+['id_3']+['topContainerCount'])
 
 f2=csv.writer(open('topContainersLinks.csv', 'wb'))
 f2.writerow(['resourceUri']+['topContainerUri'])
 
+f3=csv.writer(open('uniqueTopContainers.csv', 'wb'))
+f3.writerow(['topContainer']+['indicator']+['barcode'])
+
+
+total = len(ids)
 topContainerLinks = []
 uniqueTopContainers = []
 for id in ids:
-    print 'id', id
+
+    resourceTopContainers = []
+    print 'id', id, total, 'records remaining'
+    total = total - 1
     endpoint = '/repositories/3/resources/'+str(id)
     output = requests.get(baseURL + endpoint, headers=headers).json()
-    topContainers =[]
     title = output['title'].encode('utf-8')
     uri = output['uri']
     id0 = output['id_0']
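Note: this script is Python 2 (print statements, csv files opened in 'wb' mode). For anyone adapting it, a minimal Python 3 sketch of the three writer setups added above would open the files in text mode with newline='' instead; the file names and header rows come from the diff, the rest is standard library usage.

import csv

# Python 3: open csv output files in text mode with newline=''
f = csv.writer(open('topContainerCountByResource.csv', 'w', newline=''))
f.writerow(['title', 'uri', 'id_0', 'id_1', 'id_2', 'id_3', 'topContainerCount'])

f2 = csv.writer(open('topContainersLinks.csv', 'w', newline=''))
f2.writerow(['resourceUri', 'topContainerUri'])

f3 = csv.writer(open('uniqueTopContainers.csv', 'w', newline=''))
f3.writerow(['topContainer', 'indicator', 'barcode'])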
@@ -46,24 +53,23 @@
         id3 = output['id_3']
     except:
         id3=''
-    searchEndpoint = '/repositories/3/top_containers/search'
-    output = requests.get(baseURL + searchEndpoint, headers=headers).json()
+    # ###searchEndpoint = '/repositories/3/top_containers/search'
+    # ###output = requests.get(baseURL + searchEndpoint, headers=headers).json()
     page = 1
     payload = {'page': page, 'page_size': '3000', 'root_record': endpoint}
     search = requests.get(baseURL+'/search', headers=headers, params=payload).json()
     results = []
     resultsPage = search['results']
     for result in resultsPage:
         results.append(result)
-
     while resultsPage != []:
         page = page + 1
         payload = {'page': page, 'page_size': '3000', 'root_record': endpoint}
         search = requests.get(baseURL+'/search', headers=headers, params=payload).json()
         resultsPage = search['results']
         for result in resultsPage:
             results.append(result)
-    resourceTopContainers = []
+
     for result in results:
         try:
             topContainers = result['top_container_uri_u_sstr']
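Note: the search block above pages through the ArchivesSpace /search endpoint, scoped to a single resource via the root_record parameter, and stops when a page of results comes back empty. A self-contained sketch of that pattern, assuming the same baseURL, headers, and payload keys as the diff (the function name is illustrative):

import requests

def fetch_all_results(baseURL, headers, endpoint):
    """Collect every search result for one resource, page by page."""
    results = []
    page = 1
    while True:
        payload = {'page': page, 'page_size': '3000', 'root_record': endpoint}
        resultsPage = requests.get(baseURL + '/search', headers=headers,
                                   params=payload).json()['results']
        if not resultsPage:  # an empty page is the stop signal
            break
        results.extend(resultsPage)
        page += 1
    return results

The empty-page check costs one extra request per resource, but it means the loop never has to consult a hit count in the response.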
@@ -72,22 +78,30 @@
                 resourceTopContainers.append(topContainer)
                 if topContainer not in uniqueTopContainers:
                     uniqueTopContainers.append(topContainer)
-                topContainerLink = str(id) +'|'+topContainer
+                topContainerLink = str(id)+'|'+topContainer
                 if topContainerLink not in topContainerLinks:
                     topContainerLinks.append(topContainerLink)
         except:
-            topContainer = ''
+            topContainers = []
     topContainerCount = len(resourceTopContainers)
     print 'top containers', topContainerCount
     f.writerow([title]+[uri]+[id0]+[id1]+[id2]+[id3]+[topContainerCount])
 
 for topContainerLink in topContainerLinks:
     f2.writerow([topContainerLink[:topContainerLink.index('|')]]+[topContainerLink[topContainerLink.index('|')+1:]])
 
-f3=csv.writer(open('uniqueTopContainers.csv', 'wb'))
-f3.writerow(['topContainer'])
 for topContainer in uniqueTopContainers:
-    f3.writerow([topContainer])
+    search = requests.get(baseURL+topContainer, headers=headers).json()
+    try:
+        indicator = search['indicator']
+    except:
+        indicator = ''
+
+    try:
+        barcode = search['barcode']
+    except:
+        barcode = ''
+    f3.writerow([topContainer]+[indicator]+[barcode])
 
 elapsedTime = time.time() - startTime
 m, s = divmod(elapsedTime, 60)
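Note: each unique top container URI collected above is fetched individually so its indicator and barcode can be written alongside the URI; the bare try/except blocks guard against records missing either field. In Python 3, dict.get() expresses the same fallback more compactly (a sketch; the indicator and barcode field names come from the diff, the helper function is illustrative):

import requests

def container_row(baseURL, headers, topContainer):
    """Return [uri, indicator, barcode] for one top container record."""
    record = requests.get(baseURL + topContainer, headers=headers).json()
    # .get() falls back to '' when a field is absent, replacing the bare excepts
    return [topContainer, record.get('indicator', ''), record.get('barcode', '')]

One performance note: uniqueTopContainers and topContainerLinks are plain lists, so every not-in membership test is a linear scan; a set would keep those checks constant-time for large repositories.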