1+ import json
2+ import logging
3+ from copy import copy
4+ from typing import Union
5+
16from model .smartapi import SmartAPIDoc
7+ from utils import decoder
8+ from utils .downloader import download
29
310from .component import Components
411from .endpoint import Endpoint
512
13+ logger = logging .getLogger ("metakg_parser" )
14+
615
716class API :
8- _smartapi_doc = {}
17+ _smartapi_doc = None
918
def __init__(self, smartapi_doc: Union[dict, None] = None, url: Union[str, None] = None, id: Union[str, None] = None):
    """Store the document, its source url and its identifier.

    Args:
        smartapi_doc: an already-parsed SmartAPI document, or None.
        url: where the document lives; kept as ``self.url``.
        id: identifier kept as ``self._id``. When it is falsy and ``url``
            is truthy, ``decoder.get_id(url)`` provides the value instead.
    """
    self._smartapi_doc, self.url, self._id = smartapi_doc, url, id
    if not id and url:
        # no explicit id was supplied: generate one from the url
        self._id = decoder.get_id(url)
1226
@property
def smartapi_doc(self):
    """Return the SmartAPI document, downloading it from ``self.url`` if needed.

    A download is attempted only while no document is stored and a url is
    set. The result is stored only when the response status is 200;
    otherwise the stored value (None) is returned unchanged.
    """
    already_loaded = self._smartapi_doc is not None
    if already_loaded or not self.url:
        return self._smartapi_doc

    response = download(self.url)
    if response.status == 200:
        self._smartapi_doc = decoder.to_dict(response.raw)
    return self._smartapi_doc
1634
1735 @property
@@ -20,7 +38,18 @@ def metadata(self):
2038 metadata ["operations" ] = self .fetch_all_opts ()
2139 return metadata
2240
23- def fetch_API_title (self ):
@property
def is_trapi(self):
    """True when the API carries both the "trapi" and "translator" tags."""
    trapi_markers = ("trapi", "translator")
    return self.has_tags(*trapi_markers)
45+
def has_tags(self, *tags):
    """Return True if this SmartAPI document contains all of the given tags.

    Args:
        *tags: tag names to look for in ``self.tags``.

    Returns:
        bool: True when every requested tag is present. Calling with no
        tags returns True, since there is nothing to miss.
    """
    # issubset states the intent directly instead of measuring a set difference
    return set(tags).issubset(self.tags)
50+
@property
def title(self):
    """Return ``info.title`` from the document, or None when it has no "info" section."""
    doc = self.smartapi_doc
    return doc["info"]["title"] if "info" in doc else None
@@ -39,35 +68,41 @@ def fetch_XTranslator_team(self):
3968 return []
4069 return self .smartapi_doc ["info" ]["x-translator" ]["team" ]
4170
@property
def tags(self):
    """Return the ``name`` of every entry under the document's "tags" key.

    An empty list is returned when the document has no "tags" key.
    """
    doc = self.smartapi_doc
    if "tags" in doc:
        return [tag["name"] for tag in doc["tags"]]
    return []
4676
@property
def server_url(self):
    """Return the default server url without a trailing slash.

    Returns None when the document has no "servers" key. Otherwise the url
    chosen by ``SmartAPIDoc.get_default_server_url`` is returned with one
    trailing "/" removed, if present.
    """
    doc = self.smartapi_doc
    if "servers" not in doc:
        return None
    default_url = SmartAPIDoc.get_default_server_url(doc["servers"])
    # NOTE(review): assumes the helper always returns a str - confirm
    return default_url[:-1] if default_url.endswith("/") else default_url
5185
@property
def components(self):
    """Wrap the document's "components" section in a ``Components`` object.

    Returns None when the document has no "components" key.
    """
    doc = self.smartapi_doc
    return Components(doc["components"]) if "components" in doc else None
5691
5792 def fetch_API_meta (self ):
5893 return {
59- "title" : self .fetch_API_title () ,
60- "tags" : self .fetch_API_tags () ,
61- "url" : self .fetch_server_url () ,
94+ "title" : self .title ,
95+ "tags" : self .tags ,
96+ "url" : self .server_url ,
6297 "x-translator" : {
6398 "component" : self .fetch_XTranslator_component (),
6499 "team" : self .fetch_XTranslator_team (),
65100 },
66101 "smartapi" : {
67- "id" : self .smartapi_doc .get ("_id" ),
102+ "id" : self .smartapi_doc .get ("_id" ) or self . _id ,
68103 "meta" : self .smartapi_doc .get ("_meta" ),
69104 },
70- "components" : self .fetch_components () ,
105+ "components" : self .components ,
71106 "paths" : list (self .smartapi_doc ["paths" ].keys ())
72107 if isinstance (self .smartapi_doc .get ("paths" ), dict )
73108 else [],
@@ -82,3 +117,120 @@ def fetch_all_opts(self):
82117 ep = Endpoint (self .smartapi_doc ["paths" ][path ], api_meta , path )
83118 ops = [* ops , * ep .construct_endpoint_info ()]
84119 return ops
120+
121+ # methods for extracting metakg edges
def extract_metakgedges(self, ops, extra_data=None):
    """Turn parsed operations into a list of MetaKG edge dicts.

    Args:
        ops: iterable of operation dicts. Each needs an "association"
            mapping and a "tags" entry; "query_operation" and
            "response_mapping" are optional.
        extra_data: optional mapping whose "url" and "id" are used when
            the operation's own smartapi data does not provide them.

    Returns:
        list of dicts with keys subject, object, subject_prefix,
        object_prefix, predicate and api.
    """
    fallback = extra_data or {}
    bte_fields = ("query_operation", "response_mapping")

    edges = []
    for op in ops:
        assoc = op["association"]
        registry = assoc["smartapi"]
        metadata_url = (registry.get("meta") or {}).get("url") or fallback.get("url")
        api_id = registry.get("id") or fallback.get("id")

        edge = {
            "subject": assoc["input_type"],
            "object": assoc["output_type"],
            "subject_prefix": assoc["input_id"],
            "object_prefix": assoc["output_id"],
            "predicate": assoc["predicate"],
        }
        api_info = {
            "name": assoc["api_name"],
            "smartapi": {
                "metadata": metadata_url,
                "id": api_id,
                "ui": f"https://smart-api.info/ui/{api_id}",
            },
            "tags": op["tags"],
            "x-translator": assoc["x-translator"],
            "provided_by": assoc.get("source"),
        }

        # bte-specific edge metadata, only for the fields the operation has
        bte = {field: op[field] for field in bte_fields if field in op}
        if "query_operation" in bte and "tags" in bte["query_operation"]:
            # the tags are already on the edge; drop them from a shallow
            # copy so the caller's operation is left untouched
            trimmed = copy(bte["query_operation"])
            del trimmed["tags"]
            bte["query_operation"] = trimmed
        if bte:
            api_info["bte"] = bte

        edge["api"] = api_info
        edges.append(edge)
    return edges
165+
def get_xbte_metaedges(self, extra_data=None):
    """Build MetaKG edges from the operations listed in ``self.metadata``.

    Logs the number of distinct subject/object values and of distinct
    predicates, then returns the edge list.
    """
    edges = self.extract_metakgedges(self.metadata["operations"], extra_data=extra_data)

    node_types = set()
    predicates = set()
    for edge in edges:
        node_types.update((edge["subject"], edge["object"]))
        predicates.add(edge["predicate"])

    # the "edges" figure in the message is the count of distinct predicates
    logger.info("Done [%s nodes, %s edges]", len(node_types), len(predicates))
    return edges
172+
def get_trapi_metaedges(self, extra_data=None):
    """Build MetaKG edges from the operations returned by ``get_ops_from_trapi``.

    ``self.metakg_errors`` is reset to an empty dict first. If it is
    non-empty after the operations are collected, the total error count
    and a JSON dump of the errors are logged.
    """
    self.metakg_errors = {}
    trapi_ops = list(self.get_ops_from_trapi())

    if self.metakg_errors:
        cnt_metakg_errors = sum(len(errs) for errs in self.metakg_errors.values())
        logger.error(f"Found {cnt_metakg_errors} TRAPI metakg errors:\n {json.dumps(self.metakg_errors, indent=2)}")

    return self.extract_metakgedges(trapi_ops, extra_data=extra_data)
182+
def get_metakg(self, include_trapi=True):
    """Return the MetaKG edges for this API.

    Args:
        include_trapi: when False, an API for which ``is_trapi`` is true
            yields an empty list instead of its TRAPI edges.

    ``self.metakg_errors`` is reset to None before anything else runs.
    """
    self.metakg_errors = None  # reset metakg_errors
    if not self.is_trapi:
        return self.get_xbte_metaedges()
    return self.get_trapi_metaedges() if include_trapi else []
190+
def get_ops_from_trapi(self):
    """Fetch the API's ``/meta_knowledge_graph`` endpoint and parse it into operations.

    Returns:
        list: the result of ``parse_trapi_metakg_endpoint`` on the decoded
        response, or an empty list when the metadata has no server url or
        the response status is not 200.
    """
    # Read the metadata property once: it rebuilds the operations list on
    # access, so a second read repeats that work for nothing.
    server_url = self.metadata.get("url")
    if not server_url:
        return []

    trapi_metakg_url = server_url + "/meta_knowledge_graph"
    content = download(trapi_metakg_url, timeout=60)  # 60s timeout, as trapi endpoint can be slow
    if content.status != 200:
        return []
    return self.parse_trapi_metakg_endpoint(decoder.to_dict(content.raw))
199+
def parse_trapi_metakg_endpoint(self, response):
    """Convert a TRAPI meta knowledge graph response into operation dicts.

    Args:
        response: decoded response mapping. Its "edges" entries each need
            "subject", "object" and "predicate"; "nodes" optionally maps a
            category to a dict with "id_prefixes".

    Returns:
        list: one operation dict per edge, each with "association", "tags"
        and "query_operation" entries. Empty when there are no edges.
    """
    edges = response.get("edges", [])
    if not edges:
        # nothing to build, so do not touch self.metadata at all
        return []

    # Read the metadata property once: it rebuilds the operations list on
    # access, and the loop previously read it several times per edge.
    # The "smartapi" and "x-translator" values are therefore the same
    # objects across all returned operations.
    metadata = self.metadata
    api_tags = metadata.get("tags") or []  # tolerate a missing/None tag list
    server = metadata.get("url")
    nodes = response.get("nodes", {})

    ops = []
    for pred in edges:
        subject = pred["subject"]
        obj = pred["object"]
        ops.append(
            {
                "association": {
                    "input_type": self.remove_bio_link_prefix(subject),
                    "input_id": nodes.get(subject, {}).get("id_prefixes"),
                    "output_type": self.remove_bio_link_prefix(obj),
                    "output_id": nodes.get(obj, {}).get("id_prefixes"),
                    "predicate": self.remove_bio_link_prefix(pred["predicate"]),
                    "api_name": metadata.get("title"),
                    "smartapi": metadata.get("smartapi"),
                    "x-translator": metadata.get("x-translator"),
                },
                # separate list objects, so editing one does not alter the other
                "tags": [*api_tags, "bte-trapi"],
                "query_operation": {
                    "path": "/query",
                    "method": "post",
                    "server": server,
                    "path_params": None,
                    "params": None,
                    "request_body": None,
                    "support_batch": True,
                    "input_separator": ",",
                    "tags": [*api_tags, "bte-trapi"],
                },
            }
        )
    return ops
230+
def remove_bio_link_prefix(self, _input):
    """Strip a leading "biolink:" from a string.

    Returns None for non-string input, and the input unchanged when it
    does not start with the prefix.
    """
    if not isinstance(_input, str):
        return None
    prefix = "biolink:"
    return _input[len(prefix):] if _input.startswith(prefix) else _input
0 commit comments