Skip to content

Commit 78a27e0

Browse files
committed
refactor: ♻️ moved metakg parsing to API class
1 parent 757f654 commit 78a27e0

File tree

2 files changed

+168
-14
lines changed

2 files changed

+168
-14
lines changed

src/utils/metakg/api.py

Lines changed: 165 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,35 @@
1+
import json
2+
import logging
3+
from copy import copy
4+
from typing import Union
5+
16
from model.smartapi import SmartAPIDoc
7+
from utils import decoder
8+
from utils.downloader import download
29

310
from .component import Components
411
from .endpoint import Endpoint
512

13+
logger = logging.getLogger("metakg_parser")
14+
615

716
class API:
8-
_smartapi_doc = {}
17+
_smartapi_doc = None
918

10-
def __init__(self, smartapi_doc):
19+
def __init__(
    self,
    smartapi_doc: Union[dict, None] = None,
    url: Union[str, None] = None,
    id: Union[str, None] = None,
):
    """Keep the SmartAPI document and/or the URL it can be loaded from.

    Args:
        smartapi_doc: an already-parsed SmartAPI document, if available.
        url: location of the document; stored as ``self.url``.
        id: identifier for the document. When it is falsy and a URL is
            given, the identifier is produced by ``decoder.get_id(url)``.
    """
    self._smartapi_doc = smartapi_doc
    self.url = url
    # generate hashed _id from url, but only when no id was supplied
    must_derive_id = bool(url) and not id
    self._id = decoder.get_id(url) if must_derive_id else id
1226

1327
@property
def smartapi_doc(self):
    """Return the SmartAPI document, fetching it from ``self.url`` if needed.

    A download is attempted only while no document is stored and a URL is
    set. The decoded payload is stored only when the response status is
    200; otherwise the stored value (possibly ``None``) is returned as is.
    """
    nothing_loaded = self._smartapi_doc is None
    if nothing_loaded and self.url:
        response = download(self.url)
        if response.status == 200:
            self._smartapi_doc = decoder.to_dict(response.raw)
    return self._smartapi_doc
1634

1735
@property
@@ -20,7 +38,18 @@ def metadata(self):
2038
metadata["operations"] = self.fetch_all_opts()
2139
return metadata
2240

23-
def fetch_API_title(self):
41+
@property
def is_trapi(self):
    """Tell whether this API carries both the "trapi" and "translator" tags."""
    required_tags = ("trapi", "translator")
    return self.has_tags(*required_tags)
45+
46+
def has_tags(self, *tags):
    """Return True if this SmartAPI document contains all of the given tags.

    Args:
        *tags: tag names that must all be present in ``self.tags``.

    Returns:
        True when every requested tag is present (trivially True when no
        tags are requested), False otherwise.
    """
    # subset test instead of materialising a set difference and measuring it
    return set(tags).issubset(self.tags)
50+
51+
@property
def title(self):
    """Return the API title from the document's ``info`` section.

    Returns:
        The value of ``info.title``, or None when the document, its
        ``info`` section, or the ``title`` entry is missing or null.
    """
    # tolerate an unavailable document and an explicit null ``info`` section
    info = (self.smartapi_doc or {}).get("info") or {}
    return info.get("title")
@@ -39,35 +68,41 @@ def fetch_XTranslator_team(self):
3968
return []
4069
return self.smartapi_doc["info"]["x-translator"]["team"]
4170

42-
def fetch_API_tags(self):
71+
@property
def tags(self):
    """Return the names of the document's top-level tags.

    Returns:
        A list with the ``name`` of every entry in the ``tags`` section,
        in document order. The list is empty when the document is
        unavailable or the section is missing, null, or empty.

    Raises:
        KeyError: if a tag entry has no ``name``.
    """
    # ``or []`` also covers an explicit null section, not just a missing key
    tag_entries = (self.smartapi_doc or {}).get("tags") or []
    return [entry["name"] for entry in tag_entries]
4676

47-
def fetch_server_url(self):
77+
@property
def server_url(self):
    """Return the default server URL without a trailing slash.

    The URL is chosen by ``SmartAPIDoc.get_default_server_url`` from the
    document's ``servers`` section; a single trailing "/" is removed.

    Returns:
        The URL, or None when the document is unavailable or its
        ``servers`` section is missing, null, or empty.
    """
    servers = (self.smartapi_doc or {}).get("servers")
    if not servers:
        return None
    default_url = SmartAPIDoc.get_default_server_url(servers)
    # NOTE(review): the helper is not visible here; guard in case it can
    # return a non-string (e.g. None) -- confirm against its implementation
    if isinstance(default_url, str) and default_url.endswith("/"):
        default_url = default_url[:-1]
    return default_url
5185

52-
def fetch_components(self):
86+
@property
def components(self):
    """Wrap the document's ``components`` section in a ``Components`` object.

    Returns None when the document has no ``components`` key.
    """
    if "components" in self.smartapi_doc:
        return Components(self.smartapi_doc["components"])
    return None
5691

5792
def fetch_API_meta(self):
5893
return {
59-
"title": self.fetch_API_title(),
60-
"tags": self.fetch_API_tags(),
61-
"url": self.fetch_server_url(),
94+
"title": self.title,
95+
"tags": self.tags,
96+
"url": self.server_url,
6297
"x-translator": {
6398
"component": self.fetch_XTranslator_component(),
6499
"team": self.fetch_XTranslator_team(),
65100
},
66101
"smartapi": {
67-
"id": self.smartapi_doc.get("_id"),
102+
"id": self.smartapi_doc.get("_id") or self._id,
68103
"meta": self.smartapi_doc.get("_meta"),
69104
},
70-
"components": self.fetch_components(),
105+
"components": self.components,
71106
"paths": list(self.smartapi_doc["paths"].keys())
72107
if isinstance(self.smartapi_doc.get("paths"), dict)
73108
else [],
@@ -82,3 +117,120 @@ def fetch_all_opts(self):
82117
ep = Endpoint(self.smartapi_doc["paths"][path], api_meta, path)
83118
ops = [*ops, *ep.construct_endpoint_info()]
84119
return ops
120+
121+
# methods for extracting metakg edges
122+
def extract_metakgedges(self, ops, extra_data=None):
    """Convert parsed operations into MetaKG edge dictionaries.

    Args:
        ops: iterable of operation dicts. Each must provide ``tags`` and an
            ``association`` dict with the keys read below;
            ``query_operation`` and ``response_mapping`` are optional.
        extra_data: optional dict whose ``url`` and ``id`` entries are used
            as fallbacks when the operation's own ``smartapi`` data has
            none.

    Returns:
        A list with one edge dict per operation, in input order.

    Raises:
        KeyError: if an operation lacks one of the required keys.
    """
    extra_data = extra_data or {}

    metakg_edges = []
    for op in ops:
        association = op["association"]
        smartapi_data = association["smartapi"]
        url = (smartapi_data.get("meta") or {}).get("url") or extra_data.get("url")
        _id = smartapi_data.get("id") or extra_data.get("id")

        edge = {
            "subject": association["input_type"],
            "object": association["output_type"],
            "subject_prefix": association["input_id"],
            "object_prefix": association["output_id"],
            "predicate": association["predicate"],
            "api": {
                "name": association["api_name"],
                "smartapi": {
                    "metadata": url,
                    "id": _id,
                    # NOTE(review): renders ".../ui/None" when no id is known
                    "ui": f"https://smart-api.info/ui/{_id}",
                },
                "tags": op["tags"],
                "x-translator": association["x-translator"],
                "provided_by": association.get("source"),
                # NOTE: meta.date_created, meta.date_updated and
                # meta.username are currently not exported
            },
        }

        # include bte-specific edge metadata
        bte = {}
        for attr in ("query_operation", "response_mapping"):
            if attr not in op:
                continue
            value = op[attr]
            # remove redundant query_operation.tags field; work on a shallow
            # copy so the caller's operation is left untouched, and skip
            # values that are not dicts instead of failing on them
            if attr == "query_operation" and isinstance(value, dict) and "tags" in value:
                value = copy(value)
                del value["tags"]
            bte[attr] = value
        if bte:
            edge["api"]["bte"] = bte
        metakg_edges.append(edge)
    return metakg_edges
165+
166+
def get_xbte_metaedges(self, extra_data=None):
    """Build MetaKG edges from this API's parsed operations and log a summary.

    The operations come from ``self.metadata["operations"]`` and are
    converted with ``extract_metakgedges``; ``extra_data`` is forwarded
    unchanged. Returns the resulting list of edges.
    """
    edges = self.extract_metakgedges(self.metadata["operations"], extra_data=extra_data)
    subjects = {edge["subject"] for edge in edges}
    objects = {edge["object"] for edge in edges}
    predicates = {edge["predicate"] for edge in edges}
    # the figure reported as "edges" is the number of distinct predicates
    node_count = len(subjects | objects)
    edge_count = len(predicates)
    logger.info("Done [%s nodes, %s edges]", node_count, edge_count)
    return edges
172+
173+
def get_trapi_metaedges(self, extra_data=None):
    """Build MetaKG edges from the operations reported by the TRAPI endpoint.

    ``self.metakg_errors`` is reset to an empty dict before the operations
    are fetched with ``get_ops_from_trapi``. If it is non-empty afterwards,
    the collected errors are logged.

    Args:
        extra_data: forwarded unchanged to ``extract_metakgedges``.

    Returns:
        The list of edges produced by ``extract_metakgedges``.
    """
    self.metakg_errors = {}
    ops = self.get_ops_from_trapi()
    if self.metakg_errors:
        cnt_metakg_errors = sum(len(errors) for errors in self.metakg_errors.values())
        logger.error(f"Found {cnt_metakg_errors} TRAPI metakg errors:\n {json.dumps(self.metakg_errors, indent=2)}")

    return self.extract_metakgedges(ops, extra_data=extra_data)
182+
183+
def get_metakg(self, include_trapi=True):
    """Return the MetaKG edges for this API.

    For an API flagged by ``is_trapi`` the edges come from
    ``get_trapi_metaedges`` (or an empty list when ``include_trapi`` is
    false); for any other API they come from ``get_xbte_metaedges``.
    """
    self.metakg_errors = None  # reset metakg_errors
    if not self.is_trapi:
        return self.get_xbte_metaedges()
    if include_trapi:
        return self.get_trapi_metaedges()
    return []
190+
191+
def get_ops_from_trapi(self):
    """Fetch this API's ``/meta_knowledge_graph`` endpoint and parse it.

    Returns:
        The operations produced by ``parse_trapi_metakg_endpoint``, or an
        empty list when the metadata has no server URL or the endpoint
        does not answer with status 200.
    """
    # ``metadata`` is a property that re-parses all operations on access,
    # so read it a single time
    server = self.metadata.get("url")
    if not server:
        return []
    trapi_metakg_url = server + "/meta_knowledge_graph"
    content = download(trapi_metakg_url, timeout=60)  # 60s timeout, as trapi endpoint can be slow
    if content.status != 200:
        return []
    return self.parse_trapi_metakg_endpoint(decoder.to_dict(content.raw))
199+
200+
def parse_trapi_metakg_endpoint(self, response):
    """Turn a decoded ``/meta_knowledge_graph`` response into operations.

    Args:
        response: dict with an ``edges`` list (each edge having
            ``subject``, ``object`` and ``predicate``) and an optional
            ``nodes`` mapping from which ``id_prefixes`` are looked up.

    Returns:
        A list with one operation dict per edge, each carrying an
        ``association``, ``tags`` and a POST ``/query`` ``query_operation``.
        Empty when the response has no edges.

    Raises:
        KeyError: if an edge lacks ``subject``, ``object`` or ``predicate``.
    """
    # ``metadata`` is a property that re-parses all operations on access;
    # read it once instead of several times per edge
    metadata = self.metadata
    api_title = metadata.get("title")
    api_tags = metadata.get("tags") or []
    server = metadata.get("url")
    smartapi_info = metadata.get("smartapi")
    x_translator = metadata.get("x-translator")
    # ``or`` also covers explicit null sections in the response
    nodes = response.get("nodes") or {}

    ops = []
    for pred in response.get("edges") or []:
        subject = pred["subject"]
        object_ = pred["object"]
        ops.append(
            {
                "association": {
                    "input_type": self.remove_bio_link_prefix(subject),
                    "input_id": nodes.get(subject, {}).get("id_prefixes"),
                    "output_type": self.remove_bio_link_prefix(object_),
                    "output_id": nodes.get(object_, {}).get("id_prefixes"),
                    "predicate": self.remove_bio_link_prefix(pred["predicate"]),
                    "api_name": api_title,
                    # shallow copies keep each operation's dicts independent
                    "smartapi": copy(smartapi_info),
                    "x-translator": copy(x_translator),
                },
                "tags": [*api_tags, "bte-trapi"],
                "query_operation": {
                    "path": "/query",
                    "method": "post",
                    "server": server,
                    "path_params": None,
                    "params": None,
                    "request_body": None,
                    "support_batch": True,
                    "input_separator": ",",
                    "tags": [*api_tags, "bte-trapi"],
                },
            }
        )
    return ops
230+
231+
def remove_bio_link_prefix(self, _input):
    """Strip a leading "biolink:" from a string.

    Args:
        _input: the value to clean up.

    Returns:
        The string without its "biolink:" prefix, the string unchanged
        when it does not start with that prefix, or None when ``_input``
        is not a string.
    """
    if not isinstance(_input, str):
        return None
    prefix = "biolink:"
    if _input.startswith(prefix):
        # slice by the prefix length rather than a hard-coded 8
        return _input[len(prefix):]
    return _input

src/utils/metakg/parser.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def get_TRAPI_metadatas(self, data, extra_data=None):
2727
count_metadata_list = len(metadata_list)
2828
self.metakg_errors = {}
2929
for i, metadata in enumerate(metadata_list):
30-
ops.extend(self.get_ops_from_metakg_endpoint(metadata, f"[{i+1}/{count_metadata_list}]"))
30+
ops.extend(self.get_ops_from_metakg_endpoint(metadata, f"[{i + 1}/{count_metadata_list}]"))
3131
if self.metakg_errors:
3232
cnt_metakg_errors = sum([len(x) for x in self.metakg_errors.values()])
3333
logger.error(f"Found {cnt_metakg_errors} TRAPI metakg errors:\n {json.dumps(self.metakg_errors, indent=2)}")
@@ -151,6 +151,8 @@ def extract_metakgedges(self, ops, extra_data=None):
151151
edge = {
152152
"subject": op["association"]["input_type"],
153153
"object": op["association"]["output_type"],
154+
"subject_prefix": op["association"]["input_id"],
155+
"object_prefix": op["association"]["output_id"],
154156
"predicate": op["association"]["predicate"],
155157
"api": {
156158
"name": op["association"]["api_name"],

0 commit comments

Comments
 (0)