Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 67 additions & 89 deletions modelseedpy/core/msgrowthphenotypes.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,46 @@
import pandas as pd
import pandas as pd #!!! import never used
import logging
import cobra
from cobra.core.dictlist import DictList
from modelseedpy.core.msmedia import MSMedia
from modelseedpy.fbapkg.mspackagemanager import MSPackageManager
from modelseedpy.core.msmodelutl import MSModelUtil
from modelseedpy.core.msgapfill import MSGapfill
from modelseedpy.core.fbahelper import FBAHelper

logger = logging.getLogger(__name__)

class MSGrowthPhenotype:

def __init__(self,id,media=None,growth=None,gene_ko=[],additional_compounds=[],parent=None,name=None):
self.id = id
self.name = name
if name == None:
self.name = self.id
def __init__(self, obj_id, media=None, growth=None, gene_ko=[], additional_compounds=[], parent=None, name=None):
self.id = obj_id
self.name = name or self.id
self.growth = growth
self.media = media
self.gene_ko = gene_ko
self.gapfilling = None
self.additional_compounds = additional_compounds
self.parent = parent

def build_media(self):
cpd_hash = {}
for cpd in self.additional_compounds:
cpd_hash[cpd] = 100
full_media = MSMedia.from_dict(cpd_hash)
if self.media != None:
full_media.merge(self.media,overwrite_overlap = False)
if self.parent != None and self.parent.base_media != None:
full_media.merge(parent.base_media,overwrite_overlap = False)
if self.media is not None:
full_media.merge(self.media, overwrite_overlap = False)
if self.parent is not None and self.parent.base_media is not None:
full_media.merge(self.parent.base_media, overwrite_overlap = False)
return full_media

def simulate(self,modelutl,growth_threshold=0.001,add_missing_exchanges=False,save_fluxes=False,pfba=False):
def simulate(self, modelutl, growth_threshold=0.001, add_missing_exchanges=False, save_fluxes=False, pfba=False):
if not isinstance(modelutl,MSModelUtil):
modelutl = MSModelUtil(modelutl)
media = self.build_media()
output = {"growth":None,"class":None,"missing_transports":[]}
output = {"growth":None, "class":None, "missing_transports":[]}
if add_missing_exchanges:
output["missing_transports"] = modelutl.add_missing_exchanges(media)
pkgmgr = MSPackageManager.get_pkg_mgr(modelutl.model)
pkgmgr.getpkg("KBaseMediaPkg").build_package(media,self.parent.base_uptake,self.parent.base_excretion)
pkgmgr.getpkg("KBaseMediaPkg").build_package(media, self.parent.base_uptake, self.parent.base_excretion)
for gene in self.gene_ko:
if gene in modelutl.model.genes:
geneobj = modelutl.model.genes.get_by_id(gene)
Expand All @@ -54,18 +52,16 @@ def simulate(self,modelutl,growth_threshold=0.001,add_missing_exchanges=False,sa
if save_fluxes:
output["fluxes"] = solution.fluxes
if output["growth"] >= growth_threshold:
output["class"] = "FP"
if self.growth > 0:
output["class"] = "CP"
else:
output["class"] = "FP"
else:
output["class"] = "CN"
if self.growth > 0:
output["class"] = "FN"
else:
output["class"] = "CN"
return output

def gapfill_model_for_phenotype(self,modelutl,default_gapfill_templates,test_conditions,default_gapfill_models=[],blacklist=[],growth_threshold=0.001,add_missing_exchanges=False):
def gapfill_model_for_phenotype(self, modelutl, default_gapfill_templates, test_conditions, default_gapfill_models=[], blacklist=[], growth_threshold=0.001, add_missing_exchanges=False):
if not isinstance(modelutl,MSModelUtil):
modelutl = MSModelUtil(modelutl)
self.gapfilling = MSGapfill(modelutl.model, default_gapfill_templates, default_gapfill_models, test_conditions, modelutl.reaction_scores(), blacklist)
Expand All @@ -82,87 +78,71 @@ def gapfill_model_for_phenotype(self,modelutl,default_gapfill_templates,test_con
return self.gapfilling.integrate_gapfill_solution(gfresults)

class MSGrowthPhenotypes:

def __init__(self,base_media=None,base_uptake=0,base_excretion=1000):
def __init__(self, base_media=None, base_uptake=0, base_excretion=1000):
self.base_media = base_media
self.phenotypes = DictList()
self.base_uptake = base_uptake
self.base_excretion = base_excretion


@staticmethod
def from_compound_hash(compounds,base_media,base_uptake=0,base_excretion=1000):
growthpheno = MSGrowthPhenotypes(base_media,base_uptake,base_excretion)
new_phenos = []
for cpd in compounds:
newpheno = MSGrowthPhenotype(cpd,None,compounds[cpd],[],[cpd])
new_phenos.append(newpheno)
growthpheno.add_phenotypes(new_phenos)
growthpheno.add_phenotypes([MSGrowthPhenotype(cpd, None, compounds[cpd] ,[] ,[cpd]) for cpd in compounds])
return growthpheno

@staticmethod
def from_kbase_object(data,kbase_api):
def from_kbase_object(data, kbase_api):
growthpheno = MSGrowthPhenotypes(None,0,1000)
new_phenos = []
for pheno in data["phenotypes"]:
media = kbase_api.get_from_ws(pheno["media_ref"],None)
geneko = []
for gene in pheno["geneko_refs"]:
geneko.append(added_cpd.split("/").pop())
added_compounds = []
for added_cpd in pheno["additionalcompound_refs"]:
added_compounds.append(added_cpd.split("/").pop())
newpheno = MSGrowthPhenotype(pheno["id"],media,pheno["normalizedGrowth"],geneko,added_compounds)
new_phenos.append(newpheno)
new_phenos.append(MSGrowthPhenotype(
pheno["id"], kbase_api.get_from_ws(pheno["media_ref"],None),
pheno["normalizedGrowth"],
[gene.split("/").pop() for gene in pheno["geneko_refs"]],
[added_cpd.split("/").pop() for added_cpd in pheno["additionalcompound_refs"]]))
growthpheno.add_phenotypes(new_phenos)
return growthpheno

@staticmethod
def from_kbase_file(filename,kbase_api):
def from_kbase_file(filename, kbase_api, base_media):
#TSV file with the following headers:media mediaws growth geneko addtlCpd
from pandas import read_table
table_df = read_table(filename)

growthpheno = MSGrowthPhenotypes(base_media,0,1000)
headings = []
new_phenos = []
with open(filename) as f:
lines = f.readlines()
for line in lines:
items = line.split("\t")
if headings == None:
headings = items
else:
data = {}
for i in range(0,len(items)):
data[headings[i]] = items[i]
data = FBAHelper.validate_dictionary(headings,["media","growth"],{"mediaws":None,"geneko":[],"addtlCpd":[]})
media = kbase_api.get_from_ws(data["media"],data["mediaws"])
id = data["media"]
if len(data["geneko"]) > 0:
id += "-"+",".join(data["geneko"])
if len(data["addtlCpd"]) > 0:
id += "-"+",".join(data["addtlCpd"])
newpheno = MSGrowthPhenotype(id,media,data["growth"],data["geneko"],data["addtlCpd"])
new_phenos.append(newpheno)
for index, row in table_df.iterrows():
data = FBAHelper.validate_dictionary(row.to_dict(), ["media","growth"], {"mediaws":None, "geneko":[], "addtlCpd":[]})
media = kbase_api.get_from_ws(data["media"], data["mediaws"])
media_id = data["media"]
if len(data["geneko"]) > 0:
media_id += "-"+",".join(data["geneko"])
if len(data["addtlCpd"]) > 0:
media_id += "-"+",".join(data["addtlCpd"])
new_phenos.append(MSGrowthPhenotype(media_id, media, data["growth"], data["geneko"], data["addtlCpd"]))
growthpheno.add_phenotypes(new_phenos)
return growthpheno

@staticmethod
def from_ms_file(filename,basemedia,base_uptake=0,base_excretion=100):
def from_ms_file(filename, base_media, base_uptake=0, base_excretion=100):
from pandas import read_csv
df = read_csv(filename)

growthpheno = MSGrowthPhenotypes(base_media,base_uptake,base_excretion)
df = pd.read_csv (filename)
required_headers = ["Compounds","Growth"]
for item in required_headers:
if item not in df:
raise ValueError('Required header '+item+' is missing!')
raise ValueError(f'The required header {item} is missing from the CSV file.')
new_phenos = []
for row in df.rows:
for index, row in df.iterrows():
cpds = row["Compounds"].split(";")
id = row["Compounds"]
cpd_id = row["Compounds"]
if "ID" in row:
id = row["ID"]
cpd_id = row["ID"]
geneko = []
if "GeneKO" in row:
geneko = row["GeneKO"].split(";")
newpheno = MSGrowthPhenotype(id,None,row["Growth"],geneko,cpds)
newpheno = MSGrowthPhenotype(cpd_id,None,row["Growth"],geneko,cpds)
new_phenos.append(newpheno)
growthpheno.add_phenotypes(new_phenos)
return growthpheno
Expand All @@ -176,33 +156,32 @@ def add_phenotypes(self,new_phenotypes):
additions = DictList(keep_phenos)
self.phenotypes += additions

def simulate_phenotypes(self,model,biomass,add_missing_exchanges=False,correct_false_negatives=False,template=None,growth_threshold=0.001,save_fluxes=False):
def simulate_phenotypes(self,model, biomass, add_missing_exchanges=False, correct_false_negatives=False, template=None, growth_threshold=0.001):
from pandas import DataFrame
data_df = DataFrame(columns = ["Phenotype","Observed growth","Simulated growth","Class","Transports missing","Gapfilled reactions"])

model.objective = biomass
modelutl = MSModelUtil(model)
summary = {"Label":["Accuracy","CP","CN","FP","FN"],"Count":[0,0,0,0,0]}
data = {"Phenotype":[],"Observed growth":[],"Simulated growth":[],"Class":[],"Transports missing":[],"Gapfilled reactions":[]}
for pheno in self.phenotypes:
for index, pheno in enumerate(self.phenotypes):
with model:
result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges,save_fluxes)#Result should have "growth" and "class"
result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges)
gfl = None
if result["class"] == "FN" and correct_false_negatives:
pheno.gapfill_model_for_phenotype(modelutl,[template],None)
if pheno.gapfilling.last_solution != None:
list = [];
if pheno.gapfilling.last_solution is not None:
gpfl_rxns = []
for rxn_id in pheno.gapfilling.last_solution["reversed"]:
list.append(pheno.gapfilling.last_solution["reversed"][rxn_id]+rxn_id)
gpfl_rxns.append(pheno.gapfilling.last_solution["reversed"][rxn_id]+rxn_id)
for rxn_id in pheno.gapfilling.last_solution["new"]:
list.append(pheno.gapfilling.last_solution["new"][rxn_id]+rxn_id)
data["Gapfilled reactions"].append(";".join(list))
else:
data["Gapfilled reactions"].append(None)
else:
data["Gapfilled reactions"].append(None)
result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges,save_fluxes)#Result should have "growth" and "class"
data["Class"].append(result["class"])
data["Phenotype"].append(pheno.id)
data["Observed growth"].append(pheno.growth)
data["Simulated growth"].append(result["growth"])
data["Transports missing"].append(";".join(result["missing_transports"]))
gpfl_rxns.append(pheno.gapfilling.last_solution["new"][rxn_id]+rxn_id)
gfl = ";".join(gpfl_rxns)
result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges)
data_df.loc[index] = {
"Phenotype":pheno.id, "Observed growth": pheno.growth,
"Simulated growth":result["growth"], "class":result["class"],
"Transports missing": ";".join(result["missing_transports"]),
"Gapfilled reactions":gfl}
if result["class"] == "CP":
summary["Count"][1] += 1
summary["Count"][0] += 1
Expand All @@ -215,9 +194,8 @@ def simulate_phenotypes(self,model,biomass,add_missing_exchanges=False,correct_f
summary["Count"][4] += 1

summary["Count"][0] = summary["Count"][0]/len(self.phenotypes)
sdf = pd.DataFrame(summary)
df = pd.DataFrame(data)
logger.info(df)
return {"details":df,"summary":sdf}
sdf = DataFrame(summary)
logger.info(data_df)
return {"details":data_df,"summary":sdf}