From 96390ed5bf1a181fd1b03e93cdfb256c2233b215 Mon Sep 17 00:00:00 2001 From: Andrew Freiburger Date: Fri, 1 Jul 2022 15:07:55 -0400 Subject: [PATCH 1/5] revisions before PR --- modelseedpy/core/msgrowthphenotypes.py | 181 +++++++++++-------------- 1 file changed, 77 insertions(+), 104 deletions(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 02a72e3c..a31c887a 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -1,4 +1,4 @@ -import pandas as pd +import pandas as pd #!!! import never used import logging import cobra from cobra.core.dictlist import DictList @@ -6,66 +6,58 @@ from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.msgapfill import MSGapfill +from modelseedpy.core.fbahelper import FBAHelper logger = logging.getLogger(__name__) class MSGrowthPhenotype: - - def __init__(self,id,media=None,growth=None,gene_ko=[],additional_compounds=[],parent=None,name=None): - self.id = id - self.name = name - if name == None: - self.name = self.id - self.growth = growth - self.media = media - self.gene_ko = gene_ko - self.gapfilling = None - self.additional_compounds = additional_compounds - self.parent = parent + def __init__(self, obj_id, media=None, growth=None, gene_ko=[], additional_compounds=[], parent=None, name=None): + self.id = obj_id; self.name = name; self.media = media; self.growth = growth; self.gene_ko = gene_ko + self.gapfilling = None; self.additional_compounds = additional_compounds; self.parent = parent + self.name = name or obj_id + def build_media(self): cpd_hash = {} for cpd in self.additional_compounds: cpd_hash[cpd] = 100 full_media = MSMedia.from_dict(cpd_hash) - if self.media != None: - full_media.merge(self.media,overwrite_overlap = False) - if self.parent != None and self.parent.base_media != None: - full_media.merge(parent.base_media,overwrite_overlap = False) + if self.media: + full_media.merge(self.media, overwrite_overlap = False) + if self.parent and self.parent.base_media: + full_media.merge(self.parent.base_media, overwrite_overlap = False) return full_media - def simulate(self,modelutl,growth_threshold=0.001,add_missing_exchanges=False,save_fluxes=False,pfba=False): + def simulate(self, modelutl, growth_threshold=0.001, add_missing_exchanges=False, save_fluxes=False, pfba=False): if not isinstance(modelutl,MSModelUtil): modelutl = MSModelUtil(modelutl) media = self.build_media() - output = {"growth":None,"class":None,"missing_transports":[]} + results = {"growth":None, "class":None, "missing_transports":[]} if add_missing_exchanges: - output["missing_transports"] = modelutl.add_missing_exchanges(media) + results["missing_transports"] = modelutl.add_missing_exchanges(media) pkgmgr = MSPackageManager.get_pkg_mgr(modelutl.model) - pkgmgr.getpkg("KBaseMediaPkg").build_package(media,self.parent.base_uptake,self.parent.base_excretion) + pkgmgr.getpkg("KBaseMediaPkg").build_package(media, self.parent.base_uptake, self.parent.base_excretion) for gene in self.gene_ko: if gene in modelutl.model.genes: geneobj = modelutl.model.genes.get_by_id(gene) geneobj.knock_out() solution = modelutl.model.optimize() - output["growth"] = solution.objective_value + results["growth"] = solution.objective_value if solution.objective_value > 0 and pfba: solution = cobra.flux_analysis.pfba(modelutl.model) if save_fluxes: - output["fluxes"] = solution.fluxes - if output["growth"] >= growth_threshold: + results["fluxes"] = solution.fluxes + if results["growth"] >= growth_threshold: + results["class"] = "FP" if self.growth > 0: - output["class"] = "CP" - else: - output["class"] = "FP" + results["class"] = "CP" else: + results["class"] = "CN" if self.growth > 0: - output["class"] = "FN" - else: - output["class"] = "CN" - return output + results["class"] = "FN" + return results - def gapfill_model_for_phenotype(self,modelutl,default_gapfill_templates,test_conditions,default_gapfill_models=[],blacklist=[],growth_threshold=0.001,add_missing_exchanges=False): + def gapfill_model_for_phenotype(self, modelutl, default_gapfill_templates, test_conditions, default_gapfill_models=[], blacklist=[], growth_threshold=0.001, add_missing_exchanges=False): if not isinstance(modelutl,MSModelUtil): modelutl = MSModelUtil(modelutl) self.gapfilling = MSGapfill(modelutl.model, default_gapfill_templates, default_gapfill_models, test_conditions, modelutl.reaction_scores(), blacklist) @@ -82,87 +74,70 @@ def gapfill_model_for_phenotype(self,modelutl,default_gapfill_templates,test_con return self.gapfilling.integrate_gapfill_solution(gfresults) class MSGrowthPhenotypes: - - def __init__(self,base_media=None,base_uptake=0,base_excretion=1000): - self.base_media = base_media - self.phenotypes = DictList() - self.base_uptake = base_uptake - self.base_excretion = base_excretion + def __init__(self, base_media=None, base_uptake=0, base_excretion=1000): + self.base_media = base_media; self.base_uptake = base_uptake; self.base_excretion = base_excretion + self.phenotypes = DictList() @staticmethod def from_compound_hash(compounds,base_media,base_uptake=0,base_excretion=1000): growthpheno = MSGrowthPhenotypes(base_media,base_uptake,base_excretion) - new_phenos = [] - for cpd in compounds: - newpheno = MSGrowthPhenotype(cpd,None,compounds[cpd],[],[cpd]) - new_phenos.append(newpheno) - growthpheno.add_phenotypes(new_phenos) + growthpheno.add_phenotypes([MSGrowthPhenotype(cpd, None, compounds[cpd] ,[] ,[cpd]) for cpd in compounds]) return growthpheno @staticmethod - def from_kbase_object(data,kbase_api): + def from_kbase_object(data, kbase_api): growthpheno = MSGrowthPhenotypes(None,0,1000) new_phenos = [] for pheno in data["phenotypes"]: - media = kbase_api.get_from_ws(pheno["media_ref"],None) - geneko = [] - for gene in pheno["geneko_refs"]: - geneko.append(added_cpd.split("/").pop()) - added_compounds = [] - for added_cpd in pheno["additionalcompound_refs"]: - added_compounds.append(added_cpd.split("/").pop()) - newpheno = MSGrowthPhenotype(pheno["id"],media,pheno["normalizedGrowth"],geneko,added_compounds) - new_phenos.append(newpheno) + new_phenos.append(MSGrowthPhenotype( + pheno["id"], kbase_api.get_from_ws(pheno["media_ref"],None), + pheno["normalizedGrowth"], + [gene.split("/").pop() for gene in pheno["geneko_refs"]], + [added_cpd.split("/").pop() for added_cpd in pheno["additionalcompound_refs"]])) growthpheno.add_phenotypes(new_phenos) return growthpheno @staticmethod - def from_kbase_file(filename,kbase_api): + def from_kbase_file(filename, base_media, kbase_api): # The base_media was called in line 105 but was never defined #TSV file with the following headers:media mediaws growth geneko addtlCpd + from pandas import read_table + table_df = read_table(filename) + growthpheno = MSGrowthPhenotypes(base_media,0,1000) - headings = [] new_phenos = [] - with open(filename) as f: - lines = f.readlines() - for line in lines: - items = line.split("\t") - if headings == None: - headings = items - else: - data = {} - for i in range(0,len(items)): - data[headings[i]] = items[i] - data = FBAHelper.validate_dictionary(headings,["media","growth"],{"mediaws":None,"geneko":[],"addtlCpd":[]}) - media = kbase_api.get_from_ws(data["media"],data["mediaws"]) - id = data["media"] - if len(data["geneko"]) > 0: - id += "-"+",".join(data["geneko"]) - if len(data["addtlCpd"]) > 0: - id += "-"+",".join(data["addtlCpd"]) - newpheno = MSGrowthPhenotype(id,media,data["growth"],data["geneko"],data["addtlCpd"]) - new_phenos.append(newpheno) + for index, row in table_df.iterrows(): + data = FBAHelper.validate_dictionary(row.to_dict(), ["media","growth"], {"mediaws":None, "geneko":[], "addtlCpd":[]}) + media = kbase_api.get_from_ws(data["media"], data["mediaws"]) + media_id = data["media"] + if len(data["geneko"]) > 0: + media_id += "-"+",".join(data["geneko"]) + if len(data["addtlCpd"]) > 0: + media_id += "-"+",".join(data["addtlCpd"]) + new_phenos.append(MSGrowthPhenotype(media_id, media, data["growth"], data["geneko"], data["addtlCpd"])) growthpheno.add_phenotypes(new_phenos) return growthpheno @staticmethod - def from_ms_file(filename,basemedia,base_uptake=0,base_excretion=100): + def from_ms_file(filename, base_media, base_uptake=0, base_excretion=100): + from pandas import read_csv + df = read_csv(filename) + growthpheno = MSGrowthPhenotypes(base_media,base_uptake,base_excretion) - df = pd.read_csv (filename) required_headers = ["Compounds","Growth"] for item in required_headers: if item not in df: - raise ValueError('Required header '+item+' is missing!') + raise ValueError(f'The required header {item} is missing from the CSV file.') new_phenos = [] - for row in df.rows: + for index, row in df.iterrows(): cpds = row["Compounds"].split(";") - id = row["Compounds"] + cpd_id = row["Compounds"] if "ID" in row: - id = row["ID"] + cpd_id = row["ID"] geneko = [] if "GeneKO" in row: geneko = row["GeneKO"].split(";") - newpheno = MSGrowthPhenotype(id,None,row["Growth"],geneko,cpds) + newpheno = MSGrowthPhenotype(cpd_id,None,row["Growth"],geneko,cpds) new_phenos.append(newpheno) growthpheno.add_phenotypes(new_phenos) return growthpheno @@ -176,33 +151,32 @@ def add_phenotypes(self,new_phenotypes): additions = DictList(keep_phenos) self.phenotypes += additions - def simulate_phenotypes(self,model,biomass,add_missing_exchanges=False,correct_false_negatives=False,template=None,growth_threshold=0.001,save_fluxes=False): + def simulate_phenotypes(self,model, biomass, add_missing_exchanges=False, correct_false_negatives=False, template=None, growth_threshold=0.001): + from pandas import DataFrame + data_df = DataFrame(columns = ["Phenotype","Observed growth","Simulated growth","Class","Transports missing","Gapfilled reactions"]) + model.objective = biomass modelutl = MSModelUtil(model) summary = {"Label":["Accuracy","CP","CN","FP","FN"],"Count":[0,0,0,0,0]} - data = {"Phenotype":[],"Observed growth":[],"Simulated growth":[],"Class":[],"Transports missing":[],"Gapfilled reactions":[]} - for pheno in self.phenotypes: + for index, pheno in enumerate(self.phenotypes): with model: - result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges,save_fluxes)#Result should have "growth" and "class" + result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges) + gfl = None if result["class"] == "FN" and correct_false_negatives: pheno.gapfill_model_for_phenotype(modelutl,[template],None) - if pheno.gapfilling.last_solution != None: - list = []; + if pheno.gapfilling.last_solution: + gpfl_rxns = [] for rxn_id in pheno.gapfilling.last_solution["reversed"]: - list.append(pheno.gapfilling.last_solution["reversed"][rxn_id]+rxn_id) + gpfl_rxns.append(pheno.gapfilling.last_solution["reversed"][rxn_id]+rxn_id) for rxn_id in pheno.gapfilling.last_solution["new"]: - list.append(pheno.gapfilling.last_solution["new"][rxn_id]+rxn_id) - data["Gapfilled reactions"].append(";".join(list)) - else: - data["Gapfilled reactions"].append(None) - else: - data["Gapfilled reactions"].append(None) - result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges,save_fluxes)#Result should have "growth" and "class" - data["Class"].append(result["class"]) - data["Phenotype"].append(pheno.id) - data["Observed growth"].append(pheno.growth) - data["Simulated growth"].append(result["growth"]) - data["Transports missing"].append(";".join(result["missing_transports"])) + gpfl_rxns.append(pheno.gapfilling.last_solution["new"][rxn_id]+rxn_id) + gfl = ";".join(gpfl_rxns) + result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges) + data_df.loc[index] = { + "Phenotype":pheno.id, "Observed growth": pheno.growth, + "Simulated growth":result["growth"], "class":result["class"], + "Transports missing": ";".join(result["missing_transports"]), + "Gapfilled reactions":gfl} if result["class"] == "CP": summary["Count"][1] += 1 summary["Count"][0] += 1 @@ -215,9 +189,8 @@ def simulate_phenotypes(self,model,biomass,add_missing_exchanges=False,correct_f summary["Count"][4] += 1 summary["Count"][0] = summary["Count"][0]/len(self.phenotypes) - sdf = pd.DataFrame(summary) - df = pd.DataFrame(data) - logger.info(df) - return {"details":df,"summary":sdf} + sdf = DataFrame(summary) + logger.info(data_df) + return {"details":data_df,"summary":sdf} \ No newline at end of file From 3747f294f8d5eaa96ec724eabae11aaebd6ce004 Mon Sep 17 00:00:00 2001 From: Andrew Freiburger Date: Fri, 1 Jul 2022 15:12:36 -0400 Subject: [PATCH 2/5] output restored name --- modelseedpy/core/msgrowthphenotypes.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index a31c887a..dee53e1a 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -32,9 +32,9 @@ def simulate(self, modelutl, growth_threshold=0.001, add_missing_exchanges=False if not isinstance(modelutl,MSModelUtil): modelutl = MSModelUtil(modelutl) media = self.build_media() - results = {"growth":None, "class":None, "missing_transports":[]} + output = {"growth":None, "class":None, "missing_transports":[]} if add_missing_exchanges: - results["missing_transports"] = modelutl.add_missing_exchanges(media) + output["missing_transports"] = modelutl.add_missing_exchanges(media) pkgmgr = MSPackageManager.get_pkg_mgr(modelutl.model) pkgmgr.getpkg("KBaseMediaPkg").build_package(media, self.parent.base_uptake, self.parent.base_excretion) for gene in self.gene_ko: @@ -42,20 +42,20 @@ def simulate(self, modelutl, growth_threshold=0.001, add_missing_exchanges=False geneobj = modelutl.model.genes.get_by_id(gene) geneobj.knock_out() solution = modelutl.model.optimize() - results["growth"] = solution.objective_value + output["growth"] = solution.objective_value if solution.objective_value > 0 and pfba: solution = cobra.flux_analysis.pfba(modelutl.model) if save_fluxes: - results["fluxes"] = solution.fluxes - if results["growth"] >= growth_threshold: - results["class"] = "FP" + output["fluxes"] = solution.fluxes + if output["growth"] >= growth_threshold: + output["class"] = "FP" if self.growth > 0: - results["class"] = "CP" + output["class"] = "CP" else: - results["class"] = "CN" + output["class"] = "CN" if self.growth > 0: - results["class"] = "FN" - return results + output["class"] = "FN" + return output def gapfill_model_for_phenotype(self, modelutl, default_gapfill_templates, test_conditions, default_gapfill_models=[], blacklist=[], growth_threshold=0.001, add_missing_exchanges=False): if not isinstance(modelutl,MSModelUtil): From 6da528b5f48c1069878e2209484e513729fced45 Mon Sep 17 00:00:00 2001 From: Andrew Freiburger Date: Fri, 1 Jul 2022 15:17:25 -0400 Subject: [PATCH 3/5] adjust function parameter order --- modelseedpy/core/msgrowthphenotypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index dee53e1a..af137e5c 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -99,7 +99,7 @@ def from_kbase_object(data, kbase_api): return growthpheno @staticmethod - def from_kbase_file(filename, base_media, kbase_api): # The base_media was called in line 105 but was never defined + def from_kbase_file(filename, kbase_api, base_media): #TSV file with the following headers:media mediaws growth geneko addtlCpd from pandas import read_table table_df = read_table(filename) From 5565e16c1825d7766f948f0f3385ea214ba1652f Mon Sep 17 00:00:00 2001 From: Andrew Freiburger Date: Fri, 22 Jul 2022 21:54:19 -0400 Subject: [PATCH 4/5] msgrowthphenotypes updates --- modelseedpy/core/msgrowthphenotypes.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index af137e5c..4d14c2af 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -13,18 +13,23 @@ class MSGrowthPhenotype: def __init__(self, obj_id, media=None, growth=None, gene_ko=[], additional_compounds=[], parent=None, name=None): - self.id = obj_id; self.name = name; self.media = media; self.growth = growth; self.gene_ko = gene_ko - self.gapfilling = None; self.additional_compounds = additional_compounds; self.parent = parent + self.id = obj_id self.name = name or obj_id + self.growth = growth + self.media = media + self.gene_ko = gene_ko + self.gapfilling = None + self.additional_compounds = additional_compounds + self.parent = parent def build_media(self): cpd_hash = {} for cpd in self.additional_compounds: cpd_hash[cpd] = 100 full_media = MSMedia.from_dict(cpd_hash) - if self.media: + if self.media is not None: full_media.merge(self.media, overwrite_overlap = False) - if self.parent and self.parent.base_media: + if self.parent is not None and self.parent.base_media is not None: full_media.merge(self.parent.base_media, overwrite_overlap = False) return full_media @@ -75,9 +80,10 @@ def gapfill_model_for_phenotype(self, modelutl, default_gapfill_templates, test_ class MSGrowthPhenotypes: def __init__(self, base_media=None, base_uptake=0, base_excretion=1000): - - self.base_media = base_media; self.base_uptake = base_uptake; self.base_excretion = base_excretion + self.base_media = base_media self.phenotypes = DictList() + self.base_uptake = base_uptake + self.base_excretion = base_excretion @staticmethod def from_compound_hash(compounds,base_media,base_uptake=0,base_excretion=1000): @@ -164,7 +170,7 @@ def simulate_phenotypes(self,model, biomass, add_missing_exchanges=False, correc gfl = None if result["class"] == "FN" and correct_false_negatives: pheno.gapfill_model_for_phenotype(modelutl,[template],None) - if pheno.gapfilling.last_solution: + if pheno.gapfilling.last_solution is not None: gpfl_rxns = [] for rxn_id in pheno.gapfilling.last_solution["reversed"]: gpfl_rxns.append(pheno.gapfilling.last_solution["reversed"][rxn_id]+rxn_id) From e20a96cb6bc198963c833bb5a452160a8b99eb5a Mon Sep 17 00:00:00 2001 From: Andrew Freiburger Date: Thu, 15 Sep 2022 15:21:15 -0500 Subject: [PATCH 5/5] final polish --- modelseedpy/core/msgrowthphenotypes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 4d14c2af..6accf440 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -12,9 +12,8 @@ class MSGrowthPhenotype: def __init__(self, obj_id, media=None, growth=None, gene_ko=[], additional_compounds=[], parent=None, name=None): - self.id = obj_id - self.name = name or obj_id + self.name = name or self.id self.growth = growth self.media = media self.gene_ko = gene_ko