cshenry · freiburgermsu · Jul 1, 2022 · Jul 1, 2022 · Jul 1, 2022 · Jul 23, 2022
diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py
@@ -1,48 +1,46 @@
-import pandas as pd
+import pandas as pd  #!!! import never used
 import logging
 import cobra
 from cobra.core.dictlist import DictList
 from modelseedpy.core.msmedia import MSMedia
 from modelseedpy.fbapkg.mspackagemanager import MSPackageManager
 from modelseedpy.core.msmodelutl import MSModelUtil
 from modelseedpy.core.msgapfill import MSGapfill
+from modelseedpy.core.fbahelper import FBAHelper
 
 logger = logging.getLogger(__name__)
 
 class MSGrowthPhenotype:
-
-    def __init__(self,id,media=None,growth=None,gene_ko=[],additional_compounds=[],parent=None,name=None):
-        self.id = id
-        self.name = name
-        if name == None:
-            self.name = self.id
+    def __init__(self, obj_id, media=None, growth=None, gene_ko=[], additional_compounds=[], parent=None, name=None):
+        self.id = obj_id
+        self.name = name or self.id
         self.growth = growth
         self.media = media
         self.gene_ko = gene_ko
         self.gapfilling = None
         self.additional_compounds = additional_compounds
         self.parent = parent
-        
+
     def build_media(self):
         cpd_hash = {}
         for cpd in self.additional_compounds:
             cpd_hash[cpd] = 100
         full_media = MSMedia.from_dict(cpd_hash)
-        if self.media != None:
-            full_media.merge(self.media,overwrite_overlap = False)
-        if self.parent != None and self.parent.base_media != None:
-            full_media.merge(parent.base_media,overwrite_overlap = False)
+        if self.media is not None:
+            full_media.merge(self.media, overwrite_overlap = False)
+        if self.parent is not None and self.parent.base_media is not None:
+                full_media.merge(self.parent.base_media, overwrite_overlap = False)
         return full_media
 
-    def simulate(self,modelutl,growth_threshold=0.001,add_missing_exchanges=False,save_fluxes=False,pfba=False):        
+    def simulate(self, modelutl, growth_threshold=0.001, add_missing_exchanges=False, save_fluxes=False, pfba=False):        
         if not isinstance(modelutl,MSModelUtil):
             modelutl = MSModelUtil(modelutl)
         media = self.build_media()
-        output = {"growth":None,"class":None,"missing_transports":[]}
+        output = {"growth":None, "class":None, "missing_transports":[]}
         if add_missing_exchanges:
             output["missing_transports"] = modelutl.add_missing_exchanges(media)
         pkgmgr = MSPackageManager.get_pkg_mgr(modelutl.model)
-        pkgmgr.getpkg("KBaseMediaPkg").build_package(media,self.parent.base_uptake,self.parent.base_excretion)        
+        pkgmgr.getpkg("KBaseMediaPkg").build_package(media, self.parent.base_uptake, self.parent.base_excretion)        
         for gene in self.gene_ko:
             if gene in modelutl.model.genes:
                 geneobj = modelutl.model.genes.get_by_id(gene)
@@ -54,18 +52,16 @@ def simulate(self,modelutl,growth_threshold=0.001,add_missing_exchanges=False,sa
         if save_fluxes:
             output["fluxes"] = solution.fluxes
         if output["growth"] >= growth_threshold:
+            output["class"] = "FP"
             if self.growth > 0:
                 output["class"] = "CP"
-            else:
-                output["class"] = "FP"
         else:    
+            output["class"] = "CN"
             if self.growth > 0:
                 output["class"] = "FN"
-            else:
-                output["class"] = "CN"
         return output        
 
-    def gapfill_model_for_phenotype(self,modelutl,default_gapfill_templates,test_conditions,default_gapfill_models=[],blacklist=[],growth_threshold=0.001,add_missing_exchanges=False):
+    def gapfill_model_for_phenotype(self, modelutl, default_gapfill_templates, test_conditions, default_gapfill_models=[], blacklist=[], growth_threshold=0.001, add_missing_exchanges=False):
         if not isinstance(modelutl,MSModelUtil):
             modelutl = MSModelUtil(modelutl)
         self.gapfilling = MSGapfill(modelutl.model, default_gapfill_templates, default_gapfill_models, test_conditions, modelutl.reaction_scores(), blacklist) 
@@ -82,87 +78,71 @@ def gapfill_model_for_phenotype(self,modelutl,default_gapfill_templates,test_con
         return self.gapfilling.integrate_gapfill_solution(gfresults)
 
 class MSGrowthPhenotypes:
-
-    def __init__(self,base_media=None,base_uptake=0,base_excretion=1000): 
+    def __init__(self, base_media=None, base_uptake=0, base_excretion=1000): 
         self.base_media = base_media
         self.phenotypes = DictList()
         self.base_uptake = base_uptake
         self.base_excretion = base_excretion
-
 
     @staticmethod
     def from_compound_hash(compounds,base_media,base_uptake=0,base_excretion=1000):
         growthpheno = MSGrowthPhenotypes(base_media,base_uptake,base_excretion)
-        new_phenos = []
-        for cpd in compounds:
-            newpheno = MSGrowthPhenotype(cpd,None,compounds[cpd],[],[cpd])
-            new_phenos.append(newpheno)
-        growthpheno.add_phenotypes(new_phenos)
+        growthpheno.add_phenotypes([MSGrowthPhenotype(cpd, None, compounds[cpd] ,[] ,[cpd]) for cpd in compounds])
         return growthpheno
 
     @staticmethod
-    def from_kbase_object(data,kbase_api):
+    def from_kbase_object(data, kbase_api):
         growthpheno = MSGrowthPhenotypes(None,0,1000)
         new_phenos = []
         for pheno in data["phenotypes"]:
-            media = kbase_api.get_from_ws(pheno["media_ref"],None)
-            geneko = []
-            for gene in pheno["geneko_refs"]:
-                geneko.append(added_cpd.split("/").pop())
-            added_compounds = []
-            for added_cpd in pheno["additionalcompound_refs"]:
-                added_compounds.append(added_cpd.split("/").pop())
-            newpheno = MSGrowthPhenotype(pheno["id"],media,pheno["normalizedGrowth"],geneko,added_compounds)
-            new_phenos.append(newpheno)
+            new_phenos.append(MSGrowthPhenotype(
+                pheno["id"], kbase_api.get_from_ws(pheno["media_ref"],None),
+                pheno["normalizedGrowth"], 
+                [gene.split("/").pop() for gene in pheno["geneko_refs"]], 
+                [added_cpd.split("/").pop() for added_cpd in pheno["additionalcompound_refs"]]))
         growthpheno.add_phenotypes(new_phenos)                
         return growthpheno
 
     @staticmethod
-    def from_kbase_file(filename,kbase_api):
+    def from_kbase_file(filename, kbase_api, base_media):
         #TSV file with the following headers:media    mediaws    growth    geneko    addtlCpd
+        from pandas import read_table
+        table_df = read_table(filename)
+
         growthpheno = MSGrowthPhenotypes(base_media,0,1000)
-        headings = []
         new_phenos = []
-        with open(filename) as f:
-            lines = f.readlines()
-            for line in lines:
-                items = line.split("\t")
-                if headings == None:
-                    headings = items
-                else:
-                    data = {}
-                    for i in range(0,len(items)):
-                        data[headings[i]] = items[i]
-                    data = FBAHelper.validate_dictionary(headings,["media","growth"],{"mediaws":None,"geneko":[],"addtlCpd":[]})
-                    media = kbase_api.get_from_ws(data["media"],data["mediaws"])
-                    id = data["media"]
-                    if len(data["geneko"]) > 0:
-                        id += "-"+",".join(data["geneko"])
-                    if len(data["addtlCpd"]) > 0:
-                        id += "-"+",".join(data["addtlCpd"])
-                    newpheno = MSGrowthPhenotype(id,media,data["growth"],data["geneko"],data["addtlCpd"])
-                    new_phenos.append(newpheno)
+        for index, row in table_df.iterrows():
+            data = FBAHelper.validate_dictionary(row.to_dict(), ["media","growth"], {"mediaws":None, "geneko":[], "addtlCpd":[]})
+            media = kbase_api.get_from_ws(data["media"], data["mediaws"])
+            media_id = data["media"]
+            if len(data["geneko"]) > 0:
+                media_id += "-"+",".join(data["geneko"])
+            if len(data["addtlCpd"]) > 0:
+                media_id += "-"+",".join(data["addtlCpd"])
+            new_phenos.append(MSGrowthPhenotype(media_id, media, data["growth"], data["geneko"], data["addtlCpd"]))
         growthpheno.add_phenotypes(new_phenos)                
         return growthpheno
 
     @staticmethod
-    def from_ms_file(filename,basemedia,base_uptake=0,base_excretion=100):
+    def from_ms_file(filename, base_media, base_uptake=0, base_excretion=100):
+        from pandas import read_csv
+        df = read_csv(filename)
+
         growthpheno = MSGrowthPhenotypes(base_media,base_uptake,base_excretion)
-        df = pd.read_csv (filename)
         required_headers = ["Compounds","Growth"]
         for item in required_headers:
             if item not in df:
-                raise ValueError('Required header '+item+' is missing!')
+                raise ValueError(f'The required header {item} is missing from the CSV file.')
         new_phenos = []
-        for row in df.rows:
+        for index, row in df.iterrows():
             cpds = row["Compounds"].split(";")
-            id = row["Compounds"]
+            cpd_id = row["Compounds"]
             if "ID" in row:
-                id = row["ID"]
+                cpd_id = row["ID"]
             geneko = []
             if "GeneKO" in row:
                 geneko = row["GeneKO"].split(";")
-            newpheno = MSGrowthPhenotype(id,None,row["Growth"],geneko,cpds)
+            newpheno = MSGrowthPhenotype(cpd_id,None,row["Growth"],geneko,cpds)
             new_phenos.append(newpheno)
         growthpheno.add_phenotypes(new_phenos)            
         return growthpheno
@@ -176,33 +156,32 @@ def add_phenotypes(self,new_phenotypes):
         additions = DictList(keep_phenos)
         self.phenotypes += additions
 
-    def simulate_phenotypes(self,model,biomass,add_missing_exchanges=False,correct_false_negatives=False,template=None,growth_threshold=0.001,save_fluxes=False):
+    def simulate_phenotypes(self,model, biomass, add_missing_exchanges=False, correct_false_negatives=False, template=None, growth_threshold=0.001):
+        from pandas import DataFrame
+        data_df = DataFrame(columns = ["Phenotype","Observed growth","Simulated growth","Class","Transports missing","Gapfilled reactions"])
+
         model.objective = biomass
         modelutl = MSModelUtil(model)
         summary = {"Label":["Accuracy","CP","CN","FP","FN"],"Count":[0,0,0,0,0]}
-        data = {"Phenotype":[],"Observed growth":[],"Simulated growth":[],"Class":[],"Transports missing":[],"Gapfilled reactions":[]}
-        for pheno in self.phenotypes:
+        for index, pheno in enumerate(self.phenotypes):
             with model:
-                result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges,save_fluxes)#Result should have "growth" and "class"
+                result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges)
+                gfl = None
                 if result["class"] == "FN" and correct_false_negatives:
                     pheno.gapfill_model_for_phenotype(modelutl,[template],None)
-                    if pheno.gapfilling.last_solution != None:
-                        list = [];
+                    if pheno.gapfilling.last_solution is not None:
+                        gpfl_rxns = []
                         for rxn_id in pheno.gapfilling.last_solution["reversed"]:
-                            list.append(pheno.gapfilling.last_solution["reversed"][rxn_id]+rxn_id)
+                            gpfl_rxns.append(pheno.gapfilling.last_solution["reversed"][rxn_id]+rxn_id)
                         for rxn_id in pheno.gapfilling.last_solution["new"]:
-                            list.append(pheno.gapfilling.last_solution["new"][rxn_id]+rxn_id)
-                        data["Gapfilled reactions"].append(";".join(list))
-                    else:
-                        data["Gapfilled reactions"].append(None)
-                else:
-                    data["Gapfilled reactions"].append(None)
-                result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges,save_fluxes)#Result should have "growth" and "class"
-                data["Class"].append(result["class"])
-                data["Phenotype"].append(pheno.id)
-                data["Observed growth"].append(pheno.growth)
-                data["Simulated growth"].append(result["growth"])
-                data["Transports missing"].append(";".join(result["missing_transports"]))
+                            gpfl_rxns.append(pheno.gapfilling.last_solution["new"][rxn_id]+rxn_id)
+                        gfl = ";".join(gpfl_rxns)
+                result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges)
+                data_df.loc[index] = {
+                    "Phenotype":pheno.id, "Observed growth": pheno.growth,
+                    "Simulated growth":result["growth"], "class":result["class"], 
+                    "Transports missing": ";".join(result["missing_transports"]), 
+                    "Gapfilled reactions":gfl}
                 if result["class"] == "CP":
                     summary["Count"][1] += 1
                     summary["Count"][0] += 1
@@ -215,9 +194,8 @@ def simulate_phenotypes(self,model,biomass,add_missing_exchanges=False,correct_f
                     summary["Count"][4] += 1
 
         summary["Count"][0] = summary["Count"][0]/len(self.phenotypes) 
-        sdf = pd.DataFrame(summary)
-        df = pd.DataFrame(data)
-        logger.info(df)
-        return {"details":df,"summary":sdf}
+        sdf = DataFrame(summary)
+        logger.info(data_df)
+        return {"details":data_df,"summary":sdf}