From 96390ed5bf1a181fd1b03e93cdfb256c2233b215 Mon Sep 17 00:00:00 2001
From: Andrew Freiburger <Andrewefreiburger@gmail.com>
Date: Fri, 1 Jul 2022 15:07:55 -0400
Subject: [PATCH 1/5] revisions before PR

---
 modelseedpy/core/msgrowthphenotypes.py | 181 +++++++++++--------------
 1 file changed, 77 insertions(+), 104 deletions(-)

diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py
index 02a72e3c..a31c887a 100644
--- a/modelseedpy/core/msgrowthphenotypes.py
+++ b/modelseedpy/core/msgrowthphenotypes.py
@@ -1,4 +1,4 @@
-import pandas as pd
+import pandas as pd  #!!! import never used
 import logging
 import cobra
 from cobra.core.dictlist import DictList
@@ -6,66 +6,58 @@
 from modelseedpy.fbapkg.mspackagemanager import MSPackageManager
 from modelseedpy.core.msmodelutl import MSModelUtil
 from modelseedpy.core.msgapfill import MSGapfill
+from modelseedpy.core.fbahelper import FBAHelper
 
 logger = logging.getLogger(__name__)
 
 class MSGrowthPhenotype:
-
-    def __init__(self,id,media=None,growth=None,gene_ko=[],additional_compounds=[],parent=None,name=None):
-        self.id = id
-        self.name = name
-        if name == None:
-            self.name = self.id
-        self.growth = growth
-        self.media = media
-        self.gene_ko = gene_ko
-        self.gapfilling = None
-        self.additional_compounds = additional_compounds
-        self.parent = parent
+    def __init__(self, obj_id, media=None, growth=None, gene_ko=[], additional_compounds=[], parent=None, name=None):
         
+        self.id = obj_id; self.name = name; self.media = media; self.growth = growth; self.gene_ko = gene_ko
+        self.gapfilling = None; self.additional_compounds = additional_compounds; self.parent = parent
+        self.name = name or obj_id
+
     def build_media(self):
         cpd_hash = {}
         for cpd in self.additional_compounds:
             cpd_hash[cpd] = 100
         full_media = MSMedia.from_dict(cpd_hash)
-        if self.media != None:
-            full_media.merge(self.media,overwrite_overlap = False)
-        if self.parent != None and self.parent.base_media != None:
-            full_media.merge(parent.base_media,overwrite_overlap = False)
+        if self.media:
+            full_media.merge(self.media, overwrite_overlap = False)
+        if self.parent and self.parent.base_media:
+                full_media.merge(self.parent.base_media, overwrite_overlap = False)
         return full_media
     
-    def simulate(self,modelutl,growth_threshold=0.001,add_missing_exchanges=False,save_fluxes=False,pfba=False):        
+    def simulate(self, modelutl, growth_threshold=0.001, add_missing_exchanges=False, save_fluxes=False, pfba=False):        
         if not isinstance(modelutl,MSModelUtil):
             modelutl = MSModelUtil(modelutl)
         media = self.build_media()
-        output = {"growth":None,"class":None,"missing_transports":[]}
+        results = {"growth":None, "class":None, "missing_transports":[]}
         if add_missing_exchanges:
-            output["missing_transports"] = modelutl.add_missing_exchanges(media)
+            results["missing_transports"] = modelutl.add_missing_exchanges(media)
         pkgmgr = MSPackageManager.get_pkg_mgr(modelutl.model)
-        pkgmgr.getpkg("KBaseMediaPkg").build_package(media,self.parent.base_uptake,self.parent.base_excretion)        
+        pkgmgr.getpkg("KBaseMediaPkg").build_package(media, self.parent.base_uptake, self.parent.base_excretion)        
         for gene in self.gene_ko:
             if gene in modelutl.model.genes:
                 geneobj = modelutl.model.genes.get_by_id(gene)
                 geneobj.knock_out()
         solution = modelutl.model.optimize()
-        output["growth"] = solution.objective_value
+        results["growth"] = solution.objective_value
         if solution.objective_value > 0 and pfba:
             solution = cobra.flux_analysis.pfba(modelutl.model)
         if save_fluxes:
-            output["fluxes"] = solution.fluxes
-        if output["growth"] >= growth_threshold:
+            results["fluxes"] = solution.fluxes
+        if results["growth"] >= growth_threshold:
+            results["class"] = "FP"
             if self.growth > 0:
-                output["class"] = "CP"
-            else:
-                output["class"] = "FP"
+                results["class"] = "CP"
         else:    
+            results["class"] = "CN"
             if self.growth > 0:
-                output["class"] = "FN"
-            else:
-                output["class"] = "CN"
-        return output        
+                results["class"] = "FN"
+        return results        
     
-    def gapfill_model_for_phenotype(self,modelutl,default_gapfill_templates,test_conditions,default_gapfill_models=[],blacklist=[],growth_threshold=0.001,add_missing_exchanges=False):
+    def gapfill_model_for_phenotype(self, modelutl, default_gapfill_templates, test_conditions, default_gapfill_models=[], blacklist=[], growth_threshold=0.001, add_missing_exchanges=False):
         if not isinstance(modelutl,MSModelUtil):
             modelutl = MSModelUtil(modelutl)
         self.gapfilling = MSGapfill(modelutl.model, default_gapfill_templates, default_gapfill_models, test_conditions, modelutl.reaction_scores(), blacklist) 
@@ -82,87 +74,70 @@ def gapfill_model_for_phenotype(self,modelutl,default_gapfill_templates,test_con
         return self.gapfilling.integrate_gapfill_solution(gfresults)
     
 class MSGrowthPhenotypes:
-
-    def __init__(self,base_media=None,base_uptake=0,base_excretion=1000): 
-        self.base_media = base_media
-        self.phenotypes = DictList()
-        self.base_uptake = base_uptake
-        self.base_excretion = base_excretion
+    def __init__(self, base_media=None, base_uptake=0, base_excretion=1000): 
         
+        self.base_media = base_media; self.base_uptake = base_uptake; self.base_excretion = base_excretion
+        self.phenotypes = DictList()
 
     @staticmethod
     def from_compound_hash(compounds,base_media,base_uptake=0,base_excretion=1000):
         growthpheno = MSGrowthPhenotypes(base_media,base_uptake,base_excretion)
-        new_phenos = []
-        for cpd in compounds:
-            newpheno = MSGrowthPhenotype(cpd,None,compounds[cpd],[],[cpd])
-            new_phenos.append(newpheno)
-        growthpheno.add_phenotypes(new_phenos)
+        growthpheno.add_phenotypes([MSGrowthPhenotype(cpd, None, compounds[cpd] ,[] ,[cpd]) for cpd in compounds])
         return growthpheno
     
     @staticmethod
-    def from_kbase_object(data,kbase_api):
+    def from_kbase_object(data, kbase_api):
         growthpheno = MSGrowthPhenotypes(None,0,1000)
         new_phenos = []
         for pheno in data["phenotypes"]:
-            media = kbase_api.get_from_ws(pheno["media_ref"],None)
-            geneko = []
-            for gene in pheno["geneko_refs"]:
-                geneko.append(added_cpd.split("/").pop())
-            added_compounds = []
-            for added_cpd in pheno["additionalcompound_refs"]:
-                added_compounds.append(added_cpd.split("/").pop())
-            newpheno = MSGrowthPhenotype(pheno["id"],media,pheno["normalizedGrowth"],geneko,added_compounds)
-            new_phenos.append(newpheno)
+            new_phenos.append(MSGrowthPhenotype(
+                pheno["id"], kbase_api.get_from_ws(pheno["media_ref"],None),
+                pheno["normalizedGrowth"], 
+                [gene.split("/").pop() for gene in pheno["geneko_refs"]], 
+                [added_cpd.split("/").pop() for added_cpd in pheno["additionalcompound_refs"]]))
         growthpheno.add_phenotypes(new_phenos)                
         return growthpheno
         
     @staticmethod
-    def from_kbase_file(filename,kbase_api):
+    def from_kbase_file(filename, base_media, kbase_api):   # The base_media was called in line 105 but was never defined
         #TSV file with the following headers:media    mediaws    growth    geneko    addtlCpd
+        from pandas import read_table
+        table_df = read_table(filename)
+        
         growthpheno = MSGrowthPhenotypes(base_media,0,1000)
-        headings = []
         new_phenos = []
-        with open(filename) as f:
-            lines = f.readlines()
-            for line in lines:
-                items = line.split("\t")
-                if headings == None:
-                    headings = items
-                else:
-                    data = {}
-                    for i in range(0,len(items)):
-                        data[headings[i]] = items[i]
-                    data = FBAHelper.validate_dictionary(headings,["media","growth"],{"mediaws":None,"geneko":[],"addtlCpd":[]})
-                    media = kbase_api.get_from_ws(data["media"],data["mediaws"])
-                    id = data["media"]
-                    if len(data["geneko"]) > 0:
-                        id += "-"+",".join(data["geneko"])
-                    if len(data["addtlCpd"]) > 0:
-                        id += "-"+",".join(data["addtlCpd"])
-                    newpheno = MSGrowthPhenotype(id,media,data["growth"],data["geneko"],data["addtlCpd"])
-                    new_phenos.append(newpheno)
+        for index, row in table_df.iterrows():
+            data = FBAHelper.validate_dictionary(row.to_dict(), ["media","growth"], {"mediaws":None, "geneko":[], "addtlCpd":[]})
+            media = kbase_api.get_from_ws(data["media"], data["mediaws"])
+            media_id = data["media"]
+            if len(data["geneko"]) > 0:
+                media_id += "-"+",".join(data["geneko"])
+            if len(data["addtlCpd"]) > 0:
+                media_id += "-"+",".join(data["addtlCpd"])
+            new_phenos.append(MSGrowthPhenotype(media_id, media, data["growth"], data["geneko"], data["addtlCpd"]))
         growthpheno.add_phenotypes(new_phenos)                
         return growthpheno
         
     @staticmethod
-    def from_ms_file(filename,basemedia,base_uptake=0,base_excretion=100):
+    def from_ms_file(filename, base_media, base_uptake=0, base_excretion=100):
+        from pandas import read_csv
+        df = read_csv(filename)
+        
         growthpheno = MSGrowthPhenotypes(base_media,base_uptake,base_excretion)
-        df = pd.read_csv (filename)
         required_headers = ["Compounds","Growth"]
         for item in required_headers:
             if item not in df:
-                raise ValueError('Required header '+item+' is missing!')
+                raise ValueError(f'The required header {item} is missing from the CSV file.')
         new_phenos = []
-        for row in df.rows:
+        for index, row in df.iterrows():
             cpds = row["Compounds"].split(";")
-            id = row["Compounds"]
+            cpd_id = row["Compounds"]
             if "ID" in row:
-                id = row["ID"]
+                cpd_id = row["ID"]
             geneko = []
             if "GeneKO" in row:
                 geneko = row["GeneKO"].split(";")
-            newpheno = MSGrowthPhenotype(id,None,row["Growth"],geneko,cpds)
+            newpheno = MSGrowthPhenotype(cpd_id,None,row["Growth"],geneko,cpds)
             new_phenos.append(newpheno)
         growthpheno.add_phenotypes(new_phenos)            
         return growthpheno
@@ -176,33 +151,32 @@ def add_phenotypes(self,new_phenotypes):
         additions = DictList(keep_phenos)
         self.phenotypes += additions
         
-    def simulate_phenotypes(self,model,biomass,add_missing_exchanges=False,correct_false_negatives=False,template=None,growth_threshold=0.001,save_fluxes=False):
+    def simulate_phenotypes(self,model, biomass, add_missing_exchanges=False, correct_false_negatives=False, template=None, growth_threshold=0.001):
+        from pandas import DataFrame
+        data_df = DataFrame(columns = ["Phenotype","Observed growth","Simulated growth","Class","Transports missing","Gapfilled reactions"])
+        
         model.objective = biomass
         modelutl = MSModelUtil(model)
         summary = {"Label":["Accuracy","CP","CN","FP","FN"],"Count":[0,0,0,0,0]}
-        data = {"Phenotype":[],"Observed growth":[],"Simulated growth":[],"Class":[],"Transports missing":[],"Gapfilled reactions":[]}
-        for pheno in self.phenotypes:
+        for index, pheno in enumerate(self.phenotypes):
             with model:
-                result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges,save_fluxes)#Result should have "growth" and "class"
+                result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges)
+                gfl = None
                 if result["class"] == "FN" and correct_false_negatives:
                     pheno.gapfill_model_for_phenotype(modelutl,[template],None)
-                    if pheno.gapfilling.last_solution != None:
-                        list = [];
+                    if pheno.gapfilling.last_solution:
+                        gpfl_rxns = []
                         for rxn_id in pheno.gapfilling.last_solution["reversed"]:
-                            list.append(pheno.gapfilling.last_solution["reversed"][rxn_id]+rxn_id)
+                            gpfl_rxns.append(pheno.gapfilling.last_solution["reversed"][rxn_id]+rxn_id)
                         for rxn_id in pheno.gapfilling.last_solution["new"]:
-                            list.append(pheno.gapfilling.last_solution["new"][rxn_id]+rxn_id)
-                        data["Gapfilled reactions"].append(";".join(list))
-                    else:
-                        data["Gapfilled reactions"].append(None)
-                else:
-                    data["Gapfilled reactions"].append(None)
-                result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges,save_fluxes)#Result should have "growth" and "class"
-                data["Class"].append(result["class"])
-                data["Phenotype"].append(pheno.id)
-                data["Observed growth"].append(pheno.growth)
-                data["Simulated growth"].append(result["growth"])
-                data["Transports missing"].append(";".join(result["missing_transports"]))
+                            gpfl_rxns.append(pheno.gapfilling.last_solution["new"][rxn_id]+rxn_id)
+                        gfl = ";".join(gpfl_rxns)
+                result = pheno.simulate(modelutl,growth_threshold,add_missing_exchanges)
+                data_df.loc[index] = {
+                    "Phenotype":pheno.id, "Observed growth": pheno.growth,
+                    "Simulated growth":result["growth"], "class":result["class"], 
+                    "Transports missing": ";".join(result["missing_transports"]), 
+                    "Gapfilled reactions":gfl}
                 if result["class"] == "CP":
                     summary["Count"][1] += 1
                     summary["Count"][0] += 1
@@ -215,9 +189,8 @@ def simulate_phenotypes(self,model,biomass,add_missing_exchanges=False,correct_f
                     summary["Count"][4] += 1
                 
         summary["Count"][0] = summary["Count"][0]/len(self.phenotypes) 
-        sdf = pd.DataFrame(summary)
-        df = pd.DataFrame(data)
-        logger.info(df)
-        return {"details":df,"summary":sdf}
+        sdf = DataFrame(summary)
+        logger.info(data_df)
+        return {"details":data_df,"summary":sdf}
     
     
\ No newline at end of file

From 3747f294f8d5eaa96ec724eabae11aaebd6ce004 Mon Sep 17 00:00:00 2001
From: Andrew Freiburger <Andrewefreiburger@gmail.com>
Date: Fri, 1 Jul 2022 15:12:36 -0400
Subject: [PATCH 2/5] output restored name

---
 modelseedpy/core/msgrowthphenotypes.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py
index a31c887a..dee53e1a 100644
--- a/modelseedpy/core/msgrowthphenotypes.py
+++ b/modelseedpy/core/msgrowthphenotypes.py
@@ -32,9 +32,9 @@ def simulate(self, modelutl, growth_threshold=0.001, add_missing_exchanges=False
         if not isinstance(modelutl,MSModelUtil):
             modelutl = MSModelUtil(modelutl)
         media = self.build_media()
-        results = {"growth":None, "class":None, "missing_transports":[]}
+        output = {"growth":None, "class":None, "missing_transports":[]}
         if add_missing_exchanges:
-            results["missing_transports"] = modelutl.add_missing_exchanges(media)
+            output["missing_transports"] = modelutl.add_missing_exchanges(media)
         pkgmgr = MSPackageManager.get_pkg_mgr(modelutl.model)
         pkgmgr.getpkg("KBaseMediaPkg").build_package(media, self.parent.base_uptake, self.parent.base_excretion)        
         for gene in self.gene_ko:
@@ -42,20 +42,20 @@ def simulate(self, modelutl, growth_threshold=0.001, add_missing_exchanges=False
                 geneobj = modelutl.model.genes.get_by_id(gene)
                 geneobj.knock_out()
         solution = modelutl.model.optimize()
-        results["growth"] = solution.objective_value
+        output["growth"] = solution.objective_value
         if solution.objective_value > 0 and pfba:
             solution = cobra.flux_analysis.pfba(modelutl.model)
         if save_fluxes:
-            results["fluxes"] = solution.fluxes
-        if results["growth"] >= growth_threshold:
-            results["class"] = "FP"
+            output["fluxes"] = solution.fluxes
+        if output["growth"] >= growth_threshold:
+            output["class"] = "FP"
             if self.growth > 0:
-                results["class"] = "CP"
+                output["class"] = "CP"
         else:    
-            results["class"] = "CN"
+            output["class"] = "CN"
             if self.growth > 0:
-                results["class"] = "FN"
-        return results        
+                output["class"] = "FN"
+        return output        
     
     def gapfill_model_for_phenotype(self, modelutl, default_gapfill_templates, test_conditions, default_gapfill_models=[], blacklist=[], growth_threshold=0.001, add_missing_exchanges=False):
         if not isinstance(modelutl,MSModelUtil):

From 6da528b5f48c1069878e2209484e513729fced45 Mon Sep 17 00:00:00 2001
From: Andrew Freiburger <Andrewefreiburger@gmail.com>
Date: Fri, 1 Jul 2022 15:17:25 -0400
Subject: [PATCH 3/5] adjust function parameter order

---
 modelseedpy/core/msgrowthphenotypes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py
index dee53e1a..af137e5c 100644
--- a/modelseedpy/core/msgrowthphenotypes.py
+++ b/modelseedpy/core/msgrowthphenotypes.py
@@ -99,7 +99,7 @@ def from_kbase_object(data, kbase_api):
         return growthpheno
         
     @staticmethod
-    def from_kbase_file(filename, base_media, kbase_api):   # The base_media was called in line 105 but was never defined
+    def from_kbase_file(filename, kbase_api, base_media):
         #TSV file with the following headers:media    mediaws    growth    geneko    addtlCpd
         from pandas import read_table
         table_df = read_table(filename)

From 5565e16c1825d7766f948f0f3385ea214ba1652f Mon Sep 17 00:00:00 2001
From: Andrew Freiburger <Andrewefreiburger@gmail.com>
Date: Fri, 22 Jul 2022 21:54:19 -0400
Subject: [PATCH 4/5] msgrowthphenotypes updates

---
 modelseedpy/core/msgrowthphenotypes.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py
index af137e5c..4d14c2af 100644
--- a/modelseedpy/core/msgrowthphenotypes.py
+++ b/modelseedpy/core/msgrowthphenotypes.py
@@ -13,18 +13,23 @@
 class MSGrowthPhenotype:
     def __init__(self, obj_id, media=None, growth=None, gene_ko=[], additional_compounds=[], parent=None, name=None):
         
-        self.id = obj_id; self.name = name; self.media = media; self.growth = growth; self.gene_ko = gene_ko
-        self.gapfilling = None; self.additional_compounds = additional_compounds; self.parent = parent
+        self.id = obj_id
         self.name = name or obj_id
+        self.growth = growth
+        self.media = media
+        self.gene_ko = gene_ko
+        self.gapfilling = None
+        self.additional_compounds = additional_compounds
+        self.parent = parent
 
     def build_media(self):
         cpd_hash = {}
         for cpd in self.additional_compounds:
             cpd_hash[cpd] = 100
         full_media = MSMedia.from_dict(cpd_hash)
-        if self.media:
+        if self.media is not None:
             full_media.merge(self.media, overwrite_overlap = False)
-        if self.parent and self.parent.base_media:
+        if self.parent is not None and self.parent.base_media is not None:
                 full_media.merge(self.parent.base_media, overwrite_overlap = False)
         return full_media
     
@@ -75,9 +80,10 @@ def gapfill_model_for_phenotype(self, modelutl, default_gapfill_templates, test_
     
 class MSGrowthPhenotypes:
     def __init__(self, base_media=None, base_uptake=0, base_excretion=1000): 
-        
-        self.base_media = base_media; self.base_uptake = base_uptake; self.base_excretion = base_excretion
+        self.base_media = base_media
         self.phenotypes = DictList()
+        self.base_uptake = base_uptake
+        self.base_excretion = base_excretion
 
     @staticmethod
     def from_compound_hash(compounds,base_media,base_uptake=0,base_excretion=1000):
@@ -164,7 +170,7 @@ def simulate_phenotypes(self,model, biomass, add_missing_exchanges=False, correc
                 gfl = None
                 if result["class"] == "FN" and correct_false_negatives:
                     pheno.gapfill_model_for_phenotype(modelutl,[template],None)
-                    if pheno.gapfilling.last_solution:
+                    if pheno.gapfilling.last_solution is not None:
                         gpfl_rxns = []
                         for rxn_id in pheno.gapfilling.last_solution["reversed"]:
                             gpfl_rxns.append(pheno.gapfilling.last_solution["reversed"][rxn_id]+rxn_id)

From e20a96cb6bc198963c833bb5a452160a8b99eb5a Mon Sep 17 00:00:00 2001
From: Andrew Freiburger <Andrewefreiburger@gmail.com>
Date: Thu, 15 Sep 2022 15:21:15 -0500
Subject: [PATCH 5/5] final polish

---
 modelseedpy/core/msgrowthphenotypes.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py
index 4d14c2af..6accf440 100644
--- a/modelseedpy/core/msgrowthphenotypes.py
+++ b/modelseedpy/core/msgrowthphenotypes.py
@@ -12,9 +12,8 @@
 
 class MSGrowthPhenotype:
     def __init__(self, obj_id, media=None, growth=None, gene_ko=[], additional_compounds=[], parent=None, name=None):
-        
         self.id = obj_id
-        self.name = name or obj_id
+        self.name = name or self.id
         self.growth = growth
         self.media = media
         self.gene_ko = gene_ko