From a96eaa2b452971e96f52e7fbd01a9dbaededb55a Mon Sep 17 00:00:00 2001
From: Connie Gao <connieg@mit.edu>
Date: Wed, 20 Jul 2016 18:32:20 -0400
Subject: [PATCH 1/5] Remove unused arguments in the
 KineticsFamily.fillKineticsRulesByAveragingUp function

---
 rmgpy/data/kinetics/family.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/rmgpy/data/kinetics/family.py b/rmgpy/data/kinetics/family.py
index 2a5357741b..31de7636a8 100644
--- a/rmgpy/data/kinetics/family.py
+++ b/rmgpy/data/kinetics/family.py
@@ -1039,15 +1039,15 @@ def getRootTemplate(self):
         else:
             return self.groups.top
     
-    def fillKineticsRulesByAveragingUp(self, rootTemplate=None, alreadyDone=None):
+    def fillKineticsRulesByAveragingUp(self):
         """
-        Fill in gaps in the kinetics rate rules by averaging child nodes.
+        Fill in gaps in the kinetics rate rules by averaging child nodes
+        recursively, starting from the top-level root template.
         """
-        # If no template is specified, then start at the top-level nodes
-        if rootTemplate is None:
-            rootTemplate = self.getRootTemplate()
-            alreadyDone = {}
-        self.rules.fillRulesByAveragingUp(rootTemplate, alreadyDone)
+        # Always start at the root template with a fresh, empty `alreadyDone` dict
+        self.rules.fillRulesByAveragingUp(self.getRootTemplate(), {})
+        
+        
 
     def applyRecipe(self, reactantStructures, forward=True, unique=True):
         """

From dc39a2565411bbf99c580ad0288ba5e5bd45881d Mon Sep 17 00:00:00 2001
From: Connie Gao <connieg@mit.edu>
Date: Wed, 20 Jul 2016 18:35:46 -0400
Subject: [PATCH 2/5] fillRulesByAveragingUp: Generate distance 1 pairings for
 each root template

As agreed upon by Prof. Green, we will no
longer average strict children. Instead, we will
average together Manhattan distance 1 nodes and traverse
the tree completely before stopping.
---
 rmgpy/data/kinetics/rules.py | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/rmgpy/data/kinetics/rules.py b/rmgpy/data/kinetics/rules.py
index 2e57244e2b..0d84bffc3f 100644
--- a/rmgpy/data/kinetics/rules.py
+++ b/rmgpy/data/kinetics/rules.py
@@ -460,21 +460,26 @@ def fillRulesByAveragingUp(self, rootTemplate, alreadyDone):
             alreadyDone[rootLabel] = entry.data
             return entry.data
         
-        # Recursively descend to the child nodes
-        childrenList = [[group] for group in rootTemplate]
-        for group in childrenList:
-            parent = group.pop(0)
-            if len(parent.children) > 0:
-                group.extend(parent.children)
-            else:
-                group.append(parent)
-                
-        childrenList = getAllCombinations(childrenList)
+
+        # Generate the distance 1 pairings which must be averaged for this root template.
+        # The distance 1 template is created by taking the parent node from one or more trees
+        # and creating the combinations with children from a single remaining tree.  
+        # i.e. for some node (A,B), we want to fetch all combinations for the pairing of (A,B's children) and
+        # (A's children, B).  For node (A,B,C), we would retrieve all combinations of (A,B,C's children) 
+        # (A,B's children,C) etc...  
+        # If a particular node has no children, it is skipped from the children expansion altogether.
+
+        childrenList = []
+        for i, parent in enumerate(rootTemplate):
+            # Start with the root template, and replace the ith member with its children
+            if parent.children:
+                childrenSet = [[group] for group in rootTemplate]
+                childrenSet[i] = parent.children
+                childrenList.extend(getAllCombinations(childrenSet))
+
         kineticsList = []
         for template in childrenList:
             label = ';'.join([g.label for g in template])
-            if template == rootTemplate: 
-                continue
             
             if label in alreadyDone:
                 kinetics = alreadyDone[label]

From 00de0c1be86424753ec4cf2e2ea9e939f6070e05 Mon Sep 17 00:00:00 2001
From: Connie Gao <connieg@mit.edu>
Date: Wed, 20 Jul 2016 18:37:10 -0400
Subject: [PATCH 3/5] Set kinetics to the entry's data if it exists, but only
 AFTER children are explored

---
 rmgpy/data/kinetics/rules.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/rmgpy/data/kinetics/rules.py b/rmgpy/data/kinetics/rules.py
index 0d84bffc3f..fbb0da7793 100644
--- a/rmgpy/data/kinetics/rules.py
+++ b/rmgpy/data/kinetics/rules.py
@@ -448,18 +448,6 @@ def fillRulesByAveragingUp(self, rootTemplate, alreadyDone):
         
         if rootLabel in alreadyDone:
             return alreadyDone[rootLabel]
-        
-        # See if we already have a rate rule for this exact template 
-        entry = self.getRule(rootTemplate)
-        if entry is not None and entry.rank > 0:
-            # We already have a rate rule for this exact template
-            # If the entry has rank of zero, then we have so little faith
-            # in it that we'd rather use an averaged value if possible
-            # Since this entry does not have a rank of zero, we keep its
-            # value
-            alreadyDone[rootLabel] = entry.data
-            return entry.data
-        
 
         # Generate the distance 1 pairings which must be averaged for this root template.
         # The distance 1 template is created by taking the parent node from one or more trees
@@ -488,6 +476,18 @@ def fillRulesByAveragingUp(self, rootTemplate, alreadyDone):
             
             if kinetics is not None:
                 kineticsList.append([kinetics, template])
+                
+        # See if we already have a rate rule for this exact template instead
+        # and return it now that we have finished searching its children
+        entry = self.getRule(rootTemplate)
+        if entry is not None and entry.rank > 0:
+            # We already have a rate rule for this exact template
+            # If the entry has rank of zero, then we have so little faith
+            # in it that we'd rather use an averaged value if possible
+            # Since this entry does not have a rank of zero, we keep its
+            # value
+            alreadyDone[rootLabel] = entry.data
+            return entry.data
         
         if len(kineticsList) > 0:
             

From 6aaadef018d816615bf97c952bd92e42c11da94b Mon Sep 17 00:00:00 2001
From: Connie Gao <connieg@mit.edu>
Date: Wed, 20 Jul 2016 18:38:58 -0400
Subject: [PATCH 4/5] Change kinetics average commenting, and don't average the
 kineticsList when there's just one entry

Deep copy the kinetics even if one entry is used
in the average, because we are going to be modifying the comments
and we don't want to modify the original object.

Use just the children's template labels for an averaged rule's comments.
This replaces the commenting that we used to do, which
traced the entire averaging history by recursively
appending to the kinetics comments in order to
show the actual original templates that factor into
the average.  Instead, this new method of writing
the kinetics comments just lists the distance 1 children that
in fact have kinetics.  In order to do more detailed analysis, one
would have to go into the code and uncomment the debug lines
that use the former kinetics comment style, which retains
full information.  However, the former style is too memory intensive
and difficult to read for normal RMG jobs, and it prints
too much to the verbose chemkin output.
---
 rmgpy/data/kinetics/rules.py | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/rmgpy/data/kinetics/rules.py b/rmgpy/data/kinetics/rules.py
index fbb0da7793..03a2fa5837 100644
--- a/rmgpy/data/kinetics/rules.py
+++ b/rmgpy/data/kinetics/rules.py
@@ -491,14 +491,30 @@ def fillRulesByAveragingUp(self, rootTemplate, alreadyDone):
         
         if len(kineticsList) > 0:
             
-            # We found one or more results! Let's average them together
-            kinetics = self.__getAverageKinetics([k for k, t in kineticsList])
             if len(kineticsList) > 1:
-                kinetics.comment += 'Average of ({0})'.format(
-                    ' + '.join(k.comment if k.comment != '' else ';'.join(g.label for g in t) for k, t in kineticsList))
+                # We found one or more results! Let's average them together
+                kinetics = self.__getAverageKinetics([k for k, t in kineticsList])
+                kinetics.comment = 'Average of ({0})'.format(
+                     ' + '.join(';'.join(g.label for g in t) for k, t in kineticsList))
+                
+                # For debug mode: uncomment the following kinetics commenting
+                # lines and use them instead of the lines above. Caution: large memory usage.
+
+                # kinetics.comment += 'Average of ({0})'.format(
+                #     ' + '.join(k.comment if k.comment != '' else ';'.join(g.label for g in t) for k, t in kineticsList))
+
             else:
                 k,t = kineticsList[0]
-                kinetics.comment += k.comment if k.comment != '' else ';'.join(g.label for g in t)
+                kinetics = deepcopy(k)
+                # Even though we are using just a single set of kinetics, it's still considered
+                # an average.  It just happens that the other distance 1 children had no data.
+                kinetics.comment = 'Average of ({0})'.format(';'.join(g.label for g in t))
+                
+                # For debug mode: uncomment the following kinetics commenting
+                # lines and use them instead of the lines above. Caution: large memory usage.
+
+                # kinetics.comment += 'Average of ({0})'.format(k.comment if k.comment != '' else ';'.join(g.label for g in t))
+            
             entry = Entry(
                 index = 0,
                 label = rootLabel,

From b21bc48ae066f249f18dcc579f272e93c4a9b400 Mon Sep 17 00:00:00 2001
From: Connie Gao <connieg@mit.edu>
Date: Wed, 20 Jul 2016 18:46:23 -0400
Subject: [PATCH 5/5] Select the rate rules to average based on the norm
 distance

Previously, we were using a distance algorithm that chose
the Manhattan distance.  Now we use the Euclidean norm
distance instead, in hopes of decreasing the usage of very general nodes.

Speed up the norm distance calculation by storing the node's level
in the Entry() object when parsing the group tree.
---
 rmgpy/data/base.py           |  7 +++++--
 rmgpy/data/kinetics/rules.py | 27 ++++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/rmgpy/data/base.py b/rmgpy/data/base.py
index b47bb1032f..65106a1505 100644
--- a/rmgpy/data/base.py
+++ b/rmgpy/data/base.py
@@ -451,7 +451,7 @@ def loadOldDictionary(self, path, pattern):
 
     def __loadTree(self, tree):
         """
-        Parse an old-style RMG tree located at `tree`. An RMG tree is an n-ary
+        Parse a group tree located at `tree`. An RMG tree is an n-ary
         tree representing the hierarchy of items in the dictionary.
         """
 
@@ -498,7 +498,10 @@ def __loadTree(self, tree):
                 else:
                     entry.parent = None
                     self.top.append(entry)
-
+                    
+                # Save the level of the tree into the entry
+                entry.level = level
+                
                 # Add node to list of parents for subsequent iteration
                 parents.append(label)
 
diff --git a/rmgpy/data/kinetics/rules.py b/rmgpy/data/kinetics/rules.py
index 03a2fa5837..7e70cb764e 100644
--- a/rmgpy/data/kinetics/rules.py
+++ b/rmgpy/data/kinetics/rules.py
@@ -565,7 +565,25 @@ def __getAverageKinetics(self, kineticsList):
             E0 = (E0*0.001,"kJ/mol"),
         )
         return averagedKinetics
+    
+    def calculateNormDistance(self, template, otherTemplate):
+        """
+        Return the squared Euclidean norm of the per-tree difference in node
+        level between `template` and `otherTemplate`: a^2 + b^2 + c^2 + ...,
+        where a is the level difference between the two nodes in the first
+        tree, b is the level difference in the second tree, and so on.
+        """
+        
+        # Provisional approximation: use each node's distance from the top of its tree
+        # (node.level) rather than the distance between the two nodes themselves.
+        import numpy
+        depth = numpy.array([node.level for node in template])
+        otherDepth = numpy.array([otherNode.level for otherNode in otherTemplate])
 
+        distance = numpy.array(depth-otherDepth)
+        norm = numpy.dot(distance,distance)
+        return norm
+        
     def estimateKinetics(self, template, degeneracy=1):
         """
         Determine the appropriate kinetics for a reaction with the given
@@ -588,7 +606,14 @@ def getTemplateLabel(template):
                 kineticsList.append([kinetics, t])
             
             if len(kineticsList) > 0:                 
-                                
+                
+                if len(kineticsList) > 1:
+                    # Keep only the kinetics whose templates have the smallest squared Euclidean
+                    # (norm) distance from the specified template
+                    norms = [self.calculateNormDistance(template, t) for kinetics,t in kineticsList]
+                    minNorm = min(norms) 
+                    kineticsList = [pair for pair, norm in zip(kineticsList,norms) if norm == min(norms)]
+                    
                 if len(kineticsList) == 1:
                     kinetics, t = kineticsList[0]
                     # Check whether the exact rate rule for the original template (most specific