From a96eaa2b452971e96f52e7fbd01a9dbaededb55a Mon Sep 17 00:00:00 2001 From: Connie Gao Date: Wed, 20 Jul 2016 18:32:20 -0400 Subject: [PATCH 1/5] Remove unused arguments in the KineticsFamily.fillKineticsRulesByAveragingUp function --- rmgpy/data/kinetics/family.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rmgpy/data/kinetics/family.py b/rmgpy/data/kinetics/family.py index 2a5357741b..31de7636a8 100644 --- a/rmgpy/data/kinetics/family.py +++ b/rmgpy/data/kinetics/family.py @@ -1039,15 +1039,15 @@ def getRootTemplate(self): else: return self.groups.top - def fillKineticsRulesByAveragingUp(self, rootTemplate=None, alreadyDone=None): + def fillKineticsRulesByAveragingUp(self): """ - Fill in gaps in the kinetics rate rules by averaging child nodes. + Fill in gaps in the kinetics rate rules by averaging child nodes + recursively starting from the top level root template. """ - # If no template is specified, then start at the top-level nodes - if rootTemplate is None: - rootTemplate = self.getRootTemplate() - alreadyDone = {} - self.rules.fillRulesByAveragingUp(rootTemplate, alreadyDone) + + self.rules.fillRulesByAveragingUp(self.getRootTemplate(), {}) + + def applyRecipe(self, reactantStructures, forward=True, unique=True): """ From dc39a2565411bbf99c580ad0288ba5e5bd45881d Mon Sep 17 00:00:00 2001 From: Connie Gao Date: Wed, 20 Jul 2016 18:35:46 -0400 Subject: [PATCH 2/5] fillRulesByAveragingUp: Generate distance 1 pairings for each root template As agreed upon by Prof. Green, we will no longer average strict children, instead we will average together manhattan distance 1 nodes and traverse the tree completely before stopping --- rmgpy/data/kinetics/rules.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/rmgpy/data/kinetics/rules.py b/rmgpy/data/kinetics/rules.py index 2e57244e2b..0d84bffc3f 100644 --- a/rmgpy/data/kinetics/rules.py +++ b/rmgpy/data/kinetics/rules.py @@ -460,21 +460,26 @@ def fillRulesByAveragingUp(self, rootTemplate, alreadyDone): alreadyDone[rootLabel] = entry.data return entry.data - # Recursively descend to the child nodes - childrenList = [[group] for group in rootTemplate] - for group in childrenList: - parent = group.pop(0) - if len(parent.children) > 0: - group.extend(parent.children) - else: - group.append(parent) - - childrenList = getAllCombinations(childrenList) + + # Generate the distance 1 pairings which must be averaged for this root template. + # The distance 1 template is created by taking the parent node from one or more trees + # and creating the combinations with children from a single remaining tree. + # i.e. for some node (A,B), we want to fetch all combinations for the pairing of (A,B's children) and + # (A's children, B). For node (A,B,C), we would retrieve all combinations of (A,B,C's children) + # (A,B's children,C) etc... + # If a particular node has no children, it is skipped from the children expansion altogether. + + childrenList = [] + for i, parent in enumerate(rootTemplate): + # Start with the root template, and replace the ith member with its children + if parent.children: + childrenSet = [[group] for group in rootTemplate] + childrenSet[i] = parent.children + childrenList.extend(getAllCombinations(childrenSet)) + kineticsList = [] for template in childrenList: label = ';'.join([g.label for g in template]) - if template == rootTemplate: - continue if label in alreadyDone: kinetics = alreadyDone[label] From 00de0c1be86424753ec4cf2e2ea9e939f6070e05 Mon Sep 17 00:00:00 2001 From: Connie Gao Date: Wed, 20 Jul 2016 18:37:10 -0400 Subject: [PATCH 3/5] Set kinetics to the entry's data if it exists, but only AFTER children are explored --- rmgpy/data/kinetics/rules.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/rmgpy/data/kinetics/rules.py b/rmgpy/data/kinetics/rules.py index 0d84bffc3f..fbb0da7793 100644 --- a/rmgpy/data/kinetics/rules.py +++ b/rmgpy/data/kinetics/rules.py @@ -448,18 +448,6 @@ def fillRulesByAveragingUp(self, rootTemplate, alreadyDone): if rootLabel in alreadyDone: return alreadyDone[rootLabel] - - # See if we already have a rate rule for this exact template - entry = self.getRule(rootTemplate) - if entry is not None and entry.rank > 0: - # We already have a rate rule for this exact template - # If the entry has rank of zero, then we have so little faith - # in it that we'd rather use an averaged value if possible - # Since this entry does not have a rank of zero, we keep its - # value - alreadyDone[rootLabel] = entry.data - return entry.data - # Generate the distance 1 pairings which must be averaged for this root template. # The distance 1 template is created by taking the parent node from one or more trees @@ -488,6 +476,18 @@ def fillRulesByAveragingUp(self, rootTemplate, alreadyDone): if kinetics is not None: kineticsList.append([kinetics, template]) + + # See if we already have a rate rule for this exact template instead + # and return it now that we have finished searching its children + entry = self.getRule(rootTemplate) + if entry is not None and entry.rank > 0: + # We already have a rate rule for this exact template + # If the entry has rank of zero, then we have so little faith + # in it that we'd rather use an averaged value if possible + # Since this entry does not have a rank of zero, we keep its + # value + alreadyDone[rootLabel] = entry.data + return entry.data if len(kineticsList) > 0: From 6aaadef018d816615bf97c952bd92e42c11da94b Mon Sep 17 00:00:00 2001 From: Connie Gao Date: Wed, 20 Jul 2016 18:38:58 -0400 Subject: [PATCH 4/5] Change kinetics average commenting, and don't average the kineticsList when there's just one entry Deep copy the kinetics even if one entry is used in the average, because we are going to be modifying the comments and we don't want to modify the original object. Use just the children's template labels for an averaged rule's comments. This modifies the commenting that we used to do, which traces the entire averaging history by recursively appending to the kinetics comments in order to view the actual original templates that factor into the average. Instead this new method of writing the kinetics comments just lists the distance 1 children that in fact has kinetics. In order to do more detailed analysis, one would have to go inside to uncomment the debug lines that use the former kinetics comment style that retains full information. However, this is too memory intensive and difficult to read for normal RMG jobs, and wil print out too much to verbose chemkin output using the original method. --- rmgpy/data/kinetics/rules.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/rmgpy/data/kinetics/rules.py b/rmgpy/data/kinetics/rules.py index fbb0da7793..03a2fa5837 100644 --- a/rmgpy/data/kinetics/rules.py +++ b/rmgpy/data/kinetics/rules.py @@ -491,14 +491,30 @@ def fillRulesByAveragingUp(self, rootTemplate, alreadyDone): if len(kineticsList) > 0: - # We found one or more results! Let's average them together - kinetics = self.__getAverageKinetics([k for k, t in kineticsList]) if len(kineticsList) > 1: - kinetics.comment += 'Average of ({0})'.format( - ' + '.join(k.comment if k.comment != '' else ';'.join(g.label for g in t) for k, t in kineticsList)) + # We found one or more results! Let's average them together + kinetics = self.__getAverageKinetics([k for k, t in kineticsList]) + kinetics.comment = 'Average of ({0})'.format( + ' + '.join(';'.join(g.label for g in t) for k, t in kineticsList)) + + # For debug mode: uncomment the following kinetics commenting + # lines and use them instead of the lines above. Caution: large memory usage. + + # kinetics.comment += 'Average of ({0})'.format( + # ' + '.join(k.comment if k.comment != '' else ';'.join(g.label for g in t) for k, t in kineticsList)) + else: k,t = kineticsList[0] - kinetics.comment += k.comment if k.comment != '' else ';'.join(g.label for g in t) + kinetics = deepcopy(k) + # Even though we are using just a single set of kinetics, it's still considered + # an average. It just happens that the other distance 1 children had no data. + kinetics.comment = 'Average of ({0})'.format(';'.join(g.label for g in t)) + + # For debug mode: uncomment the following kinetics commenting + # lines and use them instead of the lines above. Caution: large memory usage. + + # kinetics.comment += 'Average of ({0}).format(k.comment if k.comment != '' else ';'.join(g.label for g in t)) + entry = Entry( index = 0, label = rootLabel, From b21bc48ae066f249f18dcc579f272e93c4a9b400 Mon Sep 17 00:00:00 2001 From: Connie Gao Date: Wed, 20 Jul 2016 18:46:23 -0400 Subject: [PATCH 5/5] Select the rate rules to average based on the norm distance Previously, we were using a distance algorithm that choose the manhattan distance. Now we move to use the Euclidian norm distance instead in hopes of decreasing the usage of very general nodes Speed up norm distance calculation by storing the node's level in the Entry() object when parsing group tree --- rmgpy/data/base.py | 7 +++++-- rmgpy/data/kinetics/rules.py | 27 ++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/rmgpy/data/base.py b/rmgpy/data/base.py index b47bb1032f..65106a1505 100644 --- a/rmgpy/data/base.py +++ b/rmgpy/data/base.py @@ -451,7 +451,7 @@ def loadOldDictionary(self, path, pattern): def __loadTree(self, tree): """ - Parse an old-style RMG tree located at `tree`. An RMG tree is an n-ary + Parse an group tree located at `tree`. An RMG tree is an n-ary tree representing the hierarchy of items in the dictionary. """ @@ -498,7 +498,10 @@ def __loadTree(self, tree): else: entry.parent = None self.top.append(entry) - + + # Save the level of the tree into the entry + entry.level = level + # Add node to list of parents for subsequent iteration parents.append(label) diff --git a/rmgpy/data/kinetics/rules.py b/rmgpy/data/kinetics/rules.py index 03a2fa5837..7e70cb764e 100644 --- a/rmgpy/data/kinetics/rules.py +++ b/rmgpy/data/kinetics/rules.py @@ -565,7 +565,25 @@ def __getAverageKinetics(self, kineticsList): E0 = (E0*0.001,"kJ/mol"), ) return averagedKinetics + + def calculateNormDistance(self, template, otherTemplate): + """ + Calculate the norm distance squared between two rate rules with + `template` and `otherTemplate`. The norm distance is + a^2 + b^2 + c^2 .... when a is the distance between the nodes in the + first tree, b is the distance between the nodes in the second tree, etc. + """ + + # Do it the stupid way first and calculate distances from the top + # rather than from each other for now... it's dumb but need to see results first + import numpy + depth = numpy.array([node.level for node in template]) + otherDepth = numpy.array([otherNode.level for otherNode in otherTemplate]) + distance = numpy.array(depth-otherDepth) + norm = numpy.dot(distance,distance) + return norm + def estimateKinetics(self, template, degeneracy=1): """ Determine the appropriate kinetics for a reaction with the given @@ -588,7 +606,14 @@ def getTemplateLabel(template): kineticsList.append([kinetics, t]) if len(kineticsList) > 0: - + + if len(kineticsList) > 1: + # Filter the kinetics to use templates with the lowest minimum euclidean distance + # from the specified template + norms = [self.calculateNormDistance(template, t) for kinetics,t in kineticsList] + minNorm = min(norms) + kineticsList = [pair for pair, norm in zip(kineticsList,norms) if norm == min(norms)] + if len(kineticsList) == 1: kinetics, t = kineticsList[0] # Check whether the exact rate rule for the original template (most specific