From 3a93b3d8fd0c09c5d601b3bfc7eb61e22a4c9e37 Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 28 Nov 2012 17:49:16 -0500 Subject: [PATCH 01/39] Create generateThermoDataForListOfSpecies() method in CoreEdgeReactionModel This is so we can do parallelization etc within this method. For now it's very simple and just the same as before. --- rmgpy/rmg/model.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index af32ad5146..2927dfe56f 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -611,8 +611,7 @@ def enlarge(self, newObject): # Generate thermodynamics of new species logging.info('Generating thermodynamics for new species...') - for spec in newSpeciesList: - spec.generateThermoData(database) + self.generateThermoDataForListOfSpecies(newSpeciesList) # Generate kinetics of new reactions logging.info('Generating kinetics for new reactions...') @@ -658,6 +657,15 @@ def enlarge(self, newObject): ) logging.info('') + + def generateThermoDataForListOfSpecies(self, listOfSpecies): + """ + Generates the thermo data for a list of species. + + Results are stored in the species objects themselves. + """ + for spec in listOfSpecies: + spec.generateThermoData(database) def processNewReactions(self, newReactions, newSpecies, pdepNetwork=None): """ From 6988ab2d05d6da8dd2729306fc4b20ddff5bf684 Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 28 Nov 2012 19:13:37 -0500 Subject: [PATCH 02/39] An attempt at using SCOOP to parallelize the thermo data estimation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run as python -m scoop -vv rmg.py path/to/input.py This is VERY slow, as we're pickling, passing, and unpickling the entire database every time we try to evaluate a species. 
…but it works :) --- rmgpy/rmg/model.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 2927dfe56f..296dba0be9 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -37,6 +37,8 @@ import numpy import os.path +from scoop import futures + from rmgpy.display import display import rmgpy.constants as constants @@ -54,6 +56,9 @@ from pdep import PDepReaction, PDepNetwork, PressureDependenceError +def makeThermoForSpecies(spec, database=None): + spec.generateThermoData(database) + return spec.thermo ################################################################################ class Species(rmgpy.species.Species): @@ -664,8 +669,13 @@ def generateThermoDataForListOfSpecies(self, listOfSpecies): Results are stored in the species objects themselves. """ - for spec in listOfSpecies: - spec.generateThermoData(database) + # this works without scoop: + #outputs = map((lambda spec: makeThermoForSpecies(spec, database=rmgpy.data.rmg.database)), listOfSpecies) + # this tried so do it via scoop's map: + outputs = futures.map(makeThermoForSpecies, listOfSpecies, database=rmgpy.data.rmg.database) + for spec, thermo in zip(listOfSpecies, outputs): + spec.thermo = thermo + def processNewReactions(self, newReactions, newSpecies, pdepNetwork=None): """ From 5023117a78f29d7f8ea9f05dce193f82d9f1522d Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 3 Apr 2013 18:57:11 -0400 Subject: [PATCH 03/39] Pickling and unpickle database for scoop. This is the start of a framework for sharing the database across multiple workers with scoop. Saving is a method of rmgpy.data.rmg. Loading is done when needed. The filename used to store the database pickle is set via the environment variable RMG_DB_FILE. 
Call it with something like: RMG_DB_FILE=$PWD/database.pkl python -m scoop -vv rmg.py path/to/input.py --- rmgpy/data/rmg.py | 16 ++++++++++++++++ rmgpy/rmg/main.py | 5 +++++ rmgpy/rmg/model.py | 23 ++++++++++++++++++----- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/rmgpy/data/rmg.py b/rmgpy/data/rmg.py index 80be92a6a4..92f8203c1f 100644 --- a/rmgpy/data/rmg.py +++ b/rmgpy/data/rmg.py @@ -34,6 +34,7 @@ """ import os.path +import logging from base import ForbiddenStructures from thermo import ThermoDatabase @@ -174,3 +175,18 @@ def saveOld(self, path): self.forbiddenStructures.saveOld(os.path.join(path, 'ForbiddenStructures.txt')) self.kinetics.saveOld(path) self.statmech.saveOld(path) + + def saveToPickle(self, path): + """ + Save the database to a pickle file. + + This is so that other workers (in a parallel computing environment) + can load it easily from disk. + """ + import cPickle + global databaseFilePath + databaseFilePath = path + logging.info('Saving database pickle file {0!s}'.format(databaseFilePath)) + f = open(path, 'wb') + cPickle.dump(self, f, cPickle.HIGHEST_PROTOCOL) + f.close() diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index eb4ca06199..7e2c747e6c 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -262,6 +262,11 @@ def initialize(self, args): # Load databases self.loadDatabase() + + # Save database pickle + #databaseFilePath = os.path.join(self.outputDirectory,'database.pkl') + databaseFilePath = os.environ['RMG_DB_FILE'] + self.database.saveToPickle(databaseFilePath) # Set wall time if args.walltime == '0': diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 296dba0be9..fcdd3c5cdd 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -55,10 +55,24 @@ from pdep import PDepReaction, PDepNetwork, PressureDependenceError +__database = None -def makeThermoForSpecies(spec, database=None): - spec.generateThermoData(database) +def makeThermoForSpecies(spec): + """ + Make thermo for a species. 
+ """ + global __database + if __database == None: + """Load the database from some pickle file""" + import cPickle, logging + filename = os.environ['RMG_DB_FILE'] + logging.info('Loading database pickle file from {0!r}'.format(filename)) + f = open(filename, 'rb') + __database = cPickle.load(f) + f.close() + spec.generateThermoData(__database) return spec.thermo + ################################################################################ class Species(rmgpy.species.Species): @@ -670,13 +684,12 @@ def generateThermoDataForListOfSpecies(self, listOfSpecies): Results are stored in the species objects themselves. """ # this works without scoop: - #outputs = map((lambda spec: makeThermoForSpecies(spec, database=rmgpy.data.rmg.database)), listOfSpecies) + #outputs = map(makeThermoForSpecies, listOfSpecies) # this tried so do it via scoop's map: - outputs = futures.map(makeThermoForSpecies, listOfSpecies, database=rmgpy.data.rmg.database) + outputs = futures.map(makeThermoForSpecies, listOfSpecies) for spec, thermo in zip(listOfSpecies, outputs): spec.thermo = thermo - def processNewReactions(self, newReactions, newSpecies, pdepNetwork=None): """ Process a list of newly-generated reactions involving the new core From 7b711191b478eb9586ee535851531eb9068a687f Mon Sep 17 00:00:00 2001 From: Richard West Date: Wed, 27 Mar 2013 15:36:06 -0400 Subject: [PATCH 04/39] Added scoop example input file and LSF submission file. The example is a copy of the methylformate example. The lsf.sh script should be submitted to the LSF queuing system. This is the system used on "Venture" at Northeastern University. Submit with "bsub < lsf.sh". Apparently Lava is related, so it may work for that also. http://en.wikipedia.org/wiki/Platform_LSF Equivalent ones should probably be written for PBS, etc. NB. the RMG_DB_FILE environment variable is required. 
--- examples/rmg/scoop/input.py | 159 ++++++++++++++++++++++++++++++++++++ examples/rmg/scoop/lsf.sh | 60 ++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 examples/rmg/scoop/input.py create mode 100755 examples/rmg/scoop/lsf.sh diff --git a/examples/rmg/scoop/input.py b/examples/rmg/scoop/input.py new file mode 100644 index 0000000000..40efdda024 --- /dev/null +++ b/examples/rmg/scoop/input.py @@ -0,0 +1,159 @@ +# Data sources +database( + thermoLibraries = ['primaryThermoLibrary','DFT_QCI_thermo','GRI-Mech3.0'], + reactionLibraries = [('Methylformate',False),('Glarborg/highP',False)], + seedMechanisms = ['Glarborg/C2'], + kineticsDepositories = ['training'], + kineticsFamilies = ['!Intra_Disproportionation'], + kineticsEstimator = 'rate rules', +) + +# List of species +species( + label='Mfmt', + reactive=True, + structure=SMILES("COC=O"), +) +species( + label='O2', + reactive=True, + structure=SMILES("[O][O]"), +) +species( + label='C2H', + reactive=True, + structure=SMILES("C#[C]"), +) +species( + label='CH', + reactive=True, + structure=adjacencyList( + """ + 1 C 3 {2,S} + 2 H 0 {1,S} + """), +) +species( + label='H2O', + reactive=True, + structure=SMILES("O"), +) +species( + label='H2', + reactive=True, + structure=SMILES("[H][H]"), +) +species( + label='CO', + reactive=True, + structure=SMILES("[C]=O"), +) +species( + label='CO2', + reactive=True, + structure=SMILES("C(=O)=O"), +) +species( + label='CH4', + reactive=True, + structure=SMILES("C"), +) +species( + label='CH3', + reactive=True, + structure=SMILES("[CH3]"), +) +species( + label='CH3OH', + reactive=True, + structure=SMILES("CO"), +) +species( + label='C2H4', + reactive=True, + structure=SMILES("C=C"), +) +species( + label='C2H2', + reactive=True, + structure=SMILES("C#C"), +) +species( + label='CH2O', + reactive=True, + structure=SMILES("C=O"), +) +species( + label='CH3CHO', + reactive=True, + structure=SMILES("CC=O"), +) + + +# Bath gas +species( + label='Ar', + reactive=False, 
+ structure=InChI("InChI=1S/Ar"), +) + +# Reaction systems +simpleReactor( + temperature=(650,'K'), + pressure=(1.0,'bar'), + initialMoleFractions={ + "Mfmt": 0.01, + "O2": 0.02, + "Ar": 0.08, + }, + terminationTime=(0.5,'s'), +) +simpleReactor( + temperature=(1350,'K'), + pressure=(3.0,'bar'), + initialMoleFractions={ + "Mfmt": 0.01, + "O2": 0.02, + "Ar": 0.97, + }, + terminationTime=(0.5,'s'), +) +simpleReactor( + temperature=(1950,'K'), + pressure=(10.0,'bar'), + initialMoleFractions={ + "Mfmt": 0.01, + "O2": 0.02, + "Ar": 0.97, + }, + terminationTime=(0.5,'s'), +) + +simulator( + atol=1e-22, + rtol=1e-8, +) + +model( + toleranceKeepInEdge=0.0, + toleranceMoveToCore=0.0005, + toleranceInterruptSimulation=1.0, + maximumEdgeSpecies=100000 +) + +pressureDependence( + method='modified strong collision', # 'reservoir state' + maximumGrainSize=(1.0,'kcal/mol'), + minimumNumberOfGrains=200, + temperatures=(290,3500,'K',8), + pressures=(0.02,100,'bar',5), + interpolation=('Chebyshev', 6, 4), +) + +options( + units='si', + saveRestartPeriod=None, + drawMolecules=False, + generatePlots=False, + saveConcentrationProfiles=True, +) diff --git a/examples/rmg/scoop/lsf.sh b/examples/rmg/scoop/lsf.sh new file mode 100755 index 0000000000..dfa2e739c2 --- /dev/null +++ b/examples/rmg/scoop/lsf.sh @@ -0,0 +1,60 @@ +#!/bin/sh +#BSUB -o RMG.out +#BSUB -J RMGPyScoop +#BSUB -n 8 +#BSUB -e error_log +#BSUB -q medium_priority + +# This is a job submission file for a LSF queuing system to run +# the SCOOP-enabled parallel version of RMG-Py across 8 CPUs on +# a number of different compute nodes on a (potentially heterogeneous) cluster. 
+ +source ~/.bash_profile + +LAMHOST_FILE=hosts + +# start a new host file from scratch +rm -f $LAMHOST_FILE +touch $LAMHOST_FILE +# echo "# LAMMPI host file created by LSF on `date`" >> $LAMHOST_FILE +# check if we were able to start writing the conf file +if [ -f $LAMHOST_FILE ]; then + : +else + echo "$0: can't create $LAMHOST_FILE" + exit 1 +fi +HOST="" +NUM_PROC="" +FLAG="" +TOTAL_CPUS=0 +for TOKEN in $LSB_MCPU_HOSTS +do + if [ -z "$FLAG" ]; then + HOST="$TOKEN" + FLAG="0" + else + NUM_PROC="$TOKEN" + TOTAL_CPUS=`expr $TOTAL_CPUS + $NUM_PROC` + FLAG="1" + fi + if [ "$FLAG" = "1" ]; then + _x=0 + while [ $_x -lt $NUM_PROC ] + do + echo "$HOST" >>$LAMHOST_FILE + _x=`expr $_x + 1` + done + # get ready for the next host + FLAG="" + HOST="" + NUM_PROC="" + fi +done +# last thing added to LAMHOST_FILE +#echo "# end of LAMHOST file" >> $LAMHOST_FILE +echo "Your lamboot hostfile looks like:" +cat $LAMHOST_FILE + +export RMG_DB_FILE=$PWD/database.pkl +python -m scoop -vv --hostfile $LAMHOST_FILE $RMGpy/rmg.py input.py > RMG.stdout.log From 7fca647e4b2b6fed9d84ba2111c72fe04982697e Mon Sep 17 00:00:00 2001 From: Richard West Date: Fri, 5 Apr 2013 15:26:57 -0400 Subject: [PATCH 05/39] Added an rmgpy.utilities module with path_checksum() function. This function returns the checksum (hash) of a list of files/folders, eg. hash = rmgpy.utilities.path_checksum(['path/to/database']) This will be useful for checking whether things have changed, for cache validation. 
--- rmgpy/utilities.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 rmgpy/utilities.py diff --git a/rmgpy/utilities.py b/rmgpy/utilities.py new file mode 100644 index 0000000000..3a7ec28ba8 --- /dev/null +++ b/rmgpy/utilities.py @@ -0,0 +1,52 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +''' +Created on Apr 5, 2013 + +@author: rwest + + +''' + +import hashlib +from os.path import normpath, walk, isdir, isfile, dirname, basename, \ + exists as path_exists, join as path_join + +def path_checksum(paths): + """ + Recursively calculates a checksum representing the contents of all files + found with a sequence of file and/or directory paths. + + eg. path_checksum(['/tmp']) + + Based on post by David Moss at + http://code.activestate.com/recipes/576973-getting-the-sha-1-or-md5-hash-of-a-directory/ + """ + if not hasattr(paths, '__iter__'): + raise TypeError('sequence or iterable expected not %r!' % type(paths)) + + def _update_checksum(checksum, dirname, filenames): + for filename in sorted(filenames): + path = path_join(dirname, filename) + if isfile(path): + #print path + fh = open(path, 'rb') + while 1: + buf = fh.read(4096) + if not buf : break + checksum.update(buf) + fh.close() + + chksum = hashlib.sha1() + + for path in sorted([normpath(f) for f in paths]): + if path_exists(path): + if isdir(path): + walk(path, _update_checksum, chksum) + elif isfile(path): + _update_checksum(chksum, dirname(path), basename(path)) + + return chksum.hexdigest() + +if __name__ == '__main__': + print path_checksum([r'/tmp', '/etc/hosts']) From 6800963bf1bc47a989071adcdbfc1b4684cd99e9 Mon Sep 17 00:00:00 2001 From: Richard West Date: Fri, 5 Apr 2013 14:57:40 -0400 Subject: [PATCH 06/39] Cache the RMG Database in a pickle and a hash, to speed up reloading it. We hash a bunch of metadata to try to be sure that the cached database is the same as what would be loaded if you loaded it from scratch. 
Hopefully I have included everything that matters. --- rmgpy/data/__init__.py | 5 +++ rmgpy/rmg/main.py | 72 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/rmgpy/data/__init__.py b/rmgpy/data/__init__.py index e4558f7c1a..afa4dea966 100644 --- a/rmgpy/data/__init__.py +++ b/rmgpy/data/__init__.py @@ -31,4 +31,9 @@ import os.path def getDatabaseDirectory(): + raise NotImplementedError("This is wrong.") return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'database')) + +def getSourceHash(): + import rmgpy.utilities + return rmgpy.utilities.path_checksum([os.path.dirname(__file__)]) \ No newline at end of file diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 7e2c747e6c..5a6a371642 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -193,7 +193,67 @@ def saveInput(self, path=None): saveInputFile(path, self) def loadDatabase(self): + """ + Load the RMG Database. + + The data is loaded from self.databaseDirectory, according to settings in: + * self.thermoLibraries + * self.reactionLibraries + * self.seedMechanisms + * self.kineticsFamilies + * self.kineticsDepositories + + If `self.kineticsEstimator == 'rate rules'` then the training set values are + added and the blanks are filled in by averaging. + + If self.outputDirectory contains :file:`database.pkl` and :file:`database.hash` files then + these are checked for validity and used as a cache. Once loaded (and averages filled + in if necessary) then a cache (pickle and hash) is saved. + """ + import inspect, hashlib, cPickle, rmgpy.utilities + + # Make a hash of everything that could alter the contents of the database once it is fully loaded. + # Then we can compare this hash to the cached file to see if the cache is valid. 
+ database_metadata = { + 'path': self.databaseDirectory, + 'database hash': rmgpy.utilities.path_checksum([self.databaseDirectory]), + 'thermoLibraries': self.thermoLibraries, + 'reactionLibraries': [library for library, option in self.reactionLibraries], + 'seedMechanisms': self.seedMechanisms, + 'kineticsFamilies': self.kineticsFamilies, + 'kineticsDepositories': self.kineticsDepositories, + #'frequenciesLibraries': self.statmechLibraries, + 'kineticsEstimator': self.kineticsEstimator, + 'rmgpy.data source hash': rmgpy.data.getSourceHash(), + 'this source hash': hashlib.sha1(inspect.getsource(self.__class__)).hexdigest(), + } + database_hash = hashlib.sha1(cPickle.dumps(database_metadata)).hexdigest() + cache_hash_file = os.path.join(self.outputDirectory,'database.hash') + cache_pickle_file = os.path.join(self.outputDirectory,'database.pkl') + if not os.path.exists(cache_pickle_file): + logging.info("Couldn't find a database cache file {0!r} so will reload from source.".format(cache_pickle_file)) + elif not os.path.exists(cache_hash_file): + logging.info("Couldn't find database cache hash file {0!r} to validate cache so will reload from source.".format(cache_hash_file)) + else: + if database_hash != open(cache_hash_file,'r').read(): + logging.info("According to hash file, it looks like database cache is not valid. Will clear it and reload.") + os.unlink(cache_hash_file) + os.unlink(cache_pickle_file) + else: + logging.info("According to hash file, it looks like database cache is valid.") + database = cPickle.load(open(cache_pickle_file, 'rb')) + # Check the database from the pickle really does have the hash in the database.hash file. + if database.hash == database_hash: + logging.info("Database loaded from {0} has correct hash. Will use this cache.".format(cache_pickle_file)) + self.database = database + rmgpy.data.rmg.database = database # we need to store it in this module level variable too! 
+ return + else: + logging.info("Database loaded from {0} has INCORRECT hash. Will clear the cache and reload.".format(cache_pickle_file)) + os.unlink(cache_hash_file) + os.unlink(cache_pickle_file) + self.database = RMGDatabase() self.database.load( path = self.databaseDirectory, @@ -212,6 +272,13 @@ def loadDatabase(self): logging.info('Filling in rate rules in kinetics families by averaging...') for family in self.database.kinetics.families.values(): family.fillKineticsRulesByAveragingUp() + + self.database.hash = database_hash # store the hash in the database so we can check it when it is next pickled. + logging.info("Saving database cache in {0!r}".format(cache_pickle_file)) + self.database.saveToPickle(cache_pickle_file) + with open(cache_hash_file,'w') as f: + f.write(database_hash) + def initialize(self, args): """ @@ -262,11 +329,6 @@ def initialize(self, args): # Load databases self.loadDatabase() - - # Save database pickle - #databaseFilePath = os.path.join(self.outputDirectory,'database.pkl') - databaseFilePath = os.environ['RMG_DB_FILE'] - self.database.saveToPickle(databaseFilePath) # Set wall time if args.walltime == '0': From 6c12a790111db02bc2efeb31bb21c0f52a7c8f05 Mon Sep 17 00:00:00 2001 From: Richard West Date: Fri, 5 Apr 2013 15:08:55 -0400 Subject: [PATCH 07/39] Use scoop.shared module to share database cache location and hash. We pass around the location and hash of the database cache file so that each worker can load it and check it has the right version. This removes the need for the "RMG_DB_FILE" environment variable. 
--- examples/rmg/scoop/lsf.sh | 1 - rmgpy/rmg/main.py | 3 ++- rmgpy/rmg/model.py | 7 +++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/rmg/scoop/lsf.sh b/examples/rmg/scoop/lsf.sh index dfa2e739c2..b559fb0224 100755 --- a/examples/rmg/scoop/lsf.sh +++ b/examples/rmg/scoop/lsf.sh @@ -56,5 +56,4 @@ done echo "Your lamboot hostfile looks like:" cat $LAMHOST_FILE -export RMG_DB_FILE=$PWD/database.pkl python -m scoop -vv --hostfile $LAMHOST_FILE $RMGpy/rmg.py input.py > RMG.stdout.log diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 5a6a371642..1e46fc4204 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -211,7 +211,7 @@ def loadDatabase(self): these are checked for validity and used as a cache. Once loaded (and averages filled in if necessary) then a cache (pickle and hash) is saved. """ - import inspect, hashlib, cPickle, rmgpy.utilities + import inspect, hashlib, cPickle, rmgpy.utilities, scoop.shared # Make a hash of everything that could alter the contents of the database once it is fully loaded. # Then we can compare this hash to the cached file to see if the cache is valid. 
@@ -231,6 +231,7 @@ def loadDatabase(self): database_hash = hashlib.sha1(cPickle.dumps(database_metadata)).hexdigest() cache_hash_file = os.path.join(self.outputDirectory,'database.hash') cache_pickle_file = os.path.join(self.outputDirectory,'database.pkl') + scoop.shared.setConst(databaseFile=cache_pickle_file, databaseHash=database_hash) if not os.path.exists(cache_pickle_file): logging.info("Couldn't find a database cache file {0!r} so will reload from source.".format(cache_pickle_file)) elif not os.path.exists(cache_hash_file): diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index fcdd3c5cdd..b4a6366bb4 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -37,6 +37,7 @@ import numpy import os.path +import scoop from scoop import futures from rmgpy.display import display @@ -65,11 +66,13 @@ def makeThermoForSpecies(spec): if __database == None: """Load the database from some pickle file""" import cPickle, logging - filename = os.environ['RMG_DB_FILE'] - logging.info('Loading database pickle file from {0!r}'.format(filename)) + filename = scoop.shared.getConst('databaseFile') + database_hash = scoop.shared.getConst('databaseHash') + logging.info('Loading database pickle file from {0!r} on worker {1}'.format(filename, scoop.WORKER_NAME.decode() )) f = open(filename, 'rb') __database = cPickle.load(f) f.close() + assert __database.hash == database_hash, "Database loaded from {0!r} doesn't match expected hash!".format(filename) spec.generateThermoData(__database) return spec.thermo From 507a87546e9f529375b623f46538e5e9ce9bfdf3 Mon Sep 17 00:00:00 2001 From: keceli Date: Wed, 28 Aug 2013 12:39:27 -0400 Subject: [PATCH 08/39] First attempt to add QM capability with scoop. 
--- rmgpy/rmg/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 9632148cdd..4a17cdb0b9 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -64,7 +64,7 @@ __database = None -def makeThermoForSpecies(spec): +def makeThermoForSpecies(spec,qmValue=None): """ Make thermo for a species. """ @@ -74,12 +74,12 @@ def makeThermoForSpecies(spec): import cPickle, logging filename = scoop.shared.getConst('databaseFile') database_hash = scoop.shared.getConst('databaseHash') - logging.info('Loading database pickle file from {0!r} on worker {1}'.format(filename, scoop.WORKER_NAME.decode() )) + logging.info('Loading database pickle2 file from {0!r} on worker {1}'.format(filename, scoop.WORKER_NAME.decode() )) f = open(filename, 'rb') __database = cPickle.load(f) f.close() assert __database.hash == database_hash, "Database loaded from {0!r} doesn't match expected hash!".format(filename) - spec.generateThermoData(__database) + spec.generateThermoData(__database,quantumMechanics=qmValue) return spec.thermo ################################################################################ @@ -785,7 +785,7 @@ def generateThermoDataForListOfSpecies(self, listOfSpecies): # this works without scoop: #outputs = map(makeThermoForSpecies, listOfSpecies) # this tried so do it via scoop's map: - outputs = futures.map(makeThermoForSpecies, listOfSpecies) + outputs = futures.map(makeThermoForSpecies, listOfSpecies,qmValue=self.quantumMechanics) for spec, thermo in zip(listOfSpecies, outputs): spec.thermo = thermo From 997e8d8b63c9393f4da74604fc3a0dd975f4db12 Mon Sep 17 00:00:00 2001 From: keceli Date: Wed, 28 Aug 2013 22:04:14 -0400 Subject: [PATCH 09/39] Some explanation on how to install and use scoop mainly on Pharos --- README.scoop | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 README.scoop diff --git a/README.scoop b/README.scoop new file mode 100644 index 0000000000..a72382cd05 
--- /dev/null +++ b/README.scoop @@ -0,0 +1,16 @@ +Installing scoop: +Easiest way: +If you have root access +pip install scoop +If not +pip install --user scoop + +Using scoop: +RMG-py with scoop doesn't work perfectly, yet. But, you can still use it for doing QM calculations in parallel. +There are several catches. +1)Job submission through grid engine (in pharos) doesn't work. You need to submit a fake job and then ssh to that node and run your job interactively. +You can use sleep command for that. (sleep 1d, sleep 10h, etc.) +2)For the job to continue when you exit from the node, you need to use nohup command. You can type: +nohup python -m scoop.__main__ -vv -n 48 $RMGpy/rmg.py input.py > RMG.sdout.log & +-n 48 specifies that you will have 48 workers. You can change it based on the available number of processors. + From 56ca670b5b37b8e116b4244ea569781efb8858d5 Mon Sep 17 00:00:00 2001 From: keceli Date: Fri, 4 Oct 2013 01:56:41 -0400 Subject: [PATCH 10/39] rmgpy/qm/mopac.py: Wait for 1 sec for the buffer to write to disc. Temporary solution to avoid attribute errors from cclib during parsing. --- rmgpy/qm/mopac.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rmgpy/qm/mopac.py b/rmgpy/qm/mopac.py index c8c20e3547..0ab14aba15 100644 --- a/rmgpy/qm/mopac.py +++ b/rmgpy/qm/mopac.py @@ -63,7 +63,9 @@ def run(self): # submits the input file to mopac process = Popen([self.executablePath, self.inputFilePath]) process.communicate()# necessary to wait for executable termination! - + #Wait for OS to flush the buffer to disk. 
There should be a better way + import time + time.sleep(1) return self.verifyOutputFile() def verifyOutputFile(self): @@ -86,7 +88,7 @@ def verifyOutputFile(self): if not os.path.exists(self.outputFilePath): logging.debug("Output file {0} does not (yet) exist.".format(self.outputFilePath)) return False - + InChIMatch=False #flag (1 or 0) indicating whether the InChI in the file matches InChIaug this can only be 1 if InChIFound is also 1 InChIFound=False #flag (1 or 0) indicating whether an InChI was found in the log file @@ -265,4 +267,4 @@ def inputFileKeywords(self, attempt): multiplicity_keys, ) - return top_keys, bottom_keys, polar_keys \ No newline at end of file + return top_keys, bottom_keys, polar_keys From 5d060eb74d7929b3f600ad23c75e6546621d4e4f Mon Sep 17 00:00:00 2001 From: keceli Date: Fri, 11 Oct 2013 14:05:08 -0400 Subject: [PATCH 11/39] gaussian.py: added sleep --- rmgpy/qm/gaussian.py | 5 +++-- rmgpy/rmg/model.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/rmgpy/qm/gaussian.py b/rmgpy/qm/gaussian.py index f6a7e87d94..5b640fa75a 100644 --- a/rmgpy/qm/gaussian.py +++ b/rmgpy/qm/gaussian.py @@ -51,7 +51,8 @@ def run(self): # submits the input file to Gaussian process = Popen([self.executablePath, self.inputFilePath, self.outputFilePath]) process.communicate()# necessary to wait for executable termination! 
- + import time + time.sleep(1) return self.verifyOutputFile() def verifyOutputFile(self): @@ -250,4 +251,4 @@ def inputFileKeywords(self, attempt): assert attempt <= self.maxAttempts if attempt > self.scriptAttempts: attempt -= self.scriptAttempts - return self.keywords[attempt-1] \ No newline at end of file + return self.keywords[attempt-1] diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 4a17cdb0b9..9076bb4ba5 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -359,7 +359,7 @@ def checkForExistingSpecies(self, molecule): # Return an existing species if a match is found formula = molecule.getFormula() try: - speciesList = self.speciesDict[formula] + speciesList = self.speciesDict[formula] except KeyError: return False, None for spec in speciesList: From 7c68b07f07c96790061d7d57ef8e9b9ca2465577 Mon Sep 17 00:00:00 2001 From: keceli Date: Fri, 11 Oct 2013 14:34:27 -0400 Subject: [PATCH 12/39] gaussian.py: required for gaussian to work on pharos --- rmgpy/qm/gaussian.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rmgpy/qm/gaussian.py b/rmgpy/qm/gaussian.py index f6a7e87d94..39df6b639c 100644 --- a/rmgpy/qm/gaussian.py +++ b/rmgpy/qm/gaussian.py @@ -18,15 +18,18 @@ class Gaussian: inputFileExtension = '.gjf' outputFileExtension = '.log' - gaussEnv = os.getenv('GAUSS_EXEDIR') or os.getenv('g09root') or os.getenv('g03root') or "" + #gaussEnv = os.getenv('GAUSS_EXEDIR') or os.getenv('g09root') or os.getenv('g03root') or "" + gaussEnv = os.getenv('g03root') or os.getenv('g09root') + print gaussEnv if os.path.exists(os.path.join(gaussEnv , 'g09')): executablePath = os.path.join(gaussEnv , 'g09') - elif os.path.exists(os.path.join(gaussEnv , 'g03')): - executablePath = os.path.join(gaussEnv , 'g03') + elif os.path.lexists(os.path.join(gaussEnv , 'g03')): + executablePath = os.path.join(gaussEnv , 'g03/g03') else: executablePath = os.path.join(gaussEnv , '(g03 or g09)') usePolar = False + 
logging.info('executablePath=',executablePath) #: List of phrases that indicate failure #: NONE of these must be present in a succesful job. @@ -49,6 +52,7 @@ def testReady(self): def run(self): self.testReady() # submits the input file to Gaussian + print [self.executablePath, self.inputFilePath, self.outputFilePath] process = Popen([self.executablePath, self.inputFilePath, self.outputFilePath]) process.communicate()# necessary to wait for executable termination! @@ -250,4 +254,4 @@ def inputFileKeywords(self, attempt): assert attempt <= self.maxAttempts if attempt > self.scriptAttempts: attempt -= self.scriptAttempts - return self.keywords[attempt-1] \ No newline at end of file + return self.keywords[attempt-1] From 384db4e4e3ccfe02e1c1385672f268784a970961 Mon Sep 17 00:00:00 2001 From: keceli Date: Sat, 12 Oct 2013 02:03:11 -0400 Subject: [PATCH 13/39] attempt to run RMG without thermolibrary --- rmgpy/data/thermo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index a5b5213d29..cd3f12c08c 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -734,6 +734,8 @@ def getThermoDataFromLibrary(self, species, library): Returns a tuple: (ThermoData, library, entry) or None. """ + if library is None: + return None for label, entry in library.entries.iteritems(): for molecule in species.molecule: if molecule.isIsomorphic(entry.item) and entry.data is not None: From 79e67133586c8cfdb30c966060fa7e73c01bab36 Mon Sep 17 00:00:00 2001 From: keceli Date: Tue, 22 Oct 2013 13:41:36 -0400 Subject: [PATCH 14/39] Parallelizing thermoEstimator.py. 
--- examples/thermoEstimator/run.sh | 2 +- thermoEstimator.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/examples/thermoEstimator/run.sh b/examples/thermoEstimator/run.sh index d8dbeea875..8294763b04 100755 --- a/examples/thermoEstimator/run.sh +++ b/examples/thermoEstimator/run.sh @@ -1,4 +1,4 @@ #!/bin/bash # Run the thermo estimator on the given thermo input file -python ../../thermoEstimator.py input.py +python $RMGpy/thermoEstimator.py input.py diff --git a/thermoEstimator.py b/thermoEstimator.py index 086dca17b6..6a6a875329 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -12,7 +12,9 @@ from rmgpy.rmg.main import RMG from rmgpy.data.thermo import ThermoLibrary from rmgpy.chemkin import writeThermoEntry - +from rmgpy.rmg.model import makeThermoForSpecies +import scoop +from scoop import futures ################################################################################ def runThermoEstimator(inputFile): @@ -32,9 +34,12 @@ def runThermoEstimator(inputFile): # ThermoLibrary format with values for H, S, and Cp's. output = open(os.path.join(rmg.outputDirectory, 'output.txt'),'wb') library = ThermoLibrary(name='Thermo Estimation Library') - for species in rmg.initialSpecies: - species.generateThermoData(rmg.database, quantumMechanics=rmg.reactionModel.quantumMechanics) - + # for species in rmg.initialSpecies: + # species.generateThermoData(rmg.database, quantumMechanics=rmg.reactionModel.quantumMechanics) + listOfSpecies=rmg.initialSpecies + outputList = futures.map(makeThermoForSpecies, listOfSpecies,qmValue=rmg.reactionModel.quantumMechanics) + for species, thermo in zip(listOfSpecies, outputList): + species.thermo = thermo library.loadEntry( index = len(library.entries) + 1, label = species.label, From 13af2d95c0b74873f95e17f0baf57e5ce6735dfc Mon Sep 17 00:00:00 2001 From: keceli Date: Thu, 31 Oct 2013 16:12:35 -0400 Subject: [PATCH 15/39] PM6 and PM7 options are added for MOPAC. 
Usage software=MOPACPM6, etc. --- rmgpy/qm/main.py | 14 ++++++- rmgpy/qm/mopac.py | 102 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/rmgpy/qm/main.py b/rmgpy/qm/main.py index d5243f4ca3..4d557f9f14 100644 --- a/rmgpy/qm/main.py +++ b/rmgpy/qm/main.py @@ -141,10 +141,22 @@ def getThermoData(self, molecule): Ignores the settings onlyCyclics and maxRadicalNumber and does the calculation anyway if asked. (I.e. the code that chooses whether to call this method should consider those settings). + Options for QM calculations are: + mopac: Default calculation with Mopac is PM3 semiempirical method, should be changed to PM6 or PM7 + mopacPM3: PM3, Same as mopac option. + mopacPM6: PM6, better than PM3 (Journal of Molecular Modeling 13, 1173–1213, 2007.) + mopacPM7: PM7, excludes computational results from training set, might be better or slightly worse compared to PM6 + gaussian: Only PM3 is available. """ if self.settings.software == 'mopac': qm_molecule_calculator = rmgpy.qm.mopac.MopacMolPM3(molecule, self.settings) thermo0 = qm_molecule_calculator.generateThermoData() + elif self.settings.software == 'mopacPM6': + qm_molecule_calculator = rmgpy.qm.mopac.MopacMolPM6(molecule, self.settings) + thermo0 = qm_molecule_calculator.generateThermoData() + elif self.settings.software == 'mopacPM7': + qm_molecule_calculator = rmgpy.qm.mopac.MopacMolPM7(molecule, self.settings) + thermo0 = qm_molecule_calculator.generateThermoData() elif self.settings.software == 'gaussian': qm_molecule_calculator = rmgpy.qm.gaussian.GaussianMolPM3(molecule, self.settings) thermo0 = qm_molecule_calculator.generateThermoData() @@ -152,4 +164,4 @@ def getThermoData(self, molecule): raise Exception("Unknown QM software '{0}'".format(self.settings.software)) return thermo0 - \ No newline at end of file + diff --git a/rmgpy/qm/mopac.py b/rmgpy/qm/mopac.py index 04ded33b91..ec1b3db5dd 100644 --- a/rmgpy/qm/mopac.py +++ b/rmgpy/qm/mopac.py 
@@ -266,4 +266,104 @@ def inputFileKeywords(self, attempt): multiplicity_keys, ) - return top_keys, bottom_keys, polar_keys \ No newline at end of file + return top_keys, bottom_keys, polar_keys + +class MopacMolPM6(MopacMol): + + #: Keywords that will be added at the top and bottom of the qm input file + keywords = [ + {'top':"precise nosym", 'bottom':"oldgeo thermo nosym precise "}, + {'top':"precise nosym gnorm=0.0 nonr", 'bottom':"oldgeo thermo nosym precise "}, + {'top':"precise nosym gnorm=0.0", 'bottom':"oldgeo thermo nosym precise "}, + {'top':"precise nosym gnorm=0.0 bfgs", 'bottom':"oldgeo thermo nosym precise "}, + {'top':"precise nosym recalc=10 dmax=0.10 nonr cycles=2000 t=2000", 'bottom':"oldgeo thermo nosym precise "}, + ] + + @property + def scriptAttempts(self): + "The number of attempts with different script keywords" + return len(self.keywords) + + @property + def maxAttempts(self): + "The total number of attempts to try" + return 2 * len(self.keywords) + + + def inputFileKeywords(self, attempt): + """ + Return the top, bottom, and polar keywords for attempt number `attempt`. + + NB. `attempt`s begin at 1, not 0. 
+ """ + assert attempt <= self.maxAttempts + + if attempt > self.scriptAttempts: + attempt -= self.scriptAttempts + + multiplicity_keys = self.multiplicityKeywords[self.geometry.multiplicity] + + top_keys = "pm6 {0} {1}".format( + multiplicity_keys, + self.keywords[attempt-1]['top'], + ) + bottom_keys = "{0} pm6 {1}".format( + self.keywords[attempt-1]['bottom'], + multiplicity_keys, + ) + polar_keys = "oldgeo {0} nosym precise pm6 {1}".format( + 'polar' if self.geometry.multiplicity == 1 else 'static', + multiplicity_keys, + ) + + return top_keys, bottom_keys, polar_keys + +class MopacMolPM7(MopacMol): + + #: Keywords that will be added at the top and bottom of the qm input file + keywords = [ + {'top':"precise nosym", 'bottom':"oldgeo thermo nosym precise "}, + {'top':"precise nosym gnorm=0.0 nonr", 'bottom':"oldgeo thermo nosym precise "}, + {'top':"precise nosym gnorm=0.0", 'bottom':"oldgeo thermo nosym precise "}, + {'top':"precise nosym gnorm=0.0 bfgs", 'bottom':"oldgeo thermo nosym precise "}, + {'top':"precise nosym recalc=10 dmax=0.10 nonr cycles=2000 t=2000", 'bottom':"oldgeo thermo nosym precise "}, + ] + + @property + def scriptAttempts(self): + "The number of attempts with different script keywords" + return len(self.keywords) + + @property + def maxAttempts(self): + "The total number of attempts to try" + return 2 * len(self.keywords) + + + def inputFileKeywords(self, attempt): + """ + Return the top, bottom, and polar keywords for attempt number `attempt`. + + NB. `attempt`s begin at 1, not 0. 
+ """ + assert attempt <= self.maxAttempts + + if attempt > self.scriptAttempts: + attempt -= self.scriptAttempts + + multiplicity_keys = self.multiplicityKeywords[self.geometry.multiplicity] + + top_keys = "pm7 {0} {1}".format( + multiplicity_keys, + self.keywords[attempt-1]['top'], + ) + bottom_keys = "{0} pm7 {1}".format( + self.keywords[attempt-1]['bottom'], + multiplicity_keys, + ) + polar_keys = "oldgeo {0} nosym precise pm7 {1}".format( + 'polar' if self.geometry.multiplicity == 1 else 'static', + multiplicity_keys, + ) + + return top_keys, bottom_keys, polar_keys \ No newline at end of file From f3b60206628aa40575c6f7743316bbb8fcab306a Mon Sep 17 00:00:00 2001 From: keceli Date: Fri, 1 Nov 2013 00:10:17 -0400 Subject: [PATCH 16/39] Revert "gaussian.py: required for gaussian to work on pharos" This reverts commit 7c68b07f07c96790061d7d57ef8e9b9ca2465577. --- rmgpy/qm/gaussian.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/rmgpy/qm/gaussian.py b/rmgpy/qm/gaussian.py index 39df6b639c..f6a7e87d94 100644 --- a/rmgpy/qm/gaussian.py +++ b/rmgpy/qm/gaussian.py @@ -18,18 +18,15 @@ class Gaussian: inputFileExtension = '.gjf' outputFileExtension = '.log' - #gaussEnv = os.getenv('GAUSS_EXEDIR') or os.getenv('g09root') or os.getenv('g03root') or "" - gaussEnv = os.getenv('g03root') or os.getenv('g09root') - print gaussEnv + gaussEnv = os.getenv('GAUSS_EXEDIR') or os.getenv('g09root') or os.getenv('g03root') or "" if os.path.exists(os.path.join(gaussEnv , 'g09')): executablePath = os.path.join(gaussEnv , 'g09') - elif os.path.lexists(os.path.join(gaussEnv , 'g03')): - executablePath = os.path.join(gaussEnv , 'g03/g03') + elif os.path.exists(os.path.join(gaussEnv , 'g03')): + executablePath = os.path.join(gaussEnv , 'g03') else: executablePath = os.path.join(gaussEnv , '(g03 or g09)') usePolar = False - logging.info('executablePath=',executablePath) #: List of phrases that indicate failure #: NONE of these must be present in a succesful 
job. @@ -52,7 +49,6 @@ def testReady(self): def run(self): self.testReady() # submits the input file to Gaussian - print [self.executablePath, self.inputFilePath, self.outputFilePath] process = Popen([self.executablePath, self.inputFilePath, self.outputFilePath]) process.communicate()# necessary to wait for executable termination! @@ -254,4 +250,4 @@ def inputFileKeywords(self, attempt): assert attempt <= self.maxAttempts if attempt > self.scriptAttempts: attempt -= self.scriptAttempts - return self.keywords[attempt-1] + return self.keywords[attempt-1] \ No newline at end of file From 2231026677a9e826f138a85562611a4ebaffd98d Mon Sep 17 00:00:00 2001 From: keceli Date: Fri, 1 Nov 2013 11:38:06 -0400 Subject: [PATCH 17/39] Trying to avoid sleep solution for AttributeError in cclib object --- rmgpy/qm/mopac.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rmgpy/qm/mopac.py b/rmgpy/qm/mopac.py index c052003c0d..b8c16e6fa8 100644 --- a/rmgpy/qm/mopac.py +++ b/rmgpy/qm/mopac.py @@ -65,7 +65,7 @@ def run(self): process.communicate()# necessary to wait for executable termination! #Wait for OS to flush the buffer to disk. 
There should be a better way import time - time.sleep(1) + # time.sleep(1) return self.verifyOutputFile() def verifyOutputFile(self): @@ -179,7 +179,8 @@ def writeInputFile(self, attempt): input_string = '\n'.join(output) top_keys, bottom_keys, polar_keys = self.inputFileKeywords(attempt) - with open(self.inputFilePath, 'w') as mopacFile: + buffering=0 + with open(self.inputFilePath, 'w',buffering) as mopacFile: mopacFile.write(top_keys) mopacFile.write('\n') mopacFile.write(input_string) From f3e0bdf0326f3750f7591b4b3b98409734499de1 Mon Sep 17 00:00:00 2001 From: keceli Date: Tue, 5 Nov 2013 15:44:27 -0500 Subject: [PATCH 18/39] Reverting a mistake I did during the merge with master --- rmgpy/qm/mopac.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rmgpy/qm/mopac.py b/rmgpy/qm/mopac.py index c08a9b65d4..3742dc2e69 100644 --- a/rmgpy/qm/mopac.py +++ b/rmgpy/qm/mopac.py @@ -178,6 +178,7 @@ def writeInputFile(self, attempt): output.append('') input_string = '\n'.join(output) + top_keys, bottom_keys, polar_keys = self.inputFileKeywords(attempt) with open(self.inputFilePath, 'w') as mopacFile: mopacFile.write(top_keys) mopacFile.write('\n') From f87faf902868a9c3e909aac2b396d2a6a51f3796 Mon Sep 17 00:00:00 2001 From: keceli Date: Fri, 8 Nov 2013 00:53:36 -0500 Subject: [PATCH 19/39] Thermoestimator generates output even if the job does not finish Added a generator to divide species list into chunks (100 species) so that output.txt is written once a chunk is calculated. --- rmgpy/qm/mopac.py | 2 +- thermoEstimator.py | 34 +++++++++++++++++++++------------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/rmgpy/qm/mopac.py b/rmgpy/qm/mopac.py index 3742dc2e69..3b116b3279 100644 --- a/rmgpy/qm/mopac.py +++ b/rmgpy/qm/mopac.py @@ -64,7 +64,7 @@ def run(self): process = Popen([self.executablePath, self.inputFilePath]) process.communicate()# necessary to wait for executable termination! #Wait for OS to flush the buffer to disk. 
There should be a better way - import time + # import time # time.sleep(1) return self.verifyOutputFile() diff --git a/thermoEstimator.py b/thermoEstimator.py index 6a6a875329..353c27076d 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -16,7 +16,13 @@ import scoop from scoop import futures ################################################################################ - +def chunks(l, n): + """ Yield successive n-sized chunks from l. + From http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks-in-python + """ + for i in xrange(0, len(l), n): + yield l[i:i+n] + def runThermoEstimator(inputFile): """ Estimate thermo for a list of species using RMG and the settings chosen inside a thermo input file. @@ -37,18 +43,20 @@ def runThermoEstimator(inputFile): # for species in rmg.initialSpecies: # species.generateThermoData(rmg.database, quantumMechanics=rmg.reactionModel.quantumMechanics) listOfSpecies=rmg.initialSpecies - outputList = futures.map(makeThermoForSpecies, listOfSpecies,qmValue=rmg.reactionModel.quantumMechanics) - for species, thermo in zip(listOfSpecies, outputList): - species.thermo = thermo - library.loadEntry( - index = len(library.entries) + 1, - label = species.label, - molecule = species.molecule[0].toAdjacencyList(), - thermo = species.thermo.toThermoData(), - shortDesc = species.thermo.comment, - ) - output.write(writeThermoEntry(species)) - output.write('\n') + chunksize=50 + for chunk in list(chunks(listOfSpecies,chunksize)): + outputList = futures.map(makeThermoForSpecies, chunk,qmValue=rmg.reactionModel.quantumMechanics) + for species, thermo in zip(chunk, outputList): + species.thermo = thermo + library.loadEntry( + index = len(library.entries) + 1, + label = species.label, + molecule = species.molecule[0].toAdjacencyList(), + thermo = species.thermo.toThermoData(), + shortDesc = species.thermo.comment, + ) + output.write(writeThermoEntry(species)) + output.write('\n') output.close() 
library.save(os.path.join(rmg.outputDirectory,'ThermoLibrary.py')) From 458c5e715551c833d075122ddcd62c53d4ebdb2a Mon Sep 17 00:00:00 2001 From: keceli Date: Fri, 8 Nov 2013 14:06:23 -0500 Subject: [PATCH 20/39] Added profiling for thermoEstimator --- thermoEstimator.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/thermoEstimator.py b/thermoEstimator.py index 353c27076d..7be4b7247f 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -9,7 +9,7 @@ """ import os.path -from rmgpy.rmg.main import RMG +from rmgpy.rmg.main import RMG, processProfileStats, makeProfileGraph from rmgpy.data.thermo import ThermoLibrary from rmgpy.chemkin import writeThermoEntry from rmgpy.rmg.model import makeThermoForSpecies @@ -71,8 +71,31 @@ def runThermoEstimator(inputFile): parser = argparse.ArgumentParser() parser.add_argument('input', metavar='INPUT', type=str, nargs=1, help='Thermo input file') + parser.add_argument('-p', '--profile', action='store_true', help='run under cProfile to gather profiling statistics, and postprocess them if job completes') + parser.add_argument('-P', '--postprocess', action='store_true', help='postprocess profiling statistics from previous [failed] run; does not run the simulation') + args = parser.parse_args() inputFile = os.path.abspath(args.input[0]) + + if args.postprocess: + print "Postprocessing the profiler statistics (will be appended to thermo.log)" + args.profile = True - runThermoEstimator(inputFile) \ No newline at end of file + if args.profile: + import cProfile, sys, pstats, os + global_vars = {} + local_vars = {'inputFile': inputFile,'runThermoEstimator':runThermoEstimator} + command = """runThermoEstimator(inputFile)""" + stats_file = 'thermo.profile' + print("Running under cProfile") + if not args.postprocess: + # actually run the program! 
+ cProfile.runctx(command, global_vars, local_vars, stats_file) + # postprocess the stats + log_file = 'thermo.log' + processProfileStats(stats_file, log_file) + makeProfileGraph(stats_file) + + else: + runThermoEstimator(inputFile) \ No newline at end of file From 6ec8ae76b0c648d68eac9bf74e98ce6e146f57ec Mon Sep 17 00:00:00 2001 From: keceli Date: Sat, 9 Nov 2013 02:46:39 -0500 Subject: [PATCH 21/39] Added makeProfileGraph as a script to be able to modify profile graphics with different thresholds --- makeProfileGraph.py | 60 +++++++++++++++++++++++++++++++++++++++++++++ thermoEstimator.py | 3 ++- 2 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 makeProfileGraph.py diff --git a/makeProfileGraph.py b/makeProfileGraph.py new file mode 100644 index 0000000000..d49ea55da9 --- /dev/null +++ b/makeProfileGraph.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +def makeProfileGraph(stats_file,thresh_node,thresh_edge): + """ + Uses gprof2dot to create a graphviz dot file of the profiling information. + + This requires the gprof2dot package available via `pip install gprof2dot`. + Renders the result using the program 'dot' via a command like + `dot -Tpdf input.dot -o output.pdf`. + """ + try: + from gprof2dot import gprof2dot + except ImportError: + print('Package gprof2dot not found. 
Unable to create a graph of the profile statistics.') + print("`pip install gprof2dot` if you don't have it.") + return + import subprocess + m = gprof2dot.Main() + class Options: + pass + m.options = Options() + m.options.node_thres = thresh_node# default 0.8 + m.options.edge_thres = thresh_edge # default 0.1 + m.options.strip = False + m.options.wrap = True + m.theme = m.themes['color'] # bw color gray pink + parser = gprof2dot.PstatsParser(stats_file) + m.profile = parser.parse() + dot_file = stats_file + '.dot' + m.output = open(dot_file,'wt') + m.write_graph() + m.output.close() + try: + subprocess.check_call(['dot', '-Tpdf', dot_file, '-o', '{0}.pdf'.format(dot_file)]) + except subprocess.CalledProcessError: + print("Error returned by 'dot' when generating graph of the profile statistics.") + print("To try it yourself:\n dot -Tpdf {0} -o {0}.pdf".format(dot_file)) + except OSError: + print("Couldn't run 'dot' to create graph of profile statistics. Check graphviz is installed properly and on your path.") + print("Once you've got it, try:\n dot -Tpdf {0} -o {0}.pdf".format(dot_file)) + else: + print("Graph of profile statistics saved to: \n {0}.pdf".format(dot_file)) + +if __name__ == '__main__': + + import argparse + + parser = argparse.ArgumentParser(description="Creates a call graph with profiling information.") + parser.add_argument('FILE', type=str, default='RMG.profile', help='.profile file') + parser.add_argument('THRESH_NODE', type=float, default=0.8, help='threshold percentage value for nodes') + parser.add_argument('THRESH_EDGE', type=float, default=0.1, help='threshold percentage value for nodes') + args = parser.parse_args() + stats_file=args.FILE + thresh_node=args.THRESH_NODE + thresh_edge=args.THRESH_EDGE + + makeProfileGraph(stats_file,thresh_node,thresh_edge) + + diff --git a/thermoEstimator.py b/thermoEstimator.py index 7be4b7247f..48823867da 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -80,6 +80,7 @@ def 
runThermoEstimator(inputFile): if args.postprocess: print "Postprocessing the profiler statistics (will be appended to thermo.log)" + print "Use `dot -Tpdf thermo_profile.dot -o thermo_profile.pdf`" args.profile = True if args.profile: @@ -98,4 +99,4 @@ def runThermoEstimator(inputFile): makeProfileGraph(stats_file) else: - runThermoEstimator(inputFile) \ No newline at end of file + runThermoEstimator(inputFile) From 5dd2c178e7f02b746be554d0dd5d85e341291fac Mon Sep 17 00:00:00 2001 From: keceli Date: Tue, 12 Nov 2013 02:02:07 -0500 Subject: [PATCH 22/39] Moved generateQMData to higher QMMolecule class along with minor modifications --- rmgpy/qm/gaussian.py | 27 ++------------------------- rmgpy/qm/main.py | 7 ++++--- rmgpy/qm/molecule.py | 23 ++++++++++++++++++++--- rmgpy/qm/mopac.py | 33 +++------------------------------ rmgpy/rmg/model.py | 2 +- 5 files changed, 30 insertions(+), 62 deletions(-) diff --git a/rmgpy/qm/gaussian.py b/rmgpy/qm/gaussian.py index 5b640fa75a..0090e4ffe7 100644 --- a/rmgpy/qm/gaussian.py +++ b/rmgpy/qm/gaussian.py @@ -51,8 +51,7 @@ def run(self): # submits the input file to Gaussian process = Popen([self.executablePath, self.inputFilePath, self.outputFilePath]) process.communicate()# necessary to wait for executable termination! - import time - time.sleep(1) + return self.verifyOutputFile() def verifyOutputFile(self): @@ -185,28 +184,6 @@ def inputFileKeywords(self, attempt): """ raise NotImplementedError("Should be defined by subclass, eg. GaussianMolPM3") - def generateQMData(self): - """ - Calculate the QM data and return a QMData object. 
- """ - self.createGeometry() - if self.verifyOutputFile(): - logging.info("Found a successful output file already; using that.") - else: - success = False - for attempt in range(1, self.maxAttempts+1): - self.writeInputFile(attempt) - success = self.run() - if success: - logging.info('Attempt {0} of {1} on species {2} succeeded.'.format(attempt, self.maxAttempts, self.molecule.toAugmentedInChI())) - break - else: - logging.error('QM thermo calculation failed for {0}.'.format(self.molecule.toAugmentedInChI())) - return None - result = self.parse() # parsed in cclib - return result - - class GaussianMolPM3(GaussianMol): @@ -251,4 +228,4 @@ def inputFileKeywords(self, attempt): assert attempt <= self.maxAttempts if attempt > self.scriptAttempts: attempt -= self.scriptAttempts - return self.keywords[attempt-1] + return self.keywords[attempt-1] \ No newline at end of file diff --git a/rmgpy/qm/main.py b/rmgpy/qm/main.py index 4d557f9f14..605ea0deab 100644 --- a/rmgpy/qm/main.py +++ b/rmgpy/qm/main.py @@ -96,7 +96,7 @@ def __init__(self, def setDefaultOutputDirectory(self, outputDirectory): """ - IF the fileStore or scratchDirectory are not already set, put them in here. + If the fileStore or scratchDirectory are not already set, put them in here. 
""" if not self.settings.fileStore: self.settings.fileStore = os.path.join(outputDirectory, 'QMfiles') @@ -124,7 +124,8 @@ def checkPaths(self): """ self.settings.fileStore = os.path.expandvars(self.settings.fileStore) # to allow things like $HOME or $RMGpy self.settings.scratchDirectory = os.path.expandvars(self.settings.scratchDirectory) - for path in [self.settings.fileStore, self.settings.scratchDirectory]: +# for path in [self.settings.fileStore, self.settings.scratchDirectory]: + for path in [self.settings.fileStore]: if not os.path.exists(path): logging.info("Creating directory %s for QM files."%os.path.abspath(path)) os.makedirs(path) @@ -148,7 +149,7 @@ def getThermoData(self, molecule): mopacPM7: PM7, excludes computational results from training set, might be better or slightly worse compared to PM6 gaussian: Only PM3 is available. """ - if self.settings.software == 'mopac': + if self.settings.software == 'mopac' or self.settings.software == 'mopacPM3': qm_molecule_calculator = rmgpy.qm.mopac.MopacMolPM3(molecule, self.settings) thermo0 = qm_molecule_calculator.generateThermoData() elif self.settings.software == 'mopacPM6': diff --git a/rmgpy/qm/molecule.py b/rmgpy/qm/molecule.py index 97e8f98c2f..6df12c743a 100644 --- a/rmgpy/qm/molecule.py +++ b/rmgpy/qm/molecule.py @@ -202,10 +202,27 @@ def createGeometry(self): def generateQMData(self): """ - Calculate the QM data somehow and return a CCLibData object, or None if it fails. + Calculate the QM data and return a QMData object, or None if it fails. 
""" - raise NotImplementedError("This should be defined in a subclass that inherits from QMMolecule") - return qmdata.QMData() or None + if self.verifyOutputFile(): + logging.info("Found a successful output file already; using that.") + source = "QM {0} result file found from previous run.".format(self.__class__.__name__) + else: + self.createGeometry() + success = False + for attempt in range(1, self.maxAttempts+1): + self.writeInputFile(attempt) + logging.info('Trying {3} attempt {0} of {1} on molecule {2}.'.format(attempt, self.maxAttempts, self.molecule.toSMILES(), self.__class__.__name__)) + success = self.run() + if success: + source = "QM {0} calculation attempt {1}".format(self.__class__.__name__, attempt ) + break + else: + logging.error('QM thermo calculation failed for {0}.'.format(self.molecule.toAugmentedInChI())) + return None + result = self.parse() # parsed in cclib + result.source = source + return result # a CCLibData object def generateThermoData(self): """ diff --git a/rmgpy/qm/mopac.py b/rmgpy/qm/mopac.py index 3b116b3279..c5ef3339cf 100644 --- a/rmgpy/qm/mopac.py +++ b/rmgpy/qm/mopac.py @@ -63,9 +63,7 @@ def run(self): # submits the input file to mopac process = Popen([self.executablePath, self.inputFilePath]) process.communicate()# necessary to wait for executable termination! - #Wait for OS to flush the buffer to disk. 
There should be a better way - # import time - # time.sleep(1) + return self.verifyOutputFile() def verifyOutputFile(self): @@ -88,7 +86,7 @@ def verifyOutputFile(self): if not os.path.exists(self.outputFilePath): logging.debug("Output file {0} does not (yet) exist.".format(self.outputFilePath)) return False - + InChIMatch=False #flag (1 or 0) indicating whether the InChI in the file matches InChIaug this can only be 1 if InChIFound is also 1 InChIFound=False #flag (1 or 0) indicating whether an InChI was found in the log file @@ -195,31 +193,6 @@ def inputFileKeywords(self, attempt): """ raise NotImplementedError("Should be defined by subclass, eg. MopacMolPM3") - def generateQMData(self): - """ - Calculate the QM data and return a QMData object, or None if it fails. - """ - if self.verifyOutputFile(): - logging.info("Found a successful output file already; using that.") - source = "QM MOPAC result file found from previous run." - else: - self.createGeometry() - success = False - for attempt in range(1, self.maxAttempts+1): - self.writeInputFile(attempt) - logging.info('Trying {3} attempt {0} of {1} on molecule {2}.'.format(attempt, self.maxAttempts, self.molecule.toSMILES(), self.__class__.__name__)) - success = self.run() - if success: - source = "QM {0} calculation attempt {1}".format(self.__class__.__name__, attempt ) - break - else: - logging.error('QM thermo calculation failed for {0}.'.format(self.molecule.toAugmentedInChI())) - return None - result = self.parse() # parsed in cclib - result.source = source - return result # a CCLibData object - - class MopacMolPM3(MopacMol): #: Keywords that will be added at the top and bottom of the qm input file @@ -368,4 +341,4 @@ def inputFileKeywords(self, attempt): multiplicity_keys, ) - return top_keys, bottom_keys, polar_keys + return top_keys, bottom_keys, polar_keys \ No newline at end of file diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 883cb590f3..909579da2c 100644 --- a/rmgpy/rmg/model.py +++ 
b/rmgpy/rmg/model.py @@ -74,7 +74,7 @@ def makeThermoForSpecies(spec,qmValue=None): import cPickle, logging filename = scoop.shared.getConst('databaseFile') database_hash = scoop.shared.getConst('databaseHash') - logging.info('Loading database pickle2 file from {0!r} on worker {1}'.format(filename, scoop.WORKER_NAME.decode() )) + #logging.info('Loading database pickle2 file from {0!r} on worker {1}'.format(filename, scoop.WORKER_NAME.decode() )) f = open(filename, 'rb') __database = cPickle.load(f) f.close() From 1901b8a4add4ed2bc5b526e24eb5ac92620f7eca Mon Sep 17 00:00:00 2001 From: keceli Date: Wed, 13 Nov 2013 00:27:21 -0500 Subject: [PATCH 23/39] scoop jobs can now be run through SGE Updated scoop from 0.62 to development version of 0.7RC1. With this new version of scoop you can pass environment variables to workers through bash scripts (prolog.sh). Fixed the wrong usage of futures.map. Interestingly, older version was working correctly even with this bug. There are many debug loggings that should be deleted. --- prolog.sh | 29 +++++++++++++++++++++++++++++ rmgpy/qm/main.py | 7 +++++++ rmgpy/qm/molecule.py | 7 +++++++ rmgpy/qm/mopac.py | 4 ++++ rmgpy/rmg/main.py | 2 +- rmgpy/rmg/model.py | 23 ++++++++++++++++------- thermoEstimator.py | 24 +++++++++++++++++------- 7 files changed, 81 insertions(+), 15 deletions(-) create mode 100755 prolog.sh diff --git a/prolog.sh b/prolog.sh new file mode 100755 index 0000000000..f85d242d58 --- /dev/null +++ b/prolog.sh @@ -0,0 +1,29 @@ +#!/bin/bash +#The script that each workers of scoop runs. +#Should be modified based on user profile. 
+source ~/.bashrc +export RMGQM="/opt/rmgqm" +#set Gaussian03 environment variables +g03root=/opt +GAUSS_SCRDIR=/scratch/$USER +export g03root GAUSS_SCRDIR +GAUSS_EXEDIR="$g03root/g03/" +GAUSS_LEXEDIR="$g03root/g03/linda-exe" +GAUSS_ARCHDIR="$g03root/g03/arch" +GMAIN=$GAUSS_EXEDIR/g03 +PATH=$PATH:$GMAIN +LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GMAIN +G03BASIS="$g03root/g03/basis" +F_ERROPT1="271,271,2,1,2,2,2,2" +TRAP_FPE="OVERFL=ABORT;DIVZERO=ABORT;INT_OVERFL=ABORT" +MP_STACK_OVERFLOW="OFF" +# to partially avoid KAI stupidity +KMP_DUPLICATE_LIB_OK="TRUE" +export GAUSS_EXEDIR GAUSS_ARCHDIR PATH GMAIN LD_LIBRARY_PATH F_ERROPT1 TRAP_FPE MP_STACK_OVERFLOW \ + KMP_DUPLICATE_LIB_OK G03BASIS GAUSS_LEXEDIR +#set MOPAC +export MOPAC_LICENSE=/opt/mopac/ +# +export PYTHONPATH=/home/keceli/RMG/RMG-Py/PyDAS/build/lib.linux-x86_64-2.6:/home/keceli/RMG/RMG-Py/PyDQED:/home/keceli/local/lib/python2.6/site-packages:/opt/rmgqm/RDKit_2013_03_2:/usr/local/lib/python2.6/dist-packages:$PYTHONPATH +export PATH=/home/keceli/kiler:/home/keceli/bin:/home/keceli/local/bin:/opt/mpich2-1.2.1p1/bin:/opt/intel/Compiler/11.0/074/bin/intel64:/opt/sge/bin/lx24-amd64:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/bin/mh:/opt/g03/g03:$PATH +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/Compiler/11.0/074/ipp/em64t/sharedlib:/opt/intel/Compiler/11.0/074/mkl/lib/em64t:/opt/intel/Compiler/11.0/074/tbb/em64t/cc4.1.0_libc2.4_kernel2.6.16.21/lib:/opt/intel/Compiler/11.0/074/lib/intel64:/opt/g03/g03:/usr/local/lib:/opt/rmgqm/RDKit_2013_03_2/bin:/opt/rmgqm/boost_1_44_0/lib:/opt/rmgqm/RDKit_2013_03_2/lib diff --git a/rmgpy/qm/main.py b/rmgpy/qm/main.py index 605ea0deab..ef975f2fb5 100644 --- a/rmgpy/qm/main.py +++ b/rmgpy/qm/main.py @@ -69,6 +69,7 @@ def checkAllSet(self): assert type(self.onlyCyclics) is BooleanType assert self.maxRadicalNumber is not None # but it can be 0 assert type(self.maxRadicalNumber) is IntType + logging.debug("QM settings are ok.") class QMCalculator(): 
""" @@ -134,6 +135,7 @@ def checkPaths(self): raise Exception("RMG-Py 'bin' directory {0} does not exist.".format(self.settings.RMG_bin_path)) if not os.path.isdir(self.settings.RMG_bin_path): raise Exception("RMG-Py 'bin' directory {0} is not a directory.".format(self.settings.RMG_bin_path)) + logging.debug("QM paths are ok.") def getThermoData(self, molecule): @@ -150,15 +152,20 @@ def getThermoData(self, molecule): gaussian: Only PM3 is available. """ if self.settings.software == 'mopac' or self.settings.software == 'mopacPM3': + logging.debug("Attempting for a {0} calculation.".format(self.settings.software)) qm_molecule_calculator = rmgpy.qm.mopac.MopacMolPM3(molecule, self.settings) thermo0 = qm_molecule_calculator.generateThermoData() + logging.debug("{0} calculation attempted.".format(self.settings.software)) elif self.settings.software == 'mopacPM6': + logging.debug("Attempting for a {0} calculation.".format(self.settings.software)) qm_molecule_calculator = rmgpy.qm.mopac.MopacMolPM6(molecule, self.settings) thermo0 = qm_molecule_calculator.generateThermoData() elif self.settings.software == 'mopacPM7': + logging.debug("Attempting for a {0} calculation.".format(self.settings.software)) qm_molecule_calculator = rmgpy.qm.mopac.MopacMolPM7(molecule, self.settings) thermo0 = qm_molecule_calculator.generateThermoData() elif self.settings.software == 'gaussian': + logging.debug("Attempting for a {0} calculation.".format(self.settings.software)) qm_molecule_calculator = rmgpy.qm.gaussian.GaussianMolPM3(molecule, self.settings) thermo0 = qm_molecule_calculator.generateThermoData() else: diff --git a/rmgpy/qm/molecule.py b/rmgpy/qm/molecule.py index 6df12c743a..fb08d8a4c3 100644 --- a/rmgpy/qm/molecule.py +++ b/rmgpy/qm/molecule.py @@ -204,6 +204,7 @@ def generateQMData(self): """ Calculate the QM data and return a QMData object, or None if it fails. 
""" + logging.debug("{0} calculation".format(self.__class__.__name__)) if self.verifyOutputFile(): logging.info("Found a successful output file already; using that.") source = "QM {0} result file found from previous run.".format(self.__class__.__name__) @@ -232,6 +233,7 @@ def generateThermoData(self): """ # First, see if we already have it. if self.loadThermoData(): + logging.debug("Already have thermo data") return self.thermo # If not, generate the QM data @@ -239,20 +241,24 @@ def generateThermoData(self): # If that fails, give up and return None. if self.qmData is None: + logging.debug("QM data is not found") return None self.determinePointGroup() # If that fails, give up and return None. if self.pointGroup is None: + logging.debug("No point group found") return None self.calculateThermoData() + logging.debug("Thermo data calculated") Cp0 = self.molecule.calculateCp0() CpInf = self.molecule.calculateCpInf() self.thermo.Cp0 = (Cp0,"J/(mol*K)") self.thermo.CpInf = (CpInf,"J/(mol*K)") self.saveThermoData() + logging.debug("Thermo data saved") return self.thermo def saveThermoData(self): @@ -343,6 +349,7 @@ def calculateThermoData(self): trans = rmgpy.statmech.IdealGasTranslation( mass=self.qmData.molecularMass ) if self.pointGroup.linear: + logging.debug("Linear molecule") rot = rmgpy.statmech.LinearRotor( rotationalConstant = self.qmData.rotationalConstants, symmetry = self.pointGroup.symmetryNumber, diff --git a/rmgpy/qm/mopac.py b/rmgpy/qm/mopac.py index c5ef3339cf..8c8eb3e0b8 100644 --- a/rmgpy/qm/mopac.py +++ b/rmgpy/qm/mopac.py @@ -20,10 +20,13 @@ class Mopac: mopacEnv = os.getenv('MOPAC_DIR', default="/opt/mopac") if os.path.exists(os.path.join(mopacEnv , 'MOPAC2012.exe')): executablePath = os.path.join(mopacEnv , 'MOPAC2012.exe') + logging.debug("{0} is found.".format(executablePath)) elif os.path.exists(os.path.join(mopacEnv , 'MOPAC2009.exe')): executablePath = os.path.join(mopacEnv , 'MOPAC2009.exe') + logging.debug("{0} is 
found.".format(executablePath)) else: executablePath = os.path.join(mopacEnv , '(MOPAC 2009 or 2012)') + logging.debug("{0} is found.".format(executablePath)) usePolar = False #use polar keyword in MOPAC @@ -56,6 +59,7 @@ class Mopac: def testReady(self): if not os.path.exists(self.executablePath): + logging.debug("{0} is not found.").format(self.executablePath) raise Exception("Couldn't find MOPAC executable at {0}. Try setting your MOPAC_DIR environment variable.".format(self.executablePath)) def run(self): diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index fc9c39703b..6087c04272 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -1080,7 +1080,7 @@ def initializeLog(verbose, log_file_name): logging.addLevelName(logging.ERROR, 'Error: ') logging.addLevelName(logging.WARNING, 'Warning: ') logging.addLevelName(logging.INFO, '') - logging.addLevelName(logging.DEBUG, '') + logging.addLevelName(logging.DEBUG, 'Debug:') logging.addLevelName(0, '') # Create formatter and add to console handler diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 909579da2c..9eb4b2f911 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -39,7 +39,7 @@ import itertools import scoop -from scoop import futures +from scoop import futures,shared from rmgpy.display import display #import rmgpy.chemkin @@ -64,22 +64,28 @@ __database = None -def makeThermoForSpecies(spec,qmValue=None): +def makeThermoForSpecies(spec): """ Make thermo for a species. 
""" + import logging + qmValue=shared.getConst('qmValue') + if qmValue: logging.debug("qmValue fine @ makeThermoForSpecies") global __database if __database == None: """Load the database from some pickle file""" - import cPickle, logging + import cPickle filename = scoop.shared.getConst('databaseFile') database_hash = scoop.shared.getConst('databaseHash') + logging.debug('Loading database pickle2 file'.format(filename)) #logging.info('Loading database pickle2 file from {0!r} on worker {1}'.format(filename, scoop.WORKER_NAME.decode() )) f = open(filename, 'rb') __database = cPickle.load(f) f.close() assert __database.hash == database_hash, "Database loaded from {0!r} doesn't match expected hash!".format(filename) + logging.debug("Generate thermo data in makeThermoForSpecies") spec.generateThermoData(__database,quantumMechanics=qmValue) + logging.debug("Thermo generated for {0}".format(spec.label)) return spec.thermo ################################################################################ @@ -117,17 +123,18 @@ def generateThermoData(self, database, thermoClass=NASA, quantumMechanics=None): from rmgpy.data.thermo import saveEntry thermo0 = None - thermo0 = database.thermo.getThermoDataFromLibraries(self) - + if quantumMechanics is None : logging.debug("qmValue is None at generateThermoData in model.py") if thermo0 is not None: - logging.info("Found thermo for {0} in thermo library".format(self.label)) + logging.debug("Found thermo for {0} in thermo library".format(self.label)) assert len(thermo0) == 3, "thermo0 should be a tuple at this point: (thermoData, library, entry)" thermo0 = thermo0[0] elif quantumMechanics: + logging.debug("Generate thermo data with QM") molecule = self.molecule[0] if quantumMechanics.settings.onlyCyclics and not molecule.isCyclic(): + logging.debug("Bypassing QM for ".format(self.label)) pass else: # try a QM calculation if molecule.getRadicalCount() > quantumMechanics.settings.maxRadicalNumber: @@ -162,10 +169,12 @@ def 
generateThermoData(self, database, thermoClass=NASA, quantumMechanics=None): f.write('{0}\n'.format(molecule.toSMILES())) f.write('{0}\n\n'.format(molecule.toAdjacencyList(removeH=True))) else: # Not too many radicals: do a direct calculation. + logging.debug("Generate thermo for {0} with QM".format(self.label)) thermo0 = quantumMechanics.getThermoData(molecule) # returns None if it fails - + if thermo0 is None: logging.debug("QM for {0} failed.".format(self.label)) if thermo0 is not None: # Write the QM molecule thermo to a library so that can be used in future RMG jobs. + logging.debug("QM for {0} is successful.".format(self.label)) quantumMechanics.database.loadEntry(index = len(quantumMechanics.database.entries) + 1, label = molecule.toSMILES(), molecule = molecule.toAdjacencyList(), diff --git a/thermoEstimator.py b/thermoEstimator.py index 48823867da..594c8642b9 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -9,12 +9,12 @@ """ import os.path -from rmgpy.rmg.main import RMG, processProfileStats, makeProfileGraph +import logging +from rmgpy.rmg.main import RMG, initializeLog, processProfileStats, makeProfileGraph from rmgpy.data.thermo import ThermoLibrary from rmgpy.chemkin import writeThermoEntry from rmgpy.rmg.model import makeThermoForSpecies -import scoop -from scoop import futures +from scoop import futures,shared ################################################################################ def chunks(l, n): """ Yield successive n-sized chunks from l. 
@@ -34,6 +34,7 @@ def runThermoEstimator(inputFile): # initialize and load the database as well as any QM settings rmg.loadDatabase() if rmg.quantumMechanics: + logging.debug("Initialize QM") rmg.quantumMechanics.initialize() # Generate the thermo for all the species and write them to chemkin format as well as @@ -44,9 +45,13 @@ def runThermoEstimator(inputFile): # species.generateThermoData(rmg.database, quantumMechanics=rmg.reactionModel.quantumMechanics) listOfSpecies=rmg.initialSpecies chunksize=50 + if rmg.reactionModel.quantumMechanics: logging.debug("qmValue fine @ runThermoEstimator") + shared.setConst(qmValue=rmg.reactionModel.quantumMechanics) for chunk in list(chunks(listOfSpecies,chunksize)): - outputList = futures.map(makeThermoForSpecies, chunk,qmValue=rmg.reactionModel.quantumMechanics) + outputList = futures.map(makeThermoForSpecies, chunk) + logging.debug("mapped") for species, thermo in zip(chunk, outputList): + logging.debug("specie {0}".format(species.label)) species.thermo = thermo library.loadEntry( index = len(library.entries) + 1, @@ -55,6 +60,7 @@ def runThermoEstimator(inputFile): thermo = species.thermo.toThermoData(), shortDesc = species.thermo.comment, ) + logging.debug("chunk done") output.write(writeThermoEntry(species)) output.write('\n') @@ -69,7 +75,7 @@ def runThermoEstimator(inputFile): import argparse parser = argparse.ArgumentParser() - parser.add_argument('input', metavar='INPUT', type=str, nargs=1, + parser.add_argument('input', metavar='FILE', type=str, nargs=1, help='Thermo input file') parser.add_argument('-p', '--profile', action='store_true', help='run under cProfile to gather profiling statistics, and postprocess them if job completes') parser.add_argument('-P', '--postprocess', action='store_true', help='postprocess profiling statistics from previous [failed] run; does not run the simulation') @@ -77,7 +83,8 @@ def runThermoEstimator(inputFile): args = parser.parse_args() inputFile = os.path.abspath(args.input[0]) - + 
inputDirectory = os.path.abspath(os.path.dirname(args.input[0])) + if args.postprocess: print "Postprocessing the profiler statistics (will be appended to thermo.log)" print "Use `dot -Tpdf thermo_profile.dot -o thermo_profile.pdf`" @@ -94,9 +101,12 @@ def runThermoEstimator(inputFile): # actually run the program! cProfile.runctx(command, global_vars, local_vars, stats_file) # postprocess the stats - log_file = 'thermo.log' + log_file = os.path.join(inputDirectory,'RMG.log') processProfileStats(stats_file, log_file) makeProfileGraph(stats_file) else: + level = logging.DEBUG + initializeLog(level, 'thermo.log') + logging.debug("runThermoEstimator...") runThermoEstimator(inputFile) From d3a292654cec1431a5b02e86a76cf379025c8338 Mon Sep 17 00:00:00 2001 From: keceli Date: Thu, 14 Nov 2013 03:28:46 -0500 Subject: [PATCH 24/39] Added examples for SCOOP and some minor improvements --- README.scoop | 16 - README2SCOOP.rst | 21 + examples/rmg/scoop/prolog.sh | 2 + examples/rmg/scoop/sge.sh | 25 + examples/thermoEstimator/scoop/input.py | 1547 ++++++++++++++++++++++ examples/thermoEstimator/scoop/lsf.sh | 59 + examples/thermoEstimator/scoop/prolog.sh | 2 + examples/thermoEstimator/scoop/sge.sh | 25 + prolog.sh | 29 - thermoEstimator.py | 18 +- 10 files changed, 1690 insertions(+), 54 deletions(-) delete mode 100644 README.scoop create mode 100644 README2SCOOP.rst create mode 100755 examples/rmg/scoop/prolog.sh create mode 100755 examples/rmg/scoop/sge.sh create mode 100644 examples/thermoEstimator/scoop/input.py create mode 100755 examples/thermoEstimator/scoop/lsf.sh create mode 100755 examples/thermoEstimator/scoop/prolog.sh create mode 100755 examples/thermoEstimator/scoop/sge.sh delete mode 100755 prolog.sh diff --git a/README.scoop b/README.scoop deleted file mode 100644 index a72382cd05..0000000000 --- a/README.scoop +++ /dev/null @@ -1,16 +0,0 @@ -Installing scoop: -Easiest way: -If you have root access -pip install scoop -If not -pip install --user scoop - -Using 
scoop: -RMG-py with scoop doesn't work perfectly, yet. But, you can still use it for doing QM calculations in parallel. -There are several catches. -1)Job submission through grid engine (in pharos) doesn't work. You need to submit a fake job and then ssh to that node and run your job interactively. -You can use sleep command for that. (sleep 1d, sleep 10h, etc.) -2)For the job to continue when you exit from the node, you need to use nohup command. You can type: -nohup python -m scoop.__main__ -vv -n 48 $RMGpy/rmg.py input.py > RMG.sdout.log & --n 48 specifies that you will have 48 workers. You can change it based on the available number of processors. - diff --git a/README2SCOOP.rst b/README2SCOOP.rst new file mode 100644 index 0000000000..5e07d8de83 --- /dev/null +++ b/README2SCOOP.rst @@ -0,0 +1,21 @@ +****************************************************** +SCOOP enabled RMG-Py +****************************************************** + +RMG-Py can be run in parallel (only for the thermochemical parameter +estimation part) using SCOOP module. +More info on SCOOP: http://code.google.com/p/scoop/ + +Running RMG-Py in parallel: + +python -m scoop.__main__ -n 8 $RMGpy/rmg.py input.py > RMG.stdout.log & + +-n 8 specifies that you will have 8 workers. +Set it based on the available number of processors. +For job submission scripts check examples/rmg/scoop. + +Installing SCOOP: + +You need the development version of SCOOP (tagged with 0.7RC2).
+Download link: http://scoop.googlecode.com/archive/0.7RC2.zip + diff --git a/examples/rmg/scoop/prolog.sh b/examples/rmg/scoop/prolog.sh new file mode 100755 index 0000000000..8fb7afab28 --- /dev/null +++ b/examples/rmg/scoop/prolog.sh @@ -0,0 +1,2 @@ +#!/bin/bash +source ~/.bash_profile diff --git a/examples/rmg/scoop/sge.sh b/examples/rmg/scoop/sge.sh new file mode 100755 index 0000000000..6fb3df6625 --- /dev/null +++ b/examples/rmg/scoop/sge.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#####################i################################################ +# This is a job submission file for a SGE queuing system to run +# the SCOOP-enabled parallel version of RMG-Py across 48 CPUs on +# a single node. +# +# Define RMGPy as the path to rmg.py in your ~/.bash_profile +# NSLOTS is an SGE env. variable for total number of CPUs. +# prolog.sh is a script used by SCOOP to pass env. variables +# +# You can run the jobs on different nodes as well, but it is not +# recommended since you might have problems with SGE job termination. +# Type `qconf -spl` to see available parallel environments and modify +# the last #$ line if you really want to run it on many nodes. 
+#####################i################################################ +#$ -S /bin/bash +#$ -cwd +#$ -notify +#$ -o job.log -j y +#$ -N RMGscoop +#$ -l normal +#$ -l h_rt=09:05:00 +#$ -pe singlenode 48 +source ~/.bash_profile +python -m scoop.__main__ --tunnel --prolog $RMGpy/examples/rmg/scoop/prolog.sh -n $NSLOTS $RMGpy/rmg.py input.py > std.out diff --git a/examples/thermoEstimator/scoop/input.py b/examples/thermoEstimator/scoop/input.py new file mode 100644 index 0000000000..2e71d42a79 --- /dev/null +++ b/examples/thermoEstimator/scoop/input.py @@ -0,0 +1,1547 @@ +database( + thermoLibraries = ['KlippensteinH2O2'] +) + +quantumMechanics( + software='mopac', + fileStore='QMfiles', + scratchDirectory = None, # not currently used + onlyCyclics = True, + maxRadicalNumber = 0, +) +species( +label="InChI=1/C10H18/c1-8-4-2-5-9-6-3-7-10(8)9/h8-10H,2-7H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {9,S} {11,S} {27,S} +2 C 0 {1,S} {3,S} {6,S} {12,S} +3 C 0 {2,S} {4,S} {13,S} {14,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {5,S} {2,S} {7,S} {19,S} +7 C 0 {6,S} {8,S} {10,S} {20,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {1,S} {23,S} {24,S} +10 C 0 {7,S} {25,S} {26,S} {28,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {1,S} +28 H 0 {10,S} +""")) +species( +label="InChI=1/C10H18/c1-2-6-9(5-1)10-7-3-4-8-10/h9-10H,1-8H2", +structure=adjacencyList(""" +1 C 0 {9,S} {10,S} {11,S} {27,S} +2 C 0 {3,S} {6,S} {12,S} {28,S} +3 C 0 {2,S} {4,S} {13,S} {14,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {5,S} {2,S} {7,S} {19,S} +7 C 0 {8,S} {10,S} {6,S} {20,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {1,S} {23,S} {24,S} +10 C 0 {1,S} {7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 
H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {1,S} +28 H 0 {2,S} +""")) +species( +label="InChI=1/C10H18/c1-2-8-6-7-9-4-3-5-10(8)9/h8-10H,2-7H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {10,S} {11,S} {27,S} +2 C 0 {1,S} {3,S} {6,S} {12,S} +3 C 0 {2,S} {4,S} {13,S} {14,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {5,S} {2,S} {7,S} {19,S} +7 C 0 {6,S} {10,S} {8,S} {20,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {23,S} {24,S} {28,S} +10 C 0 {1,S} {7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {1,S} +28 H 0 {9,S} +""")) +species( +label="InChI=1/C10H18/c1-2-4-9-6-7-10(8-9)5-3-1/h9-10H,1-8H2", +structure=adjacencyList(""" +1 C 0 {2,S} {9,S} {10,S} {11,S} +2 C 0 {1,S} {3,S} {12,S} {27,S} +3 C 0 {2,S} {4,S} {13,S} {14,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {5,S} {7,S} {19,S} {28,S} +7 C 0 {6,S} {8,S} {10,S} {20,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {1,S} {23,S} {24,S} +10 C 0 {1,S} {7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {2,S} +28 H 0 {6,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-9-6-8-4-5-10(9)7-8/h8-10H,2-7H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {9,S} {10,S} {11,S} +2 C 0 {1,S} {6,S} {12,S} {27,S} +3 C 0 {4,S} {13,S} {14,S} {28,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {2,S} {7,S} {5,S} {19,S} +7 C 0 {6,S} {8,S} {10,S} {20,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {1,S} {23,S} {24,S} +10 C 0 {1,S} 
{7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {2,S} +28 H 0 {3,S} +""")) +species( +label="InChI=1/C10H18/c1-3-10-7(2)8-4-5-9(10)6-8/h7-10H,3-6H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {9,S} {10,S} {11,S} +2 C 0 {1,S} {6,S} {3,S} {12,S} +3 C 0 {2,S} {13,S} {14,S} {27,S} +4 C 0 {5,S} {15,S} {16,S} {28,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {2,S} {7,S} {5,S} {19,S} +7 C 0 {6,S} {8,S} {10,S} {20,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {1,S} {23,S} {24,S} +10 C 0 {1,S} {7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {3,S} +28 H 0 {4,S} +""")) +species( +label="InChI=1/C10H18/c1-7-6-8(2)10-5-3-4-9(7)10/h7-10H,3-6H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {10,S} {9,S} {11,S} +2 C 0 {1,S} {3,S} {6,S} {12,S} +3 C 0 {2,S} {4,S} {13,S} {14,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {5,S} {2,S} {7,S} {19,S} +7 C 0 {6,S} {10,S} {8,S} {20,S} +8 C 0 {7,S} {21,S} {22,S} {27,S} +9 C 0 {1,S} {23,S} {24,S} {28,S} +10 C 0 {1,S} {7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {8,S} +28 H 0 {9,S} +""")) +species( +label="InChI=1/C10H18/c1-3-9(4-2)10-7-5-6-8-10/h3,9-10H,1,4-8H2,2H3", +structure=adjacencyList(""" +1 C 0 {9,S} {10,S} {11,S} {27,S} +2 C 0 {3,D} {6,S} {12,S} +3 C 0 {2,D} {13,S} {14,S} +4 C 0 {5,S} {15,S} {16,S} {28,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {5,S} {2,S} {7,S} {19,S} +7 C 0 {8,S} 
{10,S} {6,S} {20,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {1,S} {23,S} {24,S} +10 C 0 {1,S} {7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {1,S} +28 H 0 {4,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-4-7-10-8-5-6-9-10/h4,7,10H,2-3,5-6,8-9H2,1H3", +structure=adjacencyList(""" +1 C 0 {9,S} {10,S} {11,S} {27,S} +2 C 0 {3,S} {6,D} {12,S} +3 C 0 {2,S} {4,S} {13,S} {14,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {17,S} {18,S} {28,S} +6 C 0 {2,D} {7,S} {19,S} +7 C 0 {8,S} {10,S} {6,S} {20,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {1,S} {23,S} {24,S} +10 C 0 {1,S} {7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {1,S} +28 H 0 {5,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-4-7-10-8-5-6-9-10/h2,10H,1,3-9H2", +structure=adjacencyList(""" +1 C 0 {10,D} {9,S} {11,S} +2 C 0 {3,S} {6,S} {12,S} {27,S} +3 C 0 {2,S} {4,S} {13,S} {14,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {5,S} {2,S} {7,S} {19,S} +7 C 0 {6,S} {8,S} {20,S} {28,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {1,S} {23,S} {24,S} +10 C 0 {1,D} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {2,S} +28 H 0 {7,S} +""")) +species( +label="InChI=1/C10H18/c1-3-6-9(2)10-7-4-5-8-10/h3,9-10H,1,4-8H2,2H3", +structure=adjacencyList(""" +1 C 0 {9,D} {10,S} {11,S} +2 C 0 {3,S} {6,S} {12,S} {27,S} +3 C 0 {2,S} {4,S} {13,S} {14,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 
C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {5,S} {2,S} {7,S} {19,S} +7 C 0 {6,S} {8,S} {10,S} {20,S} +8 C 0 {7,S} {21,S} {22,S} {28,S} +9 C 0 {1,D} {23,S} {24,S} +10 C 0 {1,S} {7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {2,S} +28 H 0 {8,S} +""")) +species( +label="InChI=1/C10H18/c1-3-6-10-8-5-7-9(10)4-2/h5,8-10H,3-4,6-7H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,D} {10,S} {11,S} +2 C 0 {1,D} {6,S} {12,S} +3 C 0 {4,S} {13,S} {14,S} {27,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {2,S} {7,S} {5,S} {19,S} +7 C 0 {6,S} {10,S} {8,S} {20,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {23,S} {24,S} {28,S} +10 C 0 {1,S} {7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {3,S} +28 H 0 {9,S} +""")) +species( +label="InChI=1/C10H18/c1-2-10-8-6-4-3-5-7-9-10/h4,6,10H,2-3,5,7-9H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,D} {10,S} {11,S} +2 C 0 {1,D} {3,S} {12,S} +3 C 0 {2,S} {4,S} {13,S} {14,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {5,S} {7,S} {19,S} {27,S} +7 C 0 {6,S} {10,S} {8,S} {20,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {23,S} {24,S} {28,S} +10 C 0 {1,S} {7,S} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {6,S} +28 H 0 {9,S} +""")) +species( +label="InChI=1/C10H18/c1-3-6-10-8-5-7-9(10)4-2/h4,9-10H,2-3,5-8H2,1H3", +structure=adjacencyList(""" +1 C 0 {10,D} {2,S} {11,S} +2 C 0 {3,S} {6,S} 
{1,S} {12,S} +3 C 0 {2,S} {4,S} {13,S} {14,S} +4 C 0 {3,S} {5,S} {15,S} {16,S} +5 C 0 {4,S} {6,S} {17,S} {18,S} +6 C 0 {5,S} {2,S} {7,S} {19,S} +7 C 0 {6,S} {8,S} {20,S} {27,S} +8 C 0 {7,S} {9,S} {21,S} {22,S} +9 C 0 {8,S} {23,S} {24,S} {28,S} +10 C 0 {1,D} {25,S} {26,S} +11 H 0 {1,S} +12 H 0 {2,S} +13 H 0 {3,S} +14 H 0 {3,S} +15 H 0 {4,S} +16 H 0 {4,S} +17 H 0 {5,S} +18 H 0 {5,S} +19 H 0 {6,S} +20 H 0 {7,S} +21 H 0 {8,S} +22 H 0 {8,S} +23 H 0 {9,S} +24 H 0 {9,S} +25 H 0 {10,S} +26 H 0 {10,S} +27 H 0 {7,S} +28 H 0 {9,S} +""")) +species( +label="InChI=1/C10H18/c1-8-6-7-10(8)9-4-2-3-5-9/h8-10H,2-7H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {14,S} +2 C 0 {1,S} {3,S} {7,S} {8,S} +3 C 0 {2,S} {4,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {1,S} {13,S} {27,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {1,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {5,S} +28 H 0 {18,S} +""")) +species( +label="InChI=1/C10H18/c1-2-5-9(4-1)8-10-6-3-7-10/h9-10H,1-8H2", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {18,S} +2 C 0 {1,S} {3,S} {7,S} {8,S} +3 C 0 {2,S} {4,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {1,S} {13,S} {27,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {28,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {1,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {5,S} +28 H 0 {14,S} +""")) +species( 
+label="InChI=1/C10H18/c1-7-3-5-9(7)10-6-4-8(10)2/h7-10H,3-6H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {4,S} {2,S} {6,S} {14,S} +2 C 0 {3,S} {1,S} {5,S} {7,S} +3 C 0 {2,S} {4,S} {8,S} {9,S} +4 C 0 {3,S} {1,S} {10,S} {11,S} +5 C 0 {2,S} {12,S} {13,S} {27,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {3,S} +9 H 0 {3,S} +10 H 0 {4,S} +11 H 0 {4,S} +12 H 0 {5,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {1,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {5,S} +28 H 0 {18,S} +""")) +species( +label="InChI=1/C10H18/c1-8-5-6-10(8)7-9-3-2-4-9/h8-10H,2-7H2,1H3", +structure=adjacencyList(""" +1 C 0 {4,S} {2,S} {6,S} {18,S} +2 C 0 {3,S} {1,S} {5,S} {7,S} +3 C 0 {2,S} {4,S} {8,S} {9,S} +4 C 0 {3,S} {1,S} {10,S} {11,S} +5 C 0 {2,S} {12,S} {13,S} {27,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {3,S} +9 H 0 {3,S} +10 H 0 {4,S} +11 H 0 {4,S} +12 H 0 {5,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {28,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {1,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {5,S} +28 H 0 {14,S} +""")) +species( +label="InChI=1/C10H18/c1-3-9(4-1)7-8-10-5-2-6-10/h9-10H,1-8H2", +structure=adjacencyList(""" +1 C 0 {4,S} {2,S} {6,S} {27,S} +2 C 0 {3,S} {1,S} {5,S} {7,S} +3 C 0 {2,S} {4,S} {8,S} {9,S} +4 C 0 {3,S} {1,S} {10,S} {11,S} +5 C 0 {2,S} {12,S} {13,S} {18,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {3,S} +9 H 0 {3,S} +10 H 0 {4,S} +11 H 0 {4,S} +12 H 0 {5,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {28,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {5,S} +19 H 0 {14,S} +20 H 0 {15,S} 
+21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {1,S} +28 H 0 {14,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-4-7-10-8-5-6-9-10/h3-4,10H,2,5-9H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {15,S} +2 C 0 {1,S} {3,S} {7,S} {8,S} +3 C 0 {2,S} {4,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {1,S} {13,S} {27,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,S} {18,D} {19,S} +15 C 0 {14,S} {20,S} {21,S} {1,S} +16 C 0 {17,S} {22,S} {23,S} {28,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {14,D} {17,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {5,S} +28 H 0 {16,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-4-7-10-8-5-6-9-10/h2-3,10H,4-9H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {16,S} +2 C 0 {1,S} {3,S} {7,S} {8,S} +3 C 0 {2,S} {4,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {1,S} {13,S} {27,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,S} {18,D} {19,S} +15 C 0 {14,S} {20,S} {21,S} {28,S} +16 C 0 {17,S} {22,S} {23,S} {1,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {14,D} {17,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {5,S} +28 H 0 {15,S} +""")) +species( +label="InChI=1/C10H18/c1-3-4-5-6-10-8-7-9(10)2/h4-5,9-10H,3,6-8H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,D} {6,S} +2 C 0 {1,S} {7,S} {8,S} {14,S} +3 C 0 {4,S} {9,S} {10,S} {27,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {1,D} {4,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {2,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} 
{22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {3,S} +28 H 0 {18,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-4-5-7-10-8-6-9-10/h3-4,10H,2,5-9H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,D} {6,S} +2 C 0 {1,S} {7,S} {8,S} {18,S} +3 C 0 {4,S} {9,S} {10,S} {27,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {1,D} {4,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {28,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {2,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {3,S} +28 H 0 {14,S} +""")) +species( +label="InChI=1/C10H18/c1-3-4-5-6-10-8-7-9(10)2/h3-4,9-10H,5-8H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,D} {6,S} +2 C 0 {1,S} {7,S} {8,S} {27,S} +3 C 0 {4,S} {9,S} {10,S} {14,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {1,D} {4,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {3,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {2,S} +28 H 0 {18,S} +""")) +species( +label="InChI=1/C10H18/c1-3-4-5-6-10-8-7-9(10)2/h3,9-10H,1,4-8H2,2H3", +structure=adjacencyList(""" +1 C 0 {2,D} {5,S} {6,S} +2 C 0 {1,D} {7,S} {8,S} +3 C 0 {4,S} {9,S} {10,S} {14,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {1,S} {13,S} {27,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 
0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {3,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {5,S} +28 H 0 {18,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-4-5-7-10-8-6-9-10/h2-3,10H,4-9H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,D} {6,S} +2 C 0 {1,S} {7,S} {8,S} {27,S} +3 C 0 {4,S} {9,S} {10,S} {18,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {1,D} {4,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {28,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {3,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {2,S} +28 H 0 {14,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-4-5-7-10-8-6-9-10/h2,10H,1,3-9H2", +structure=adjacencyList(""" +1 C 0 {2,D} {5,S} {6,S} +2 C 0 {1,D} {7,S} {8,S} +3 C 0 {4,S} {9,S} {10,S} {18,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {1,S} {13,S} {27,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {28,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {3,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {5,S} +28 H 0 {14,S} +""")) +species( +label="InChI=1/C10H18/c1-4-9(5-2)10-7-6-8(10)3/h4,8-10H,1,5-7H2,2-3H3", +structure=adjacencyList(""" +1 C 0 {2,D} {5,S} {6,S} +2 C 0 {1,D} {7,S} {8,S} +3 C 0 {4,S} {9,S} {10,S} {27,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {1,S} {13,S} {14,S} 
+6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {5,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {3,S} +28 H 0 {18,S} +""")) +species( +label="InChI=1/C10H18/c1-3-9(4-2)8-10-6-5-7-10/h3,9-10H,1,4-8H2,2H3", +structure=adjacencyList(""" +1 C 0 {2,D} {5,S} {6,S} +2 C 0 {1,D} {7,S} {8,S} +3 C 0 {4,S} {9,S} {10,S} {27,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {1,S} {13,S} {18,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {28,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {5,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {3,S} +28 H 0 {14,S} +""")) +species( +label="InChI=1/C10H18/c1-3-8-7(2)9-5-4-6-10(8)9/h7-10H,3-6H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {14,S} {6,S} +2 C 0 {1,S} {3,S} {7,S} {8,S} +3 C 0 {2,S} {4,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {1,S} {18,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {18,S} {1,S} {15,S} {19,S} +15 C 0 {14,S} {20,S} {21,S} {27,S} +16 C 0 {17,S} {22,S} {23,S} {28,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {14,S} {5,S} {17,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {15,S} +28 H 0 {16,S} +""")) +species( +label="InChI=1/C10H18/c1-2-4-8-7-9-5-3-6-10(8)9/h8-10H,2-7H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {18,S} {5,S} {6,S} +2 
C 0 {1,S} {14,S} {7,S} {8,S} +3 C 0 {4,S} {9,S} {10,S} {27,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {1,S} {13,S} {28,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,S} {18,S} {2,S} {19,S} +15 C 0 {14,S} {16,S} {20,S} {21,S} +16 C 0 {15,S} {17,S} {22,S} {23,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {17,S} {14,S} {1,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {3,S} +28 H 0 {5,S} +""")) +species( +label="InChI=1/C10H18/c1-8(10-6-7-10)9-4-2-3-5-9/h8-10H,2-7H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {14,S} +2 C 0 {1,S} {7,S} {8,S} {27,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {18,S} {19,S} {1,S} +15 C 0 {16,S} {18,S} {20,S} {28,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {14,S} {15,S} {25,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {2,S} +28 H 0 {15,S} +""")) +species( +label="InChI=1/C10H18/c1-2-4-9(3-1)5-6-10-7-8-10/h9-10H,1-8H2", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {27,S} +2 C 0 {1,S} {7,S} {8,S} {14,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {18,S} {19,S} {2,S} +15 C 0 {16,S} {18,S} {20,S} {28,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {14,S} {15,S} {25,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {1,S} +28 H 0 {15,S} +""")) +species( 
+label="InChI=1/C10H18/c1-7-3-6-10(7)8(2)9-4-5-9/h7-10H,3-6H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {14,S} +2 C 0 {1,S} {7,S} {8,S} {27,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {1,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {2,S} +28 H 0 {18,S} +""")) +species( +label="InChI=1/C10H18/c1-8(10-5-6-10)7-9-3-2-4-9/h8-10H,2-7H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {18,S} +2 C 0 {1,S} {7,S} {8,S} {27,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {28,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {1,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {2,S} +28 H 0 {14,S} +""")) +species( +label="InChI=1/C10H18/c1-8-2-6-10(8)7-5-9-3-4-9/h8-10H,2-7H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {27,S} +2 C 0 {1,S} {7,S} {8,S} {14,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {2,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} 
+21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {1,S} +28 H 0 {18,S} +""")) +species( +label="InChI=1/C10H18/c1-3-9(4-1)5-2-6-10-7-8-10/h9-10H,1-8H2", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {27,S} +2 C 0 {1,S} {7,S} {8,S} {18,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {17,S} {15,S} {19,S} {28,S} +15 C 0 {16,S} {14,S} {18,S} {20,S} +16 C 0 {15,S} {17,S} {21,S} {22,S} +17 C 0 {16,S} {14,S} {23,S} {24,S} +18 C 0 {15,S} {25,S} {26,S} {2,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {16,S} +22 H 0 {16,S} +23 H 0 {17,S} +24 H 0 {17,S} +25 H 0 {18,S} +26 H 0 {18,S} +27 H 0 {1,S} +28 H 0 {14,S} +""")) +species( +label="InChI=1/C10H18/c1-3-4-5-6-9(2)10-7-8-10/h4-5,9-10H,3,6-8H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {15,S} +2 C 0 {1,S} {7,S} {8,S} {27,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,S} {18,D} {19,S} +15 C 0 {14,S} {20,S} {21,S} {1,S} +16 C 0 {17,S} {22,S} {23,S} {28,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {14,D} {17,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {2,S} +28 H 0 {16,S} +""")) +species( +label="InChI=1/C10H18/c1-3-4-5-6-9(2)10-7-8-10/h3-4,9-10H,5-8H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {16,S} +2 C 0 {1,S} {7,S} {8,S} {27,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,S} {18,D} {19,S} +15 C 0 {14,S} {20,S} {21,S} {28,S} +16 C 0 {17,S} 
{22,S} {23,S} {1,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {14,D} {17,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {2,S} +28 H 0 {15,S} +""")) +species( +label="InChI=1/C10H18/c1-3-4-5-6-9(2)10-7-8-10/h3,9-10H,1,4-8H2,2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {16,S} +2 C 0 {1,S} {7,S} {8,S} {27,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,D} {18,S} {19,S} +15 C 0 {14,D} {20,S} {21,S} +16 C 0 {17,S} {22,S} {23,S} {1,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {17,S} {14,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {2,S} +28 H 0 {18,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-4-5-6-7-10-8-9-10/h3-4,10H,2,5-9H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {27,S} +2 C 0 {1,S} {7,S} {8,S} {15,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,S} {18,D} {19,S} +15 C 0 {14,S} {20,S} {21,S} {2,S} +16 C 0 {17,S} {22,S} {23,S} {28,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {14,D} {17,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {1,S} +28 H 0 {16,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-4-5-6-7-10-8-9-10/h2-3,10H,4-9H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {27,S} +2 C 0 {1,S} {7,S} {8,S} {16,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 
{4,S} +13 H 0 {5,S} +14 C 0 {15,S} {18,D} {19,S} +15 C 0 {14,S} {20,S} {21,S} {28,S} +16 C 0 {17,S} {22,S} {23,S} {2,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {14,D} {17,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {1,S} +28 H 0 {15,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-4-5-6-7-10-8-9-10/h2,10H,1,3-9H2", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {27,S} +2 C 0 {1,S} {7,S} {8,S} {16,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,D} {18,S} {19,S} +15 C 0 {14,D} {20,S} {21,S} +16 C 0 {17,S} {22,S} {23,S} {2,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {17,S} {14,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {1,S} +28 H 0 {18,S} +""")) +species( +label="InChI=1/C10H18/c1-4-9(5-2)8(3)10-6-7-10/h4,8-10H,1,5-7H2,2-3H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {18,S} +2 C 0 {1,S} {7,S} {8,S} {27,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,D} {18,S} {19,S} +15 C 0 {14,D} {20,S} {21,S} +16 C 0 {17,S} {22,S} {23,S} {28,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {17,S} {14,S} {26,S} {1,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {2,S} +28 H 0 {16,S} +""")) +species( +label="InChI=1/C10H18/c1-3-9(4-2)5-6-10-7-8-10/h3,9-10H,1,4-8H2,2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {5,S} {6,S} {27,S} +2 C 0 {1,S} {7,S} {8,S} {18,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} 
{1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,D} {18,S} {19,S} +15 C 0 {14,D} {20,S} {21,S} +16 C 0 {17,S} {22,S} {23,S} {28,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {17,S} {14,S} {26,S} {2,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {1,S} +28 H 0 {16,S} +""")) +species( +label="InChI=1/C10H18/c1-3-9-7(2)6-10(9)8-4-5-8/h7-10H,3-6H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {18,S} {5,S} {6,S} +2 C 0 {1,S} {14,S} {7,S} {8,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {18,S} {2,S} {15,S} {19,S} +15 C 0 {14,S} {20,S} {21,S} {27,S} +16 C 0 {17,S} {22,S} {23,S} {28,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {14,S} {1,S} {17,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {15,S} +28 H 0 {16,S} +""")) +species( +label="InChI=1/C10H18/c1-3-8-6-10(7(8)2)9-4-5-9/h7-10H,3-6H2,1-2H3", +structure=adjacencyList(""" +1 C 0 {2,S} {14,S} {5,S} {6,S} +2 C 0 {1,S} {18,S} {7,S} {8,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {18,S} {1,S} {15,S} {19,S} +15 C 0 {14,S} {20,S} {21,S} {27,S} +16 C 0 {17,S} {22,S} {23,S} {28,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {14,S} {2,S} {17,S} {26,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {15,S} +28 H 0 {16,S} +""")) +species( +label="InChI=1/C10H18/c1-2-3-8-6-10(7-8)9-4-5-9/h8-10H,2-7H2,1H3", +structure=adjacencyList(""" +1 C 0 {2,S} {15,S} 
{5,S} {6,S} +2 C 0 {1,S} {14,S} {7,S} {8,S} +3 C 0 {4,S} {5,S} {9,S} {10,S} +4 C 0 {3,S} {5,S} {11,S} {12,S} +5 C 0 {4,S} {3,S} {1,S} {13,S} +6 H 0 {1,S} +7 H 0 {2,S} +8 H 0 {2,S} +9 H 0 {3,S} +10 H 0 {3,S} +11 H 0 {4,S} +12 H 0 {4,S} +13 H 0 {5,S} +14 C 0 {15,S} {2,S} {18,S} {19,S} +15 C 0 {14,S} {1,S} {20,S} {21,S} +16 C 0 {17,S} {22,S} {23,S} {27,S} +17 C 0 {16,S} {18,S} {24,S} {25,S} +18 C 0 {17,S} {14,S} {26,S} {28,S} +19 H 0 {14,S} +20 H 0 {15,S} +21 H 0 {15,S} +22 H 0 {16,S} +23 H 0 {16,S} +24 H 0 {17,S} +25 H 0 {17,S} +26 H 0 {18,S} +27 H 0 {16,S} +28 H 0 {18,S} +""")) diff --git a/examples/thermoEstimator/scoop/lsf.sh b/examples/thermoEstimator/scoop/lsf.sh new file mode 100755 index 0000000000..b2061860dd --- /dev/null +++ b/examples/thermoEstimator/scoop/lsf.sh @@ -0,0 +1,59 @@ +#!/bin/sh +#BSUB -o RMG.out +#BSUB -J RMGPyScoop +#BSUB -n 8 +#BSUB -e error_log +#BSUB -q medium_priority + +# This is a job submission file for a LSF queuing system to run +# the SCOOP-enabled parallel version of RMG-Py across 8 CPUs on +# a number of different compute nodes on a (potentially heterogeneous) cluster. 
+ +source ~/.bash_profile + +LAMHOST_FILE=hosts + +# start a new host file from scratch +rm -f $LAMHOST_FILE +touch $LAMHOST_FILE +# echo "# LAMMPI host file created by LSF on `date`" >> $LAMHOST_FILE +# check if we were able to start writing the conf file +if [ -f $LAMHOST_FILE ]; then + : +else + echo "$0: can't create $LAMHOST_FILE" + exit 1 +fi +HOST="" +NUM_PROC="" +FLAG="" +TOTAL_CPUS=0 +for TOKEN in $LSB_MCPU_HOSTS +do + if [ -z "$FLAG" ]; then + HOST="$TOKEN" + FLAG="0" + else + NUM_PROC="$TOKEN" + TOTAL_CPUS=`expr $TOTAL_CPUS + $NUM_PROC` + FLAG="1" + fi + if [ "$FLAG" = "1" ]; then + _x=0 + while [ $_x -lt $NUM_PROC ] + do + echo "$HOST" >>$LAMHOST_FILE + _x=`expr $_x + 1` + done + # get ready for the next host + FLAG="" + HOST="" + NUM_PROC="" + fi +done +# last thing added to LAMHOST_FILE +#echo "# end of LAMHOST file" >> $LAMHOST_FILE +echo "Your lamboot hostfile looks like:" +cat $LAMHOST_FILE + +python -m scoop -vv --hostfile $LAMHOST_FILE $RMGpy/thermoEstimator.py input.py > RMG.stdout.log diff --git a/examples/thermoEstimator/scoop/prolog.sh b/examples/thermoEstimator/scoop/prolog.sh new file mode 100755 index 0000000000..8fb7afab28 --- /dev/null +++ b/examples/thermoEstimator/scoop/prolog.sh @@ -0,0 +1,2 @@ +#!/bin/bash +source ~/.bash_profile diff --git a/examples/thermoEstimator/scoop/sge.sh b/examples/thermoEstimator/scoop/sge.sh new file mode 100755 index 0000000000..652e66eee9 --- /dev/null +++ b/examples/thermoEstimator/scoop/sge.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#####################i################################################ +# This is a job submission file for a SGE queuing system to run +# the SCOOP-enabled parallel version of RMG-Py across 48 CPUs on +# a single node. +# +# Define RMGPy as the path to rmg.py in your ~/.bash_profile +# NSLOTS is an SGE env. variable for total number of CPUs. +# prolog.sh is a script used by SCOOP to pass env. 
variables +# +# You can run the jobs on different nodes as well, but it is not +# recommended since you might have problems with SGE job termination. +# Type `qconf -spl` to see available parallel environments and modify +# the last #$ line if you really want to run it on many nodes. +#####################i################################################ +#$ -S /bin/bash +#$ -cwd +#$ -notify +#$ -o job.log -j y +#$ -N RMGscoop +#$ -l normal +#$ -l h_rt=09:05:00 +#$ -pe singlenode 48 +source ~/.bash_profile +python -m scoop.__main__ --tunnel --prolog $RMGpy/examples/rmg/scoop/prolog.sh -n $NSLOTS $RMGpy/thermoEstimator.py input.py > std.out diff --git a/prolog.sh b/prolog.sh deleted file mode 100755 index f85d242d58..0000000000 --- a/prolog.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -#The script that each workers of scoop runs. -#Should be modified based on user profile. -source ~/.bashrc -export RMGQM="/opt/rmgqm" -#set Gaussian03 environment variables -g03root=/opt -GAUSS_SCRDIR=/scratch/$USER -export g03root GAUSS_SCRDIR -GAUSS_EXEDIR="$g03root/g03/" -GAUSS_LEXEDIR="$g03root/g03/linda-exe" -GAUSS_ARCHDIR="$g03root/g03/arch" -GMAIN=$GAUSS_EXEDIR/g03 -PATH=$PATH:$GMAIN -LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GMAIN -G03BASIS="$g03root/g03/basis" -F_ERROPT1="271,271,2,1,2,2,2,2" -TRAP_FPE="OVERFL=ABORT;DIVZERO=ABORT;INT_OVERFL=ABORT" -MP_STACK_OVERFLOW="OFF" -# to partially avoid KAI stupidity -KMP_DUPLICATE_LIB_OK="TRUE" -export GAUSS_EXEDIR GAUSS_ARCHDIR PATH GMAIN LD_LIBRARY_PATH F_ERROPT1 TRAP_FPE MP_STACK_OVERFLOW \ - KMP_DUPLICATE_LIB_OK G03BASIS GAUSS_LEXEDIR -#set MOPAC -export MOPAC_LICENSE=/opt/mopac/ -# -export PYTHONPATH=/home/keceli/RMG/RMG-Py/PyDAS/build/lib.linux-x86_64-2.6:/home/keceli/RMG/RMG-Py/PyDQED:/home/keceli/local/lib/python2.6/site-packages:/opt/rmgqm/RDKit_2013_03_2:/usr/local/lib/python2.6/dist-packages:$PYTHONPATH -export 
PATH=/home/keceli/kiler:/home/keceli/bin:/home/keceli/local/bin:/opt/mpich2-1.2.1p1/bin:/opt/intel/Compiler/11.0/074/bin/intel64:/opt/sge/bin/lx24-amd64:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/bin/mh:/opt/g03/g03:$PATH -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/Compiler/11.0/074/ipp/em64t/sharedlib:/opt/intel/Compiler/11.0/074/mkl/lib/em64t:/opt/intel/Compiler/11.0/074/tbb/em64t/cc4.1.0_libc2.4_kernel2.6.16.21/lib:/opt/intel/Compiler/11.0/074/lib/intel64:/opt/g03/g03:/usr/local/lib:/opt/rmgqm/RDKit_2013_03_2/bin:/opt/rmgqm/boost_1_44_0/lib:/opt/rmgqm/RDKit_2013_03_2/lib diff --git a/thermoEstimator.py b/thermoEstimator.py index 594c8642b9..643d8fe809 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -17,10 +17,10 @@ from scoop import futures,shared ################################################################################ def chunks(l, n): - """ Yield successive n-sized chunks from l. - From http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks-in-python + """ + Yield successive n-sized chunks from l. """ - for i in xrange(0, len(l), n): + for i in range(0, len(l), n): yield l[i:i+n] def runThermoEstimator(inputFile): @@ -41,17 +41,17 @@ def runThermoEstimator(inputFile): # ThermoLibrary format with values for H, S, and Cp's. output = open(os.path.join(rmg.outputDirectory, 'output.txt'),'wb') library = ThermoLibrary(name='Thermo Estimation Library') - # for species in rmg.initialSpecies: - # species.generateThermoData(rmg.database, quantumMechanics=rmg.reactionModel.quantumMechanics) listOfSpecies=rmg.initialSpecies chunksize=50 if rmg.reactionModel.quantumMechanics: logging.debug("qmValue fine @ runThermoEstimator") shared.setConst(qmValue=rmg.reactionModel.quantumMechanics) for chunk in list(chunks(listOfSpecies,chunksize)): + logging.debug("Parallelized section starts...") + # There will be no stdout from workers except the main one. 
outputList = futures.map(makeThermoForSpecies, chunk) - logging.debug("mapped") + logging.debug("Parallelized section ends.") for species, thermo in zip(chunk, outputList): - logging.debug("specie {0}".format(species.label)) + logging.debug("Species {0}".format(species.label)) species.thermo = thermo library.loadEntry( index = len(library.entries) + 1, @@ -106,7 +106,7 @@ def runThermoEstimator(inputFile): makeProfileGraph(stats_file) else: - level = logging.DEBUG + level = logging.INFO initializeLog(level, 'thermo.log') - logging.debug("runThermoEstimator...") + logging.debug("runThermoEstimator starts...") runThermoEstimator(inputFile) From e08ce78bc891950d1f88842bb7a2a22395707e40 Mon Sep 17 00:00:00 2001 From: keceli Date: Fri, 15 Nov 2013 10:12:18 -0500 Subject: [PATCH 25/39] Revert "attempt to run RMG without thermolibrary" Doesn't work. This reverts commit 384db4e4e3ccfe02e1c1385672f268784a970961. --- rmgpy/data/thermo.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index cd3f12c08c..a5b5213d29 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -734,8 +734,6 @@ def getThermoDataFromLibrary(self, species, library): Returns a tuple: (ThermoData, library, entry) or None. """ - if library is None: - return None for label, entry in library.entries.iteritems(): for molecule in species.molecule: if molecule.isIsomorphic(entry.item) and entry.data is not None: From f716b5bdceb2929126f8e73f6d2d8d8c7a113508 Mon Sep 17 00:00:00 2001 From: keceli Date: Fri, 15 Nov 2013 10:19:57 -0500 Subject: [PATCH 26/39] Fixed a comment and revert an unnecessary change. 
--- examples/thermoEstimator/run.sh | 2 +- rmgpy/qm/molecule.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/thermoEstimator/run.sh b/examples/thermoEstimator/run.sh index 8294763b04..d8dbeea875 100755 --- a/examples/thermoEstimator/run.sh +++ b/examples/thermoEstimator/run.sh @@ -1,4 +1,4 @@ #!/bin/bash # Run the thermo estimator on the given thermo input file -python $RMGpy/thermoEstimator.py input.py +python ../../thermoEstimator.py input.py diff --git a/rmgpy/qm/molecule.py b/rmgpy/qm/molecule.py index fb08d8a4c3..72f5bc7302 100644 --- a/rmgpy/qm/molecule.py +++ b/rmgpy/qm/molecule.py @@ -202,7 +202,7 @@ def createGeometry(self): def generateQMData(self): """ - Calculate the QM data and return a QMData object, or None if it fails. + Calculate the QM data somehow and return a CCLibData object, or None if it fails. """ logging.debug("{0} calculation".format(self.__class__.__name__)) if self.verifyOutputFile(): From dad02b4dc6a845bf91d8fc1d9750387a19c79fd0 Mon Sep 17 00:00:00 2001 From: keceli Date: Sat, 16 Nov 2013 01:26:28 -0500 Subject: [PATCH 27/39] Fixing some problems with makeProfileGraph All arguments are now optional. I was having problems with importing gprof2dot: ``` from external.gprof2dot import gprof2dot ``` works on Pharos with python 2.6 ``` from external import gprof2dot ``` works on my Mac with python 2.7. There should be a better way of handling it. 
--- external/gprof2dot.py | 2227 ++++++++++++++++++++++++++++++++ external/gprof2dot/__init__.py | 0 makeProfileGraph.py | 17 +- 3 files changed, 2237 insertions(+), 7 deletions(-) create mode 100755 external/gprof2dot.py create mode 100644 external/gprof2dot/__init__.py mode change 100644 => 100755 makeProfileGraph.py diff --git a/external/gprof2dot.py b/external/gprof2dot.py new file mode 100755 index 0000000000..55eb53ad84 --- /dev/null +++ b/external/gprof2dot.py @@ -0,0 +1,2227 @@ +#!/usr/bin/env python +# +# Copyright 2008-2009 Jose Fonseca +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
+# + +"""Generate a dot graph from the output of several profilers.""" + +__author__ = "Jose Fonseca" + +__version__ = "1.0" + + +import sys +import math +import os.path +import re +import textwrap +import optparse +import xml.parsers.expat + + +try: + # Debugging helper module + import debug +except ImportError: + pass + + +def percentage(p): + return "%.02f%%" % (p*100.0,) + +def add(a, b): + return a + b + +def equal(a, b): + if a == b: + return a + else: + return None + +def fail(a, b): + assert False + + +tol = 2 ** -23 + +def ratio(numerator, denominator): + try: + ratio = float(numerator)/float(denominator) + except ZeroDivisionError: + # 0/0 is undefined, but 1.0 yields more useful results + return 1.0 + if ratio < 0.0: + if ratio < -tol: + sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator)) + return 0.0 + if ratio > 1.0: + if ratio > 1.0 + tol: + sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator)) + return 1.0 + return ratio + + +class UndefinedEvent(Exception): + """Raised when attempting to get an event which is undefined.""" + + def __init__(self, event): + Exception.__init__(self) + self.event = event + + def __str__(self): + return 'unspecified event %s' % self.event.name + + +class Event(object): + """Describe a kind of event, and its basic operations.""" + + def __init__(self, name, null, aggregator, formatter = str): + self.name = name + self._null = null + self._aggregator = aggregator + self._formatter = formatter + + def __eq__(self, other): + return self is other + + def __hash__(self): + return id(self) + + def null(self): + return self._null + + def aggregate(self, val1, val2): + """Aggregate two event values.""" + assert val1 is not None + assert val2 is not None + return self._aggregator(val1, val2) + + def format(self, val): + """Format an event value.""" + assert val is not None + return self._formatter(val) + + +MODULE = Event("Module", None, equal) +PROCESS = 
Event("Process", None, equal) + +CALLS = Event("Calls", 0, add) +SAMPLES = Event("Samples", 0, add) +SAMPLES2 = Event("Samples", 0, add) + +TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')') +TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')') +TOTAL_TIME = Event("Total time", 0.0, fail) +TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage) + +CALL_RATIO = Event("Call ratio", 0.0, add, percentage) + +PRUNE_RATIO = Event("Prune ratio", 0.0, add, percentage) + + +class Object(object): + """Base class for all objects in profile which can store events.""" + + def __init__(self, events=None): + if events is None: + self.events = {} + else: + self.events = events + + def __hash__(self): + return id(self) + + def __eq__(self, other): + return self is other + + def __contains__(self, event): + return event in self.events + + def __getitem__(self, event): + try: + return self.events[event] + except KeyError: + raise UndefinedEvent(event) + + def __setitem__(self, event, value): + if value is None: + if event in self.events: + del self.events[event] + else: + self.events[event] = value + + +class Call(Object): + """A call between functions. + + There should be at most one call object for every pair of functions. + """ + + def __init__(self, callee_id): + Object.__init__(self) + self.callee_id = callee_id + + +class Function(Object): + """A function.""" + + def __init__(self, id, name): + Object.__init__(self) + self.id = id + self.name = name + self.calls = {} + self.cycle = None + + def add_call(self, call): + if call.callee_id in self.calls: + sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id))) + self.calls[call.callee_id] = call + + # TODO: write utility functions + + def __repr__(self): + return self.name + + +class Cycle(Object): + """A cycle made from recursive function calls.""" + + def __init__(self): + Object.__init__(self) + # XXX: Do cycles need an id? 
+ self.functions = set() + + def add_function(self, function): + assert function not in self.functions + self.functions.add(function) + # XXX: Aggregate events? + if function.cycle is not None: + for other in function.cycle.functions: + if function not in self.functions: + self.add_function(other) + function.cycle = self + + +class Profile(Object): + """The whole profile.""" + + def __init__(self): + Object.__init__(self) + self.functions = {} + self.cycles = [] + + def add_function(self, function): + if function.id in self.functions: + sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id))) + self.functions[function.id] = function + + def add_cycle(self, cycle): + self.cycles.append(cycle) + + def validate(self): + """Validate the edges.""" + + for function in self.functions.itervalues(): + for callee_id in function.calls.keys(): + assert function.calls[callee_id].callee_id == callee_id + if callee_id not in self.functions: + sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name)) + del function.calls[callee_id] + + def find_cycles(self): + """Find cycles using Tarjan's strongly connected components algorithm.""" + + # Apply the Tarjan's algorithm successively until all functions are visited + visited = set() + for function in self.functions.itervalues(): + if function not in visited: + self._tarjan(function, 0, [], {}, {}, visited) + cycles = [] + for function in self.functions.itervalues(): + if function.cycle is not None and function.cycle not in cycles: + cycles.append(function.cycle) + self.cycles = cycles + if 0: + for cycle in cycles: + sys.stderr.write("Cycle:\n") + for member in cycle.functions: + sys.stderr.write("\tFunction %s\n" % member.name) + + def _tarjan(self, function, order, stack, orders, lowlinks, visited): + """Tarjan's strongly connected components algorithm. 
+ + See also: + - http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm + """ + + visited.add(function) + orders[function] = order + lowlinks[function] = order + order += 1 + pos = len(stack) + stack.append(function) + for call in function.calls.itervalues(): + callee = self.functions[call.callee_id] + # TODO: use a set to optimize lookup + if callee not in orders: + order = self._tarjan(callee, order, stack, orders, lowlinks, visited) + lowlinks[function] = min(lowlinks[function], lowlinks[callee]) + elif callee in stack: + lowlinks[function] = min(lowlinks[function], orders[callee]) + if lowlinks[function] == orders[function]: + # Strongly connected component found + members = stack[pos:] + del stack[pos:] + if len(members) > 1: + cycle = Cycle() + for member in members: + cycle.add_function(member) + return order + + def call_ratios(self, event): + # Aggregate for incoming calls + cycle_totals = {} + for cycle in self.cycles: + cycle_totals[cycle] = 0.0 + function_totals = {} + for function in self.functions.itervalues(): + function_totals[function] = 0.0 + for function in self.functions.itervalues(): + for call in function.calls.itervalues(): + if call.callee_id != function.id: + callee = self.functions[call.callee_id] + function_totals[callee] += call[event] + if callee.cycle is not None and callee.cycle is not function.cycle: + cycle_totals[callee.cycle] += call[event] + + # Compute the ratios + for function in self.functions.itervalues(): + for call in function.calls.itervalues(): + assert CALL_RATIO not in call + if call.callee_id != function.id: + callee = self.functions[call.callee_id] + if callee.cycle is not None and callee.cycle is not function.cycle: + total = cycle_totals[callee.cycle] + else: + total = function_totals[callee] + call[CALL_RATIO] = ratio(call[event], total) + + def integrate(self, outevent, inevent): + """Propagate function time ratio allong the function calls. + + Must be called after finding the cycles. 
+ + See also: + - http://citeseer.ist.psu.edu/graham82gprof.html + """ + + # Sanity checking + assert outevent not in self + for function in self.functions.itervalues(): + assert outevent not in function + assert inevent in function + for call in function.calls.itervalues(): + assert outevent not in call + if call.callee_id != function.id: + assert CALL_RATIO in call + + # Aggregate the input for each cycle + for cycle in self.cycles: + total = inevent.null() + for function in self.functions.itervalues(): + total = inevent.aggregate(total, function[inevent]) + self[inevent] = total + + # Integrate along the edges + total = inevent.null() + for function in self.functions.itervalues(): + total = inevent.aggregate(total, function[inevent]) + self._integrate_function(function, outevent, inevent) + self[outevent] = total + + def _integrate_function(self, function, outevent, inevent): + if function.cycle is not None: + return self._integrate_cycle(function.cycle, outevent, inevent) + else: + if outevent not in function: + total = function[inevent] + for call in function.calls.itervalues(): + if call.callee_id != function.id: + total += self._integrate_call(call, outevent, inevent) + function[outevent] = total + return function[outevent] + + def _integrate_call(self, call, outevent, inevent): + assert outevent not in call + assert CALL_RATIO in call + callee = self.functions[call.callee_id] + subtotal = call[CALL_RATIO]*self._integrate_function(callee, outevent, inevent) + call[outevent] = subtotal + return subtotal + + def _integrate_cycle(self, cycle, outevent, inevent): + if outevent not in cycle: + + # Compute the outevent for the whole cycle + total = inevent.null() + for member in cycle.functions: + subtotal = member[inevent] + for call in member.calls.itervalues(): + callee = self.functions[call.callee_id] + if callee.cycle is not cycle: + subtotal += self._integrate_call(call, outevent, inevent) + total += subtotal + cycle[outevent] = total + + # Compute the time 
propagated to callers of this cycle + callees = {} + for function in self.functions.itervalues(): + if function.cycle is not cycle: + for call in function.calls.itervalues(): + callee = self.functions[call.callee_id] + if callee.cycle is cycle: + try: + callees[callee] += call[CALL_RATIO] + except KeyError: + callees[callee] = call[CALL_RATIO] + + for member in cycle.functions: + member[outevent] = outevent.null() + + for callee, call_ratio in callees.iteritems(): + ranks = {} + call_ratios = {} + partials = {} + self._rank_cycle_function(cycle, callee, 0, ranks) + self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set()) + partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent) + assert partial == max(partials.values()) + assert not total or abs(1.0 - partial/(call_ratio*total)) <= 0.001 + + return cycle[outevent] + + def _rank_cycle_function(self, cycle, function, rank, ranks): + if function not in ranks or ranks[function] > rank: + ranks[function] = rank + for call in function.calls.itervalues(): + if call.callee_id != function.id: + callee = self.functions[call.callee_id] + if callee.cycle is cycle: + self._rank_cycle_function(cycle, callee, rank + 1, ranks) + + def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited): + if function not in visited: + visited.add(function) + for call in function.calls.itervalues(): + if call.callee_id != function.id: + callee = self.functions[call.callee_id] + if callee.cycle is cycle: + if ranks[callee] > ranks[function]: + call_ratios[callee] = call_ratios.get(callee, 0.0) + call[CALL_RATIO] + self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited) + + def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent): + if function not in partials: + partial = partial_ratio*function[inevent] + for call in function.calls.itervalues(): + if call.callee_id != function.id: + callee = 
self.functions[call.callee_id] + if callee.cycle is not cycle: + assert outevent in call + partial += partial_ratio*call[outevent] + else: + if ranks[callee] > ranks[function]: + callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent) + call_ratio = ratio(call[CALL_RATIO], call_ratios[callee]) + call_partial = call_ratio*callee_partial + try: + call[outevent] += call_partial + except UndefinedEvent: + call[outevent] = call_partial + partial += call_partial + partials[function] = partial + try: + function[outevent] += partial + except UndefinedEvent: + function[outevent] = partial + return partials[function] + + def aggregate(self, event): + """Aggregate an event for the whole profile.""" + + total = event.null() + for function in self.functions.itervalues(): + try: + total = event.aggregate(total, function[event]) + except UndefinedEvent: + return + self[event] = total + + def ratio(self, outevent, inevent): + assert outevent not in self + assert inevent in self + for function in self.functions.itervalues(): + assert outevent not in function + assert inevent in function + function[outevent] = ratio(function[inevent], self[inevent]) + for call in function.calls.itervalues(): + assert outevent not in call + if inevent in call: + call[outevent] = ratio(call[inevent], self[inevent]) + self[outevent] = 1.0 + + def prune(self, node_thres, edge_thres): + """Prune the profile""" + + # compute the prune ratios + for function in self.functions.itervalues(): + try: + function[PRUNE_RATIO] = function[TOTAL_TIME_RATIO] + except UndefinedEvent: + pass + + for call in function.calls.itervalues(): + callee = self.functions[call.callee_id] + + if TOTAL_TIME_RATIO in call: + # handle exact cases first + call[PRUNE_RATIO] = call[TOTAL_TIME_RATIO] + else: + try: + # make a safe estimate + call[PRUNE_RATIO] = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO]) + except UndefinedEvent: + pass + + # prune the 
nodes + for function_id in self.functions.keys(): + function = self.functions[function_id] + try: + if function[PRUNE_RATIO] < node_thres: + del self.functions[function_id] + except UndefinedEvent: + pass + + # prune the egdes + for function in self.functions.itervalues(): + for callee_id in function.calls.keys(): + call = function.calls[callee_id] + try: + if callee_id not in self.functions or call[PRUNE_RATIO] < edge_thres: + del function.calls[callee_id] + except UndefinedEvent: + pass + + def dump(self): + for function in self.functions.itervalues(): + sys.stderr.write('Function %s:\n' % (function.name,)) + self._dump_events(function.events) + for call in function.calls.itervalues(): + callee = self.functions[call.callee_id] + sys.stderr.write(' Call %s:\n' % (callee.name,)) + self._dump_events(call.events) + for cycle in self.cycles: + sys.stderr.write('Cycle:\n') + self._dump_events(cycle.events) + for function in cycle.functions: + sys.stderr.write(' Function %s\n' % (function.name,)) + + def _dump_events(self, events): + for event, value in events.iteritems(): + sys.stderr.write(' %s: %s\n' % (event.name, event.format(value))) + + +class Struct: + """Masquerade a dictionary with a structure-like behavior.""" + + def __init__(self, attrs = None): + if attrs is None: + attrs = {} + self.__dict__['_attrs'] = attrs + + def __getattr__(self, name): + try: + return self._attrs[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name, value): + self._attrs[name] = value + + def __str__(self): + return str(self._attrs) + + def __repr__(self): + return repr(self._attrs) + + +class ParseError(Exception): + """Raised when parsing to signal mismatches.""" + + def __init__(self, msg, line): + self.msg = msg + # TODO: store more source line information + self.line = line + + def __str__(self): + return '%s: %r' % (self.msg, self.line) + + +class Parser: + """Parser interface.""" + + def __init__(self): + pass + + def parse(self): + raise 
NotImplementedError + + +class LineParser(Parser): + """Base class for parsers that read line-based formats.""" + + def __init__(self, file): + Parser.__init__(self) + self._file = file + self.__line = None + self.__eof = False + + def readline(self): + line = self._file.readline() + if not line: + self.__line = '' + self.__eof = True + self.__line = line.rstrip('\r\n') + + def lookahead(self): + assert self.__line is not None + return self.__line + + def consume(self): + assert self.__line is not None + line = self.__line + self.readline() + return line + + def eof(self): + assert self.__line is not None + return self.__eof + + +XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4) + + +class XmlToken: + + def __init__(self, type, name_or_data, attrs = None, line = None, column = None): + assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF) + self.type = type + self.name_or_data = name_or_data + self.attrs = attrs + self.line = line + self.column = column + + def __str__(self): + if self.type == XML_ELEMENT_START: + return '<' + self.name_or_data + ' ...>' + if self.type == XML_ELEMENT_END: + return '' + if self.type == XML_CHARACTER_DATA: + return self.name_or_data + if self.type == XML_EOF: + return 'end of file' + assert 0 + + +class XmlTokenizer: + """Expat based XML tokenizer.""" + + def __init__(self, fp, skip_ws = True): + self.fp = fp + self.tokens = [] + self.index = 0 + self.final = False + self.skip_ws = skip_ws + + self.character_pos = 0, 0 + self.character_data = '' + + self.parser = xml.parsers.expat.ParserCreate() + self.parser.StartElementHandler = self.handle_element_start + self.parser.EndElementHandler = self.handle_element_end + self.parser.CharacterDataHandler = self.handle_character_data + + def handle_element_start(self, name, attributes): + self.finish_character_data() + line, column = self.pos() + token = XmlToken(XML_ELEMENT_START, name, attributes, line, column) + self.tokens.append(token) 
+ + def handle_element_end(self, name): + self.finish_character_data() + line, column = self.pos() + token = XmlToken(XML_ELEMENT_END, name, None, line, column) + self.tokens.append(token) + + def handle_character_data(self, data): + if not self.character_data: + self.character_pos = self.pos() + self.character_data += data + + def finish_character_data(self): + if self.character_data: + if not self.skip_ws or not self.character_data.isspace(): + line, column = self.character_pos + token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column) + self.tokens.append(token) + self.character_data = '' + + def next(self): + size = 16*1024 + while self.index >= len(self.tokens) and not self.final: + self.tokens = [] + self.index = 0 + data = self.fp.read(size) + self.final = len(data) < size + try: + self.parser.Parse(data, self.final) + except xml.parsers.expat.ExpatError, e: + #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS: + if e.code == 3: + pass + else: + raise e + if self.index >= len(self.tokens): + line, column = self.pos() + token = XmlToken(XML_EOF, None, None, line, column) + else: + token = self.tokens[self.index] + self.index += 1 + return token + + def pos(self): + return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber + + +class XmlTokenMismatch(Exception): + + def __init__(self, expected, found): + self.expected = expected + self.found = found + + def __str__(self): + return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found)) + + +class XmlParser(Parser): + """Base XML document parser.""" + + def __init__(self, fp): + Parser.__init__(self) + self.tokenizer = XmlTokenizer(fp) + self.consume() + + def consume(self): + self.token = self.tokenizer.next() + + def match_element_start(self, name): + return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name + + def match_element_end(self, name): + return self.token.type == XML_ELEMENT_END and 
self.token.name_or_data == name + + def element_start(self, name): + while self.token.type == XML_CHARACTER_DATA: + self.consume() + if self.token.type != XML_ELEMENT_START: + raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) + if self.token.name_or_data != name: + raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token) + attrs = self.token.attrs + self.consume() + return attrs + + def element_end(self, name): + while self.token.type == XML_CHARACTER_DATA: + self.consume() + if self.token.type != XML_ELEMENT_END: + raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) + if self.token.name_or_data != name: + raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token) + self.consume() + + def character_data(self, strip = True): + data = '' + while self.token.type == XML_CHARACTER_DATA: + data += self.token.name_or_data + self.consume() + if strip: + data = data.strip() + return data + + +class GprofParser(Parser): + """Parser for GNU gprof output. 
+ + See also: + - Chapter "Interpreting gprof's Output" from the GNU gprof manual + http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph + - File "cg_print.c" from the GNU gprof source code + http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src + """ + + def __init__(self, fp): + Parser.__init__(self) + self.fp = fp + self.functions = {} + self.cycles = {} + + def readline(self): + line = self.fp.readline() + if not line: + sys.stderr.write('error: unexpected end of file\n') + sys.exit(1) + line = line.rstrip('\r\n') + return line + + _int_re = re.compile(r'^\d+$') + _float_re = re.compile(r'^\d+\.\d+$') + + def translate(self, mo): + """Extract a structure from a match object, while translating the types in the process.""" + attrs = {} + groupdict = mo.groupdict() + for name, value in groupdict.iteritems(): + if value is None: + value = None + elif self._int_re.match(value): + value = int(value) + elif self._float_re.match(value): + value = float(value) + attrs[name] = (value) + return Struct(attrs) + + _cg_header_re = re.compile( + # original gprof header + r'^\s+called/total\s+parents\s*$|' + + r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' + + r'^\s+called/total\s+children\s*$|' + + # GNU gprof header + r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$' + ) + + _cg_ignore_re = re.compile( + # spontaneous + r'^\s+\s*$|' + # internal calls (such as "mcount") + r'^.*\((\d+)\)$' + ) + + _cg_primary_re = re.compile( + r'^\[(?P\d+)\]?' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?:(?P\d+)(?:\+(?P\d+))?)?' + + r'\s+(?P\S.*?)' + + r'(?:\s+\d+)>)?' + + r'\s\[(\d+)\]$' + ) + + _cg_parent_re = re.compile( + r'^\s+(?P\d+\.\d+)?' + + r'\s+(?P\d+\.\d+)?' + + r'\s+(?P\d+)(?:/(?P\d+))?' + + r'\s+(?P\S.*?)' + + r'(?:\s+\d+)>)?' + + r'\s\[(?P\d+)\]$' + ) + + _cg_child_re = _cg_parent_re + + _cg_cycle_header_re = re.compile( + r'^\[(?P\d+)\]?' 
+ + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?P\d+\.\d+)' + + r'\s+(?:(?P\d+)(?:\+(?P\d+))?)?' + + r'\s+\d+)\sas\sa\swhole>' + + r'\s\[(\d+)\]$' + ) + + _cg_cycle_member_re = re.compile( + r'^\s+(?P\d+\.\d+)?' + + r'\s+(?P\d+\.\d+)?' + + r'\s+(?P\d+)(?:\+(?P\d+))?' + + r'\s+(?P\S.*?)' + + r'(?:\s+\d+)>)?' + + r'\s\[(?P\d+)\]$' + ) + + _cg_sep_re = re.compile(r'^--+$') + + def parse_function_entry(self, lines): + parents = [] + children = [] + + while True: + if not lines: + sys.stderr.write('warning: unexpected end of entry\n') + line = lines.pop(0) + if line.startswith('['): + break + + # read function parent line + mo = self._cg_parent_re.match(line) + if not mo: + if self._cg_ignore_re.match(line): + continue + sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) + else: + parent = self.translate(mo) + parents.append(parent) + + # read primary line + mo = self._cg_primary_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) + return + else: + function = self.translate(mo) + + while lines: + line = lines.pop(0) + + # read function subroutine line + mo = self._cg_child_re.match(line) + if not mo: + if self._cg_ignore_re.match(line): + continue + sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) + else: + child = self.translate(mo) + children.append(child) + + function.parents = parents + function.children = children + + self.functions[function.index] = function + + def parse_cycle_entry(self, lines): + + # read cycle header line + line = lines[0] + mo = self._cg_cycle_header_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) + return + cycle = self.translate(mo) + + # read cycle member lines + cycle.functions = [] + for line in lines[1:]: + mo = self._cg_cycle_member_re.match(line) + if not mo: + sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line) + continue + call = self.translate(mo) + 
cycle.functions.append(call) + + self.cycles[cycle.cycle] = cycle + + def parse_cg_entry(self, lines): + if lines[0].startswith("["): + self.parse_cycle_entry(lines) + else: + self.parse_function_entry(lines) + + def parse_cg(self): + """Parse the call graph.""" + + # skip call graph header + while not self._cg_header_re.match(self.readline()): + pass + line = self.readline() + while self._cg_header_re.match(line): + line = self.readline() + + # process call graph entries + entry_lines = [] + while line != '\014': # form feed + if line and not line.isspace(): + if self._cg_sep_re.match(line): + self.parse_cg_entry(entry_lines) + entry_lines = [] + else: + entry_lines.append(line) + line = self.readline() + + def parse(self): + self.parse_cg() + self.fp.close() + + profile = Profile() + profile[TIME] = 0.0 + + cycles = {} + for index in self.cycles.iterkeys(): + cycles[index] = Cycle() + + for entry in self.functions.itervalues(): + # populate the function + function = Function(entry.index, entry.name) + function[TIME] = entry.self + if entry.called is not None: + function[CALLS] = entry.called + if entry.called_self is not None: + call = Call(entry.index) + call[CALLS] = entry.called_self + function[CALLS] += entry.called_self + + # populate the function calls + for child in entry.children: + call = Call(child.index) + + assert child.called is not None + call[CALLS] = child.called + + if child.index not in self.functions: + # NOTE: functions that were never called but were discovered by gprof's + # static call graph analysis dont have a call graph entry so we need + # to add them here + missing = Function(child.index, child.name) + function[TIME] = 0.0 + function[CALLS] = 0 + profile.add_function(missing) + + function.add_call(call) + + profile.add_function(function) + + if entry.cycle is not None: + try: + cycle = cycles[entry.cycle] + except KeyError: + sys.stderr.write('warning: entry missing\n' % entry.cycle) + cycle = Cycle() + cycles[entry.cycle] = cycle + 
cycle.add_function(function) + + profile[TIME] = profile[TIME] + function[TIME] + + for cycle in cycles.itervalues(): + profile.add_cycle(cycle) + + # Compute derived events + profile.validate() + profile.ratio(TIME_RATIO, TIME) + profile.call_ratios(CALLS) + profile.integrate(TOTAL_TIME, TIME) + profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) + + return profile + + +class OprofileParser(LineParser): + """Parser for oprofile callgraph output. + + See also: + - http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph + """ + + _fields_re = { + 'samples': r'(?P\d+)', + '%': r'(?P\S+)', + 'linenr info': r'(?P\(no location information\)|\S+:\d+)', + 'image name': r'(?P\S+(?:\s\(tgid:[^)]*\))?)', + 'app name': r'(?P\S+)', + 'symbol name': r'(?P\(no symbols\)|.+?)', + } + + def __init__(self, infile): + LineParser.__init__(self, infile) + self.entries = {} + self.entry_re = None + + def add_entry(self, callers, function, callees): + try: + entry = self.entries[function.id] + except KeyError: + self.entries[function.id] = (callers, function, callees) + else: + callers_total, function_total, callees_total = entry + self.update_subentries_dict(callers_total, callers) + function_total.samples += function.samples + self.update_subentries_dict(callees_total, callees) + + def update_subentries_dict(self, totals, partials): + for partial in partials.itervalues(): + try: + total = totals[partial.id] + except KeyError: + totals[partial.id] = partial + else: + total.samples += partial.samples + + def parse(self): + # read lookahead + self.readline() + + self.parse_header() + while self.lookahead(): + self.parse_entry() + + profile = Profile() + + reverse_call_samples = {} + + # populate the profile + profile[SAMPLES] = 0 + for _callers, _function, _callees in self.entries.itervalues(): + function = Function(_function.id, _function.name) + function[SAMPLES] = _function.samples + profile.add_function(function) + profile[SAMPLES] += _function.samples + + if 
_function.application: + function[PROCESS] = os.path.basename(_function.application) + if _function.image: + function[MODULE] = os.path.basename(_function.image) + + total_callee_samples = 0 + for _callee in _callees.itervalues(): + total_callee_samples += _callee.samples + + for _callee in _callees.itervalues(): + if not _callee.self: + call = Call(_callee.id) + call[SAMPLES2] = _callee.samples + function.add_call(call) + + # compute derived data + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(SAMPLES2) + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return profile + + def parse_header(self): + while not self.match_header(): + self.consume() + line = self.lookahead() + fields = re.split(r'\s\s+', line) + entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P\s+\[self\])?$' + self.entry_re = re.compile(entry_re) + self.skip_separator() + + def parse_entry(self): + callers = self.parse_subentries() + if self.match_primary(): + function = self.parse_subentry() + if function is not None: + callees = self.parse_subentries() + self.add_entry(callers, function, callees) + self.skip_separator() + + def parse_subentries(self): + subentries = {} + while self.match_secondary(): + subentry = self.parse_subentry() + subentries[subentry.id] = subentry + return subentries + + def parse_subentry(self): + entry = Struct() + line = self.consume() + mo = self.entry_re.match(line) + if not mo: + raise ParseError('failed to parse', line) + fields = mo.groupdict() + entry.samples = int(fields.get('samples', 0)) + entry.percentage = float(fields.get('percentage', 0.0)) + if 'source' in fields and fields['source'] != '(no location information)': + source = fields['source'] + filename, lineno = source.split(':') + entry.filename = filename + entry.lineno = int(lineno) + else: + source = '' + entry.filename = None + entry.lineno = None + entry.image = fields.get('image', '') + entry.application = 
fields.get('application', '') + if 'symbol' in fields and fields['symbol'] != '(no symbols)': + entry.symbol = fields['symbol'] + else: + entry.symbol = '' + if entry.symbol.startswith('"') and entry.symbol.endswith('"'): + entry.symbol = entry.symbol[1:-1] + entry.id = ':'.join((entry.application, entry.image, source, entry.symbol)) + entry.self = fields.get('self', None) != None + if entry.self: + entry.id += ':self' + if entry.symbol: + entry.name = entry.symbol + else: + entry.name = entry.image + return entry + + def skip_separator(self): + while not self.match_separator(): + self.consume() + self.consume() + + def match_header(self): + line = self.lookahead() + return line.startswith('samples') + + def match_separator(self): + line = self.lookahead() + return line == '-'*len(line) + + def match_primary(self): + line = self.lookahead() + return not line[:1].isspace() + + def match_secondary(self): + line = self.lookahead() + return line[:1].isspace() + + +class SysprofParser(XmlParser): + + def __init__(self, stream): + XmlParser.__init__(self, stream) + + def parse(self): + objects = {} + nodes = {} + + self.element_start('profile') + while self.token.type == XML_ELEMENT_START: + if self.token.name_or_data == 'objects': + assert not objects + objects = self.parse_items('objects') + elif self.token.name_or_data == 'nodes': + assert not nodes + nodes = self.parse_items('nodes') + else: + self.parse_value(self.token.name_or_data) + self.element_end('profile') + + return self.build_profile(objects, nodes) + + def parse_items(self, name): + assert name[-1] == 's' + items = {} + self.element_start(name) + while self.token.type == XML_ELEMENT_START: + id, values = self.parse_item(name[:-1]) + assert id not in items + items[id] = values + self.element_end(name) + return items + + def parse_item(self, name): + attrs = self.element_start(name) + id = int(attrs['id']) + values = self.parse_values() + self.element_end(name) + return id, values + + def parse_values(self): 
+ values = {} + while self.token.type == XML_ELEMENT_START: + name = self.token.name_or_data + value = self.parse_value(name) + assert name not in values + values[name] = value + return values + + def parse_value(self, tag): + self.element_start(tag) + value = self.character_data() + self.element_end(tag) + if value.isdigit(): + return int(value) + if value.startswith('"') and value.endswith('"'): + return value[1:-1] + return value + + def build_profile(self, objects, nodes): + profile = Profile() + + profile[SAMPLES] = 0 + for id, object in objects.iteritems(): + # Ignore fake objects (process names, modules, "Everything", "kernel", etc.) + if object['self'] == 0: + continue + + function = Function(id, object['name']) + function[SAMPLES] = object['self'] + profile.add_function(function) + profile[SAMPLES] += function[SAMPLES] + + for id, node in nodes.iteritems(): + # Ignore fake calls + if node['self'] == 0: + continue + + # Find a non-ignored parent + parent_id = node['parent'] + while parent_id != 0: + parent = nodes[parent_id] + caller_id = parent['object'] + if objects[caller_id]['self'] != 0: + break + parent_id = parent['parent'] + if parent_id == 0: + continue + + callee_id = node['object'] + + assert objects[caller_id]['self'] + assert objects[callee_id]['self'] + + function = profile.functions[caller_id] + + samples = node['self'] + try: + call = function.calls[callee_id] + except KeyError: + call = Call(callee_id) + call[SAMPLES2] = samples + function.add_call(call) + else: + call[SAMPLES2] += samples + + # Compute derived events + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(SAMPLES2) + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return profile + + +class SharkParser(LineParser): + """Parser for MacOSX Shark output. 
+ + Author: tom@dbservice.com + """ + + def __init__(self, infile): + LineParser.__init__(self, infile) + self.stack = [] + self.entries = {} + + def add_entry(self, function): + try: + entry = self.entries[function.id] + except KeyError: + self.entries[function.id] = (function, { }) + else: + function_total, callees_total = entry + function_total.samples += function.samples + + def add_callee(self, function, callee): + func, callees = self.entries[function.id] + try: + entry = callees[callee.id] + except KeyError: + callees[callee.id] = callee + else: + entry.samples += callee.samples + + def parse(self): + self.readline() + self.readline() + self.readline() + self.readline() + + match = re.compile(r'(?P[|+ ]*)(?P\d+), (?P[^,]+), (?P.*)') + + while self.lookahead(): + line = self.consume() + mo = match.match(line) + if not mo: + raise ParseError('failed to parse', line) + + fields = mo.groupdict() + prefix = len(fields.get('prefix', 0)) / 2 - 1 + + symbol = str(fields.get('symbol', 0)) + image = str(fields.get('image', 0)) + + entry = Struct() + entry.id = ':'.join([symbol, image]) + entry.samples = int(fields.get('samples', 0)) + + entry.name = symbol + entry.image = image + + # adjust the callstack + if prefix < len(self.stack): + del self.stack[prefix:] + + if prefix == len(self.stack): + self.stack.append(entry) + + # if the callstack has had an entry, it's this functions caller + if prefix > 0: + self.add_callee(self.stack[prefix - 1], entry) + + self.add_entry(entry) + + profile = Profile() + profile[SAMPLES] = 0 + for _function, _callees in self.entries.itervalues(): + function = Function(_function.id, _function.name) + function[SAMPLES] = _function.samples + profile.add_function(function) + profile[SAMPLES] += _function.samples + + if _function.image: + function[MODULE] = os.path.basename(_function.image) + + for _callee in _callees.itervalues(): + call = Call(_callee.id) + call[SAMPLES] = _callee.samples + function.add_call(call) + + # compute derived 
data + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + profile.call_ratios(SAMPLES) + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return profile + + +class SleepyParser(Parser): + """Parser for GNU gprof output. + + See also: + - http://www.codersnotes.com/sleepy/ + - http://sleepygraph.sourceforge.net/ + """ + + def __init__(self, filename): + Parser.__init__(self) + + from zipfile import ZipFile + + self.database = ZipFile(filename) + + self.symbols = {} + self.calls = {} + + self.profile = Profile() + + _symbol_re = re.compile( + r'^(?P\w+)' + + r'\s+"(?P[^"]*)"' + + r'\s+"(?P[^"]*)"' + + r'\s+"(?P[^"]*)"' + + r'\s+(?P\d+)$' + ) + + def parse_symbols(self): + lines = self.database.read('symbols.txt').splitlines() + for line in lines: + mo = self._symbol_re.match(line) + if mo: + symbol_id, module, procname, sourcefile, sourceline = mo.groups() + + function_id = ':'.join([module, procname]) + + try: + function = self.profile.functions[function_id] + except KeyError: + function = Function(function_id, procname) + function[SAMPLES] = 0 + self.profile.add_function(function) + + self.symbols[symbol_id] = function + + def parse_callstacks(self): + lines = self.database.read("callstacks.txt").splitlines() + for line in lines: + fields = line.split() + samples = int(fields[0]) + callstack = fields[1:] + + callstack = [self.symbols[symbol_id] for symbol_id in callstack] + + callee = callstack[0] + + callee[SAMPLES] += samples + self.profile[SAMPLES] += samples + + for caller in callstack[1:]: + try: + call = caller.calls[callee.id] + except KeyError: + call = Call(callee.id) + call[SAMPLES2] = samples + caller.add_call(call) + else: + call[SAMPLES2] += samples + + callee = caller + + def parse(self): + profile = self.profile + profile[SAMPLES] = 0 + + self.parse_symbols() + self.parse_callstacks() + + # Compute derived events + profile.validate() + profile.find_cycles() + profile.ratio(TIME_RATIO, SAMPLES) + 
profile.call_ratios(SAMPLES2) + profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO) + + return profile + + +class AQtimeTable: + + def __init__(self, name, fields): + self.name = name + + self.fields = fields + self.field_column = {} + for column in range(len(fields)): + self.field_column[fields[column]] = column + self.rows = [] + + def __len__(self): + return len(self.rows) + + def __iter__(self): + for values, children in self.rows: + fields = {} + for name, value in zip(self.fields, values): + fields[name] = value + children = dict([(child.name, child) for child in children]) + yield fields, children + raise StopIteration + + def add_row(self, values, children=()): + self.rows.append((values, children)) + + +class AQtimeParser(XmlParser): + + def __init__(self, stream): + XmlParser.__init__(self, stream) + self.tables = {} + + def parse(self): + self.element_start('AQtime_Results') + self.parse_headers() + results = self.parse_results() + self.element_end('AQtime_Results') + return self.build_profile(results) + + def parse_headers(self): + self.element_start('HEADERS') + while self.token.type == XML_ELEMENT_START: + self.parse_table_header() + self.element_end('HEADERS') + + def parse_table_header(self): + attrs = self.element_start('TABLE_HEADER') + name = attrs['NAME'] + id = int(attrs['ID']) + field_types = [] + field_names = [] + while self.token.type == XML_ELEMENT_START: + field_type, field_name = self.parse_table_field() + field_types.append(field_type) + field_names.append(field_name) + self.element_end('TABLE_HEADER') + self.tables[id] = name, field_types, field_names + + def parse_table_field(self): + attrs = self.element_start('TABLE_FIELD') + type = attrs['TYPE'] + name = self.character_data() + self.element_end('TABLE_FIELD') + return type, name + + def parse_results(self): + self.element_start('RESULTS') + table = self.parse_data() + self.element_end('RESULTS') + return table + + def parse_data(self): + rows = [] + attrs = self.element_start('DATA') + 
table_id = int(attrs['TABLE_ID']) + table_name, field_types, field_names = self.tables[table_id] + table = AQtimeTable(table_name, field_names) + while self.token.type == XML_ELEMENT_START: + row, children = self.parse_row(field_types) + table.add_row(row, children) + self.element_end('DATA') + return table + + def parse_row(self, field_types): + row = [None]*len(field_types) + children = [] + self.element_start('ROW') + while self.token.type == XML_ELEMENT_START: + if self.token.name_or_data == 'FIELD': + field_id, field_value = self.parse_field(field_types) + row[field_id] = field_value + elif self.token.name_or_data == 'CHILDREN': + children = self.parse_children() + else: + raise XmlTokenMismatch(" or ", self.token) + self.element_end('ROW') + return row, children + + def parse_field(self, field_types): + attrs = self.element_start('FIELD') + id = int(attrs['ID']) + type = field_types[id] + value = self.character_data() + if type == 'Integer': + value = int(value) + elif type == 'Float': + value = float(value) + elif type == 'Address': + value = int(value) + elif type == 'String': + pass + else: + assert False + self.element_end('FIELD') + return id, value + + def parse_children(self): + children = [] + self.element_start('CHILDREN') + while self.token.type == XML_ELEMENT_START: + table = self.parse_data() + assert table.name not in children + children.append(table) + self.element_end('CHILDREN') + return children + + def build_profile(self, results): + assert results.name == 'Routines' + profile = Profile() + profile[TIME] = 0.0 + for fields, tables in results: + function = self.build_function(fields) + children = tables['Children'] + for fields, _ in children: + call = self.build_call(fields) + function.add_call(call) + profile.add_function(function) + profile[TIME] = profile[TIME] + function[TIME] + profile[TOTAL_TIME] = profile[TIME] + profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) + return profile + + def build_function(self, fields): + function = 
Function(self.build_id(fields), self.build_name(fields)) + function[TIME] = fields['Time'] + function[TOTAL_TIME] = fields['Time with Children'] + #function[TIME_RATIO] = fields['% Time']/100.0 + #function[TOTAL_TIME_RATIO] = fields['% with Children']/100.0 + return function + + def build_call(self, fields): + call = Call(self.build_id(fields)) + call[TIME] = fields['Time'] + call[TOTAL_TIME] = fields['Time with Children'] + #call[TIME_RATIO] = fields['% Time']/100.0 + #call[TOTAL_TIME_RATIO] = fields['% with Children']/100.0 + return call + + def build_id(self, fields): + return ':'.join([fields['Module Name'], fields['Unit Name'], fields['Routine Name']]) + + def build_name(self, fields): + # TODO: use more fields + return fields['Routine Name'] + + +class PstatsParser: + """Parser python profiling statistics saved with te pstats module.""" + + def __init__(self, *filename): + import pstats + try: + self.stats = pstats.Stats(*filename) + except ValueError: + import hotshot.stats + self.stats = hotshot.stats.load(filename[0]) + self.profile = Profile() + self.function_ids = {} + + def get_function_name(self, (filename, line, name)): + module = os.path.splitext(filename)[0] + module = os.path.basename(module) + return "%s:%d:%s" % (module, line, name) + + def get_function(self, key): + try: + id = self.function_ids[key] + except KeyError: + id = len(self.function_ids) + name = self.get_function_name(key) + function = Function(id, name) + self.profile.functions[id] = function + self.function_ids[key] = id + else: + function = self.profile.functions[id] + return function + + def parse(self): + self.profile[TIME] = 0.0 + self.profile[TOTAL_TIME] = self.stats.total_tt + for fn, (cc, nc, tt, ct, callers) in self.stats.stats.iteritems(): + callee = self.get_function(fn) + callee[CALLS] = nc + callee[TOTAL_TIME] = ct + callee[TIME] = tt + self.profile[TIME] += tt + self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct) + for fn, value in callers.iteritems(): + 
caller = self.get_function(fn) + call = Call(callee.id) + if isinstance(value, tuple): + for i in xrange(0, len(value), 4): + nc, cc, tt, ct = value[i:i+4] + if CALLS in call: + call[CALLS] += cc + else: + call[CALLS] = cc + + if TOTAL_TIME in call: + call[TOTAL_TIME] += ct + else: + call[TOTAL_TIME] = ct + + else: + call[CALLS] = value + call[TOTAL_TIME] = ratio(value, nc)*ct + + caller.add_call(call) + #self.stats.print_stats() + #self.stats.print_callees() + + # Compute derived events + self.profile.validate() + self.profile.ratio(TIME_RATIO, TIME) + self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME) + + return self.profile + + +class Theme: + + def __init__(self, + bgcolor = (0.0, 0.0, 1.0), + mincolor = (0.0, 0.0, 0.0), + maxcolor = (0.0, 0.0, 1.0), + fontname = "Arial", + minfontsize = 10.0, + maxfontsize = 10.0, + minpenwidth = 0.5, + maxpenwidth = 4.0, + gamma = 2.2, + skew = 1.0): + self.bgcolor = bgcolor + self.mincolor = mincolor + self.maxcolor = maxcolor + self.fontname = fontname + self.minfontsize = minfontsize + self.maxfontsize = maxfontsize + self.minpenwidth = minpenwidth + self.maxpenwidth = maxpenwidth + self.gamma = gamma + self.skew = skew + + def graph_bgcolor(self): + return self.hsl_to_rgb(*self.bgcolor) + + def graph_fontname(self): + return self.fontname + + def graph_fontsize(self): + return self.minfontsize + + def node_bgcolor(self, weight): + return self.color(weight) + + def node_fgcolor(self, weight): + return self.graph_bgcolor() + + def node_fontsize(self, weight): + return self.fontsize(weight) + + def edge_color(self, weight): + return self.color(weight) + + def edge_fontsize(self, weight): + return self.fontsize(weight) + + def edge_penwidth(self, weight): + return max(weight*self.maxpenwidth, self.minpenwidth) + + def edge_arrowsize(self, weight): + return 0.5 * math.sqrt(self.edge_penwidth(weight)) + + def fontsize(self, weight): + return max(weight**2 * self.maxfontsize, self.minfontsize) + + def color(self, weight): + weight 
= min(max(weight, 0.0), 1.0) + + hmin, smin, lmin = self.mincolor + hmax, smax, lmax = self.maxcolor + + if self.skew < 0: + raise ValueError("Skew must be greater than 0") + elif self.skew == 1.0: + h = hmin + weight*(hmax - hmin) + s = smin + weight*(smax - smin) + l = lmin + weight*(lmax - lmin) + else: + base = self.skew + h = hmin + ((hmax-hmin)*(-1.0 + (base ** weight)) / (base - 1.0)) + s = smin + ((smax-smin)*(-1.0 + (base ** weight)) / (base - 1.0)) + l = lmin + ((lmax-lmin)*(-1.0 + (base ** weight)) / (base - 1.0)) + + return self.hsl_to_rgb(h, s, l) + + def hsl_to_rgb(self, h, s, l): + """Convert a color from HSL color-model to RGB. + + See also: + - http://www.w3.org/TR/css3-color/#hsl-color + """ + + h = h % 1.0 + s = min(max(s, 0.0), 1.0) + l = min(max(l, 0.0), 1.0) + + if l <= 0.5: + m2 = l*(s + 1.0) + else: + m2 = l + s - l*s + m1 = l*2.0 - m2 + r = self._hue_to_rgb(m1, m2, h + 1.0/3.0) + g = self._hue_to_rgb(m1, m2, h) + b = self._hue_to_rgb(m1, m2, h - 1.0/3.0) + + # Apply gamma correction + r **= self.gamma + g **= self.gamma + b **= self.gamma + + return (r, g, b) + + def _hue_to_rgb(self, m1, m2, h): + if h < 0.0: + h += 1.0 + elif h > 1.0: + h -= 1.0 + if h*6 < 1.0: + return m1 + (m2 - m1)*h*6.0 + elif h*2 < 1.0: + return m2 + elif h*3 < 2.0: + return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0 + else: + return m1 + + +TEMPERATURE_COLORMAP = Theme( + mincolor = (2.0/3.0, 0.80, 0.25), # dark blue + maxcolor = (0.0, 1.0, 0.5), # satured red + gamma = 1.0 +) + +PINK_COLORMAP = Theme( + mincolor = (0.0, 1.0, 0.90), # pink + maxcolor = (0.0, 1.0, 0.5), # satured red +) + +GRAY_COLORMAP = Theme( + mincolor = (0.0, 0.0, 0.85), # light gray + maxcolor = (0.0, 0.0, 0.0), # black +) + +BW_COLORMAP = Theme( + minfontsize = 8.0, + maxfontsize = 24.0, + mincolor = (0.0, 0.0, 0.0), # black + maxcolor = (0.0, 0.0, 0.0), # black + minpenwidth = 0.1, + maxpenwidth = 8.0, +) + + +class DotWriter: + """Writer for the DOT language. 
+ + See also: + - "The DOT Language" specification + http://www.graphviz.org/doc/info/lang.html + """ + + def __init__(self, fp): + self.fp = fp + + def graph(self, profile, theme): + self.begin_graph() + + fontname = theme.graph_fontname() + + self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125) + self.attr('node', fontname=fontname, shape="box", style="filled", fontcolor="white", width=0, height=0) + self.attr('edge', fontname=fontname) + + for function in profile.functions.itervalues(): + labels = [] + for event in PROCESS, MODULE: + if event in function.events: + label = event.format(function[event]) + labels.append(label) + labels.append(function.name) + for event in TOTAL_TIME_RATIO, TIME_RATIO, CALLS: + if event in function.events: + label = event.format(function[event]) + labels.append(label) + + try: + weight = function[PRUNE_RATIO] + except UndefinedEvent: + weight = 0.0 + + label = '\n'.join(labels) + self.node(function.id, + label = label, + color = self.color(theme.node_bgcolor(weight)), + fontcolor = self.color(theme.node_fgcolor(weight)), + fontsize = "%.2f" % theme.node_fontsize(weight), + ) + + for call in function.calls.itervalues(): + callee = profile.functions[call.callee_id] + + labels = [] + for event in TOTAL_TIME_RATIO, CALLS: + if event in call.events: + label = event.format(call[event]) + labels.append(label) + + try: + weight = call[PRUNE_RATIO] + except UndefinedEvent: + try: + weight = callee[PRUNE_RATIO] + except UndefinedEvent: + weight = 0.0 + + label = '\n'.join(labels) + + self.edge(function.id, call.callee_id, + label = label, + color = self.color(theme.edge_color(weight)), + fontcolor = self.color(theme.edge_color(weight)), + fontsize = "%.2f" % theme.edge_fontsize(weight), + penwidth = "%.2f" % theme.edge_penwidth(weight), + labeldistance = "%.2f" % theme.edge_penwidth(weight), + arrowsize = "%.2f" % theme.edge_arrowsize(weight), + ) + + self.end_graph() + + def begin_graph(self): + self.write('digraph {\n') + + 
def end_graph(self): + self.write('}\n') + + def attr(self, what, **attrs): + self.write("\t") + self.write(what) + self.attr_list(attrs) + self.write(";\n") + + def node(self, node, **attrs): + self.write("\t") + self.id(node) + self.attr_list(attrs) + self.write(";\n") + + def edge(self, src, dst, **attrs): + self.write("\t") + self.id(src) + self.write(" -> ") + self.id(dst) + self.attr_list(attrs) + self.write(";\n") + + def attr_list(self, attrs): + if not attrs: + return + self.write(' [') + first = True + for name, value in attrs.iteritems(): + if first: + first = False + else: + self.write(", ") + self.id(name) + self.write('=') + self.id(value) + self.write(']') + + def id(self, id): + if isinstance(id, (int, float)): + s = str(id) + elif isinstance(id, basestring): + if id.isalnum(): + s = id + else: + s = self.escape(id) + else: + raise TypeError + self.write(s) + + def color(self, (r, g, b)): + + def float2int(f): + if f <= 0.0: + return 0 + if f >= 1.0: + return 255 + return int(255.0*f + 0.5) + + return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)]) + + def escape(self, s): + s = s.encode('utf-8') + s = s.replace('\\', r'\\') + s = s.replace('\n', r'\n') + s = s.replace('\t', r'\t') + s = s.replace('"', r'\"') + return '"' + s + '"' + + def write(self, s): + self.fp.write(s) + + +class Main: + """Main program.""" + + themes = { + "color": TEMPERATURE_COLORMAP, + "pink": PINK_COLORMAP, + "gray": GRAY_COLORMAP, + "bw": BW_COLORMAP, + } + + def main(self): + """Main program.""" + + parser = optparse.OptionParser( + usage="\n\t%prog [options] [file] ...", + version="%%prog %s" % __version__) + parser.add_option( + '-o', '--output', metavar='FILE', + type="string", dest="output", + help="output filename [stdout]") + parser.add_option( + '-n', '--node-thres', metavar='PERCENTAGE', + type="float", dest="node_thres", default=0.5, + help="eliminate nodes below this threshold [default: %default]") + parser.add_option( + '-e', '--edge-thres', 
metavar='PERCENTAGE', + type="float", dest="edge_thres", default=0.1, + help="eliminate edges below this threshold [default: %default]") + parser.add_option( + '-f', '--format', + type="choice", choices=('prof', 'oprofile', 'sysprof', 'pstats', 'shark', 'sleepy', 'aqtime'), + dest="format", default="prof", + help="profile format: prof, oprofile, sysprof, shark, sleepy, aqtime, or pstats [default: %default]") + parser.add_option( + '-c', '--colormap', + type="choice", choices=('color', 'pink', 'gray', 'bw'), + dest="theme", default="color", + help="color map: color, pink, gray, or bw [default: %default]") + parser.add_option( + '-s', '--strip', + action="store_true", + dest="strip", default=False, + help="strip function parameters, template parameters, and const modifiers from demangled C++ function names") + parser.add_option( + '-w', '--wrap', + action="store_true", + dest="wrap", default=False, + help="wrap function names") + # add a new option to control skew of the colorization curve + parser.add_option( + '--skew', + type="float", dest="theme_skew", default=1.0, + help="skew the colorization curve. Values < 1.0 give more variety to lower percentages. Value > 1.0 give less variety to lower percentages") + (self.options, self.args) = parser.parse_args(sys.argv[1:]) + + if len(self.args) > 1 and self.options.format != 'pstats': + parser.error('incorrect number of arguments') + + try: + self.theme = self.themes[self.options.theme] + except KeyError: + parser.error('invalid colormap \'%s\'' % self.options.theme) + + # set skew on the theme now that it has been picked. 
+ if self.options.theme_skew: + self.theme.skew = self.options.theme_skew + + if self.options.format == 'prof': + if not self.args: + fp = sys.stdin + else: + fp = open(self.args[0], 'rt') + parser = GprofParser(fp) + elif self.options.format == 'oprofile': + if not self.args: + fp = sys.stdin + else: + fp = open(self.args[0], 'rt') + parser = OprofileParser(fp) + elif self.options.format == 'sysprof': + if not self.args: + fp = sys.stdin + else: + fp = open(self.args[0], 'rt') + parser = SysprofParser(fp) + elif self.options.format == 'pstats': + if not self.args: + parser.error('at least a file must be specified for pstats input') + parser = PstatsParser(*self.args) + elif self.options.format == 'shark': + if not self.args: + fp = sys.stdin + else: + fp = open(self.args[0], 'rt') + parser = SharkParser(fp) + elif self.options.format == 'sleepy': + if len(self.args) != 1: + parser.error('exactly one file must be specified for sleepy input') + parser = SleepyParser(self.args[0]) + elif self.options.format == 'aqtime': + if not self.args: + fp = sys.stdin + else: + fp = open(self.args[0], 'rt') + parser = AQtimeParser(fp) + else: + parser.error('invalid format \'%s\'' % self.options.format) + + self.profile = parser.parse() + + if self.options.output is None: + self.output = sys.stdout + else: + self.output = open(self.options.output, 'wt') + + self.write_graph() + + _parenthesis_re = re.compile(r'\([^()]*\)') + _angles_re = re.compile(r'<[^<>]*>') + _const_re = re.compile(r'\s+const$') + + def strip_function_name(self, name): + """Remove extraneous information from C++ demangled function names.""" + + # Strip function parameters from name by recursively removing paired parenthesis + while True: + name, n = self._parenthesis_re.subn('', name) + if not n: + break + + # Strip const qualifier + name = self._const_re.sub('', name) + + # Strip template parameters from name by recursively removing paired angles + while True: + name, n = self._angles_re.subn('', name) + if 
not n: + break + + return name + + def wrap_function_name(self, name): + """Split the function name on multiple lines.""" + + if len(name) > 32: + ratio = 2.0/3.0 + height = max(int(len(name)/(1.0 - ratio) + 0.5), 1) + width = max(len(name)/height, 32) + # TODO: break lines in symbols + name = textwrap.fill(name, width, break_long_words=False) + + # Take away spaces + name = name.replace(", ", ",") + name = name.replace("> >", ">>") + name = name.replace("> >", ">>") # catch consecutive + + return name + + def compress_function_name(self, name): + """Compress function name according to the user preferences.""" + + if self.options.strip: + name = self.strip_function_name(name) + + if self.options.wrap: + name = self.wrap_function_name(name) + + # TODO: merge functions with same resulting name + + return name + + def write_graph(self): + dot = DotWriter(self.output) + profile = self.profile + profile.prune(self.options.node_thres/100.0, self.options.edge_thres/100.0) + + for function in profile.functions.itervalues(): + function.name = self.compress_function_name(function.name) + + dot.graph(profile, self.theme) + + +if __name__ == '__main__': + Main().main() diff --git a/external/gprof2dot/__init__.py b/external/gprof2dot/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/makeProfileGraph.py b/makeProfileGraph.py old mode 100644 new mode 100755 index d49ea55da9..e63f517b33 --- a/makeProfileGraph.py +++ b/makeProfileGraph.py @@ -10,11 +10,14 @@ def makeProfileGraph(stats_file,thresh_node,thresh_edge): `dot -Tpdf input.dot -o output.pdf`. """ try: - from gprof2dot import gprof2dot + from external.gprof2dot import gprof2dot except ImportError: - print('Package gprof2dot not found. Unable to create a graph of the profile statistics.') - print("`pip install gprof2dot` if you don't have it.") - return + try: + from external import gprof2dot + except ImportError: + print('Package gprof2dot not found. 
Unable to create a graph of the profile statistics.') + print("`pip install gprof2dot` if you don't have it.") + return import subprocess m = gprof2dot.Main() class Options: @@ -47,9 +50,9 @@ class Options: import argparse parser = argparse.ArgumentParser(description="Creates a call graph with profiling information.") - parser.add_argument('FILE', type=str, default='RMG.profile', help='.profile file') - parser.add_argument('THRESH_NODE', type=float, default=0.8, help='threshold percentage value for nodes') - parser.add_argument('THRESH_EDGE', type=float, default=0.1, help='threshold percentage value for nodes') + parser.add_argument('FILE', type=str, default='RMG.profile',nargs='?', help='.profile file (default file is RMG.profile)') + parser.add_argument('THRESH_NODE', type=float, default=0.8,nargs='?', help='threshold percentage value for nodes (default value is 0.8)') + parser.add_argument('THRESH_EDGE', type=float, default=0.1, nargs='?', help='threshold percentage value for nodes (default value is 0.1)') args = parser.parse_args() stats_file=args.FILE thresh_node=args.THRESH_NODE From 4fadd4435831708fd6ff21d04090da260d29a85f Mon Sep 17 00:00:00 2001 From: keceli Date: Fri, 22 Nov 2013 11:26:44 -0500 Subject: [PATCH 28/39] Improved argument parsing and increased chunk size --- thermoEstimator.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/thermoEstimator.py b/thermoEstimator.py index 643d8fe809..9d21013351 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -42,14 +42,12 @@ def runThermoEstimator(inputFile): output = open(os.path.join(rmg.outputDirectory, 'output.txt'),'wb') library = ThermoLibrary(name='Thermo Estimation Library') listOfSpecies=rmg.initialSpecies - chunksize=50 + chunksize=1000 if rmg.reactionModel.quantumMechanics: logging.debug("qmValue fine @ runThermoEstimator") shared.setConst(qmValue=rmg.reactionModel.quantumMechanics) for chunk in list(chunks(listOfSpecies,chunksize)): - 
logging.debug("Parallelized section starts...") # There will be no stdout from workers except the main one. outputList = futures.map(makeThermoForSpecies, chunk) - logging.debug("Parallelized section ends.") for species, thermo in zip(chunk, outputList): logging.debug("Species {0}".format(species.label)) species.thermo = thermo @@ -60,7 +58,6 @@ def runThermoEstimator(inputFile): thermo = species.thermo.toThermoData(), shortDesc = species.thermo.comment, ) - logging.debug("chunk done") output.write(writeThermoEntry(species)) output.write('\n') @@ -74,12 +71,22 @@ def runThermoEstimator(inputFile): import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(description= + """ + thermoEstimator.py generates thermochemical parameters based on Benson group additivity + or quantum mechanical calculations. \n + Generates three output files. + RMG.log: Contains information about the process. + output.txt: Contains string representations of the NASA model for each species, readable by Chemkin. + ThermoLibrary.py: Thermo library that can be used in RMG simulations. Can be uploaded to RMG-database. 
+ """) parser.add_argument('input', metavar='FILE', type=str, nargs=1, help='Thermo input file') parser.add_argument('-p', '--profile', action='store_true', help='run under cProfile to gather profiling statistics, and postprocess them if job completes') parser.add_argument('-P', '--postprocess', action='store_true', help='postprocess profiling statistics from previous [failed] run; does not run the simulation') - + group = parser.add_mutually_exclusive_group() + group.add_argument('-d', '--debug', action='store_true', help='print debug information') + group.add_argument('-q', '--quiet', action='store_true', help='only print warnings and errors') args = parser.parse_args() inputFile = os.path.abspath(args.input[0]) @@ -106,7 +113,10 @@ def runThermoEstimator(inputFile): makeProfileGraph(stats_file) else: - level = logging.INFO - initializeLog(level, 'thermo.log') + + if args.debug: level = logging.DEBUG + elif args.quiet: level = logging.WARNING + else: level = logging.INFO + initializeLog(level, 'RMG.log') logging.debug("runThermoEstimator starts...") runThermoEstimator(inputFile) From fcb5c20c3a1dabfeabd8034794a2161087cafe5e Mon Sep 17 00:00:00 2001 From: keceli Date: Sun, 24 Nov 2013 01:14:46 -0500 Subject: [PATCH 29/39] Load only thermo libraries To reduce the database pickle size, thermoEstimator now only loads thermo libraries. --- rmgpy/rmg/main.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++ thermoEstimator.py | 7 +++-- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/rmgpy/rmg/main.py b/rmgpy/rmg/main.py index 6087c04272..674a6d1013 100644 --- a/rmgpy/rmg/main.py +++ b/rmgpy/rmg/main.py @@ -306,6 +306,77 @@ def loadDatabase(self): with open(cache_hash_file,'w') as f: f.write(database_hash) + def loadThermoDatabase(self): + """ + Load the RMG Database. 
+ + The data is loaded from self.databaseDirectory, according to settings in: + + * self.thermoLibraries + * self.reactionLibraries + * self.seedMechanisms + * self.kineticsFamilies + * self.kineticsDepositories + + If `self.kineticsEstimator == 'rate rules'` then the training set values are + added and the blanks are filled in by averaging. + + If self.outputDirectory contains :file:`database.pkl` and :file:`database.hash` files then + these are checked for validity and used as a cache. Once loaded (and averages filled + in if necessary) then a cache (pickle and hash) is saved. + """ + import inspect, hashlib, cPickle, rmgpy.utilities, scoop.shared + + # Make a hash of everything that could alter the contents of the database once it is fully loaded. + # Then we can compare this hash to the cached file to see if the cache is valid. + database_metadata = { + 'path': self.databaseDirectory, + 'database hash': rmgpy.utilities.path_checksum([self.databaseDirectory]), + 'thermoLibraries': self.thermoLibraries, + 'rmgpy.data source hash': rmgpy.data.getSourceHash(), + 'this source hash': hashlib.sha1(inspect.getsource(self.__class__)).hexdigest(), + } + database_hash = hashlib.sha1(cPickle.dumps(database_metadata)).hexdigest() + cache_hash_file = os.path.join(self.outputDirectory,'database.hash') + cache_pickle_file = os.path.join(self.outputDirectory,'database.pkl') + scoop.shared.setConst(databaseFile=cache_pickle_file, databaseHash=database_hash) + if not os.path.exists(cache_pickle_file): + logging.info("Couldn't find a database cache file {0!r} so will reload from source.".format(cache_pickle_file)) + elif not os.path.exists(cache_hash_file): + logging.info("Couldn't find database cache hash file {0!r} to validate cache so will reload from source.".format(cache_hash_file)) + else: + if database_hash != open(cache_hash_file,'r').read(): + logging.info("According to hash file, it looks like database cache is not valid. 
Will clear it and reload.") + os.unlink(cache_hash_file) + os.unlink(cache_pickle_file) + else: + logging.info("According to hash file, it looks like database cache is valid.") + database = cPickle.load(open(cache_pickle_file, 'rb')) + # Check the database from the pickle really does have the hash in the database.hash file. + if database.hash == database_hash: + logging.info("Database loaded from {0} has correct hash. Will use this cache.".format(cache_pickle_file)) + self.database = database + rmgpy.data.rmg.database = database # we need to store it in this module level variable too! + return + else: + logging.info("Database loaded from {0} has INCORRECT hash. Will clear the cache and reload.".format(cache_pickle_file)) + os.unlink(cache_hash_file) + os.unlink(cache_pickle_file) + + self.database = RMGDatabase() + self.database.loadThermo( + path = os.path.join(self.databaseDirectory, 'thermo'), + thermoLibraries = self.thermoLibraries, + depository = False, # Don't bother loading the depository information, as we don't use it + ) + + self.database.hash = database_hash # store the hash in the database so we can check it when it is next pickled. 
+ logging.info("Saving database cache in {0!r}".format(cache_pickle_file)) + self.database.saveToPickle(cache_pickle_file) + with open(cache_hash_file,'w') as f: + f.write(database_hash) + + def initialize(self, args): """ diff --git a/thermoEstimator.py b/thermoEstimator.py index 9d21013351..d36def9cbf 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -32,7 +32,7 @@ def runThermoEstimator(inputFile): rmg.loadThermoInput(inputFile) # initialize and load the database as well as any QM settings - rmg.loadDatabase() + rmg.loadThermoDatabase() if rmg.quantumMechanics: logging.debug("Initialize QM") rmg.quantumMechanics.initialize() @@ -60,9 +60,10 @@ def runThermoEstimator(inputFile): ) output.write(writeThermoEntry(species)) output.write('\n') + library.save(os.path.join(rmg.outputDirectory,'ThermoLibrary.py')) output.close() - library.save(os.path.join(rmg.outputDirectory,'ThermoLibrary.py')) + ################################################################################ @@ -87,6 +88,8 @@ def runThermoEstimator(inputFile): group = parser.add_mutually_exclusive_group() group.add_argument('-d', '--debug', action='store_true', help='print debug information') group.add_argument('-q', '--quiet', action='store_true', help='only print warnings and errors') + + args = parser.parse_args() inputFile = os.path.abspath(args.input[0]) From f9dea822823a319a43c72644564a886df875bb0f Mon Sep 17 00:00:00 2001 From: keceli Date: Mon, 25 Nov 2013 12:48:38 -0500 Subject: [PATCH 30/39] Allows RMG to continue when there is a duplicate species Originally, RMG stops if any two species are identical in the initial species list. Since this can happen frequently when using thermoEstimator with a large list of spcies, I changed it, so that RMG ignores the duplicate, and continue execution. 
--- rmgpy/rmg/input.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/rmgpy/rmg/input.py b/rmgpy/rmg/input.py index 6fe74c9ed3..4824f4331e 100644 --- a/rmgpy/rmg/input.py +++ b/rmgpy/rmg/input.py @@ -92,9 +92,12 @@ def database( def species(label, structure, reactive=True): logging.debug('Found {0} species "{1}" ({2})'.format('reactive' if reactive else 'nonreactive', label, structure.toSMILES())) spec, isNew = rmg.reactionModel.makeNewSpecies(structure, label=label, reactive=reactive) - assert isNew, "Species {0} is a duplicate of {1}. Species in input file must be unique".format(label,spec.label) - rmg.initialSpecies.append(spec) - speciesDict[label] = spec + #assert isNew, "Species {0} is a duplicate of {1}. Species in input file must be unique".format(label,spec.label) + if isNew: + rmg.initialSpecies.append(spec) + speciesDict[label] = spec + else: + logging.info("Species {0} is a duplicate of {1}. Avoid it and continue calculation ...".format(label,spec.label)) def SMARTS(string): return Molecule().fromSMARTS(string) From 71d948d5fe08e6b12470ae8960aa51ef9ae52156 Mon Sep 17 00:00:00 2001 From: keceli Date: Mon, 25 Nov 2013 14:23:29 -0500 Subject: [PATCH 31/39] Added memory usage information in thermoEstimator Optional positional argument is added to change chunk size. 
--- thermoEstimator.py | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/thermoEstimator.py b/thermoEstimator.py index d36def9cbf..5e1da390f6 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -15,6 +15,7 @@ from rmgpy.chemkin import writeThermoEntry from rmgpy.rmg.model import makeThermoForSpecies from scoop import futures,shared +import resource # to see memory usage ################################################################################ def chunks(l, n): """ @@ -23,31 +24,43 @@ def chunks(l, n): for i in range(0, len(l), n): yield l[i:i+n] -def runThermoEstimator(inputFile): +def runThermoEstimator(inputFile,chunkSize): """ Estimate thermo for a list of species using RMG and the settings chosen inside a thermo input file. """ - + logging.debug("Maximum memory usage:{0} MBs.".format(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)) rmg = RMG() + + logging.debug("RMG object created...") + logging.debug("Maximum memory usage:{0} MBs.".format(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)) rmg.loadThermoInput(inputFile) + logging.debug("Input file loaded...") + logging.debug("Maximum memory usage:{0} MBs.".format(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)) # initialize and load the database as well as any QM settings rmg.loadThermoDatabase() + logging.debug("Thermo database loaded...") + logging.debug("Maximum memory usage:{0} MBs.".format(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)) if rmg.quantumMechanics: - logging.debug("Initialize QM") rmg.quantumMechanics.initialize() + logging.debug("QM module initialized...") + logging.debug("Maximum memory usage:{0} MBs.".format(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)) # Generate the thermo for all the species and write them to chemkin format as well as # ThermoLibrary format with values for H, S, and Cp's. 
output = open(os.path.join(rmg.outputDirectory, 'output.txt'),'wb') - library = ThermoLibrary(name='Thermo Estimation Library') listOfSpecies=rmg.initialSpecies - chunksize=1000 - if rmg.reactionModel.quantumMechanics: logging.debug("qmValue fine @ runThermoEstimator") + logging.debug("Initial species loaded...") + logging.debug("Maximum memory usage:{0} MBs.".format(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)) + + chunkIndex=0 shared.setConst(qmValue=rmg.reactionModel.quantumMechanics) - for chunk in list(chunks(listOfSpecies,chunksize)): + for chunk in list(chunks(listOfSpecies,chunkSize)): # There will be no stdout from workers except the main one. outputList = futures.map(makeThermoForSpecies, chunk) + if chunkIndex == 0: libraryName = 'ThermoLibrary' + else: libraryName = 'ThermoLibrary'+ str(chunkIndex) + library = ThermoLibrary(name=libraryName) for species, thermo in zip(chunk, outputList): logging.debug("Species {0}".format(species.label)) species.thermo = thermo @@ -60,16 +73,19 @@ def runThermoEstimator(inputFile): ) output.write(writeThermoEntry(species)) output.write('\n') - library.save(os.path.join(rmg.outputDirectory,'ThermoLibrary.py')) - + logging.debug("Thermo library created...") + logging.debug("Maximum memory usage:{0} MBs.".format(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)) + library.save(os.path.join(rmg.outputDirectory, libraryName + '.py')) + del library + chunkIndex += 1 output.close() + logging.debug("runThermoEstimator is done.") ################################################################################ if __name__ == '__main__': - import argparse parser = argparse.ArgumentParser(description= @@ -83,6 +99,8 @@ def runThermoEstimator(inputFile): """) parser.add_argument('input', metavar='FILE', type=str, nargs=1, help='Thermo input file') + parser.add_argument('CHUNKSIZE', type=int, default=10000,nargs='?', help='''chunk size that determines number of species passed to + workers at once, should be 
larger than the number of processors. (default value is 10000)''') parser.add_argument('-p', '--profile', action='store_true', help='run under cProfile to gather profiling statistics, and postprocess them if job completes') parser.add_argument('-P', '--postprocess', action='store_true', help='postprocess profiling statistics from previous [failed] run; does not run the simulation') group = parser.add_mutually_exclusive_group() @@ -94,10 +112,10 @@ def runThermoEstimator(inputFile): inputFile = os.path.abspath(args.input[0]) inputDirectory = os.path.abspath(os.path.dirname(args.input[0])) - + chunkSize = args.CHUNKSIZE if args.postprocess: print "Postprocessing the profiler statistics (will be appended to thermo.log)" - print "Use `dot -Tpdf thermo_profile.dot -o thermo_profile.pdf`" + print "Use `dot -Tpdf RMG.profile.dot -o RMG.profile.pdf`" args.profile = True if args.profile: @@ -105,7 +123,7 @@ def runThermoEstimator(inputFile): global_vars = {} local_vars = {'inputFile': inputFile,'runThermoEstimator':runThermoEstimator} command = """runThermoEstimator(inputFile)""" - stats_file = 'thermo.profile' + stats_file = 'RMG.profile' print("Running under cProfile") if not args.postprocess: # actually run the program! @@ -122,4 +140,4 @@ def runThermoEstimator(inputFile): else: level = logging.INFO initializeLog(level, 'RMG.log') logging.debug("runThermoEstimator starts...") - runThermoEstimator(inputFile) + runThermoEstimator(inputFile,chunkSize) From 8e63e0f9262fdc2a024f7928b679f1a21b097601 Mon Sep 17 00:00:00 2001 From: keceli Date: Mon, 25 Nov 2013 14:25:59 -0500 Subject: [PATCH 32/39] Changed the scope of shared variable qmValue. I am still not sure the best way to do it. 
--- rmgpy/rmg/model.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index 9eb4b2f911..9596286498 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -63,15 +63,14 @@ # generateThermoDataFromQM under the Species class imports the qm package __database = None +qmValue = None def makeThermoForSpecies(spec): """ Make thermo for a species. """ - import logging - qmValue=shared.getConst('qmValue') - if qmValue: logging.debug("qmValue fine @ makeThermoForSpecies") - global __database + global __database, qmValue + if qmValue == None: qmValue = scoop.shared.getConst('qmValue') if __database == None: """Load the database from some pickle file""" import cPickle From 7cc203ca98fc7b6183148ac3021d2d803223f03d Mon Sep 17 00:00:00 2001 From: keceli Date: Mon, 25 Nov 2013 14:56:55 -0500 Subject: [PATCH 33/39] Fixed the problem in naming the Thermo Library --- thermoEstimator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/thermoEstimator.py b/thermoEstimator.py index 5e1da390f6..abc0017c12 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -73,9 +73,10 @@ def runThermoEstimator(inputFile,chunkSize): ) output.write(writeThermoEntry(species)) output.write('\n') - logging.debug("Thermo library created...") logging.debug("Maximum memory usage:{0} MBs.".format(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000)) - library.save(os.path.join(rmg.outputDirectory, libraryName + '.py')) + libraryFile = libraryName + '.py' + library.save(os.path.join(rmg.outputDirectory, libraryFile)) + logging.debug("{0} created.".format(libraryFile)) del library chunkIndex += 1 output.close() From fa87afe592e0227a47453d73a8c4c38d41a0bd2c Mon Sep 17 00:00:00 2001 From: keceli Date: Mon, 2 Dec 2013 01:01:06 -0500 Subject: [PATCH 34/39] Improved argument parsing in thermoEstimator --- thermoEstimator.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git 
a/thermoEstimator.py b/thermoEstimator.py index abc0017c12..5f079e82a1 100755 --- a/thermoEstimator.py +++ b/thermoEstimator.py @@ -98,21 +98,22 @@ def runThermoEstimator(inputFile,chunkSize): output.txt: Contains string representations of the NASA model for each species, readable by Chemkin. ThermoLibrary.py: Thermo library that can be used in RMG simulations. Can be uploaded to RMG-database. """) - parser.add_argument('input', metavar='FILE', type=str, nargs=1, - help='Thermo input file') + parser.add_argument('input', metavar='FILE', type=str, default='input.py', nargs='?', + help='Thermo input file. (Default file is input.py)') parser.add_argument('CHUNKSIZE', type=int, default=10000,nargs='?', help='''chunk size that determines number of species passed to workers at once, should be larger than the number of processors. (default value is 10000)''') - parser.add_argument('-p', '--profile', action='store_true', help='run under cProfile to gather profiling statistics, and postprocess them if job completes') - parser.add_argument('-P', '--postprocess', action='store_true', help='postprocess profiling statistics from previous [failed] run; does not run the simulation') - group = parser.add_mutually_exclusive_group() - group.add_argument('-d', '--debug', action='store_true', help='print debug information') - group.add_argument('-q', '--quiet', action='store_true', help='only print warnings and errors') + group1 = parser.add_mutually_exclusive_group() + group1.add_argument('-p', '--profile', action='store_true', help='run under cProfile to gather profiling statistics, and postprocess them if job completes') + group1.add_argument('-P', '--postprocess', action='store_true', help='postprocess profiling statistics from previous [failed] run; does not run the simulation') + group2 = parser.add_mutually_exclusive_group() + group2.add_argument('-d', '--debug', action='store_true', help='print debug information') + group2.add_argument('-q', '--quiet', action='store_true', 
help='only print warnings and errors') args = parser.parse_args() - inputFile = os.path.abspath(args.input[0]) - inputDirectory = os.path.abspath(os.path.dirname(args.input[0])) + inputFile = os.path.abspath(args.input) + inputDirectory = os.path.abspath(os.path.dirname(args.input)) chunkSize = args.CHUNKSIZE if args.postprocess: print "Postprocessing the profiler statistics (will be appended to thermo.log)" @@ -122,8 +123,8 @@ def runThermoEstimator(inputFile,chunkSize): if args.profile: import cProfile, sys, pstats, os global_vars = {} - local_vars = {'inputFile': inputFile,'runThermoEstimator':runThermoEstimator} - command = """runThermoEstimator(inputFile)""" + local_vars = {'inputFile': inputFile,'chunkSize':chunkSize,'runThermoEstimator':runThermoEstimator} + command = """runThermoEstimator(inputFile,chunkSize)""" stats_file = 'RMG.profile' print("Running under cProfile") if not args.postprocess: From 210ea35697c725594e96a9c56e019aa61a8256b8 Mon Sep 17 00:00:00 2001 From: keceli Date: Wed, 18 Dec 2013 11:09:32 -0500 Subject: [PATCH 35/39] Fixed the problem with linear molecules QM calculations fail for linear molecules with an error '''only length-1 arrays can be converted to Python scalars'''. The problem is that we only need a single rotational constant for a linear rotor, while three of them were being passed. 
--- rmgpy/statmech/rotation.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/statmech/rotation.pyx b/rmgpy/statmech/rotation.pyx index c39b4feac3..826a235531 100644 --- a/rmgpy/statmech/rotation.pyx +++ b/rmgpy/statmech/rotation.pyx @@ -159,7 +159,7 @@ cdef class LinearRotor(Rotation): def __set__(self, B): cdef double I B = quantity.Frequency(B) - I = constants.h / (8 * constants.pi * constants.pi * (B.value_si * constants.c * 100.)) + I = constants.h / (8 * constants.pi * constants.pi * (max(B.value_si) * constants.c * 100.)) self._inertia = quantity.ScalarQuantity(I / (constants.amu * 1e-20), "amu*angstrom^2") cpdef double getLevelEnergy(self, int J) except -1: From 0c31eb0ad55e888f38a5951d11b88881fa4813e5 Mon Sep 17 00:00:00 2001 From: Connie Gao Date: Thu, 19 Dec 2013 11:38:47 -0500 Subject: [PATCH 36/39] Limit lone pair drawing to Nitrogen atoms only. There are still some issues with positioning of the lone pairs. For instance 1 N 1 1 {2,S} {3,S} 2 H 0 0 {1,S} 3 H 0 0 {1,S} Will draw the lone pair on the hydrogen rather than the N. But for now, we can avoid drawing lone pairs on oxygens and such where it is not needed. 
--- rmgpy/molecule/draw.py | 78 ++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index eb961c2812..eb8829a7df 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -1249,25 +1249,27 @@ def __renderAtom(self, symbol, atom, x0, y0, cr, heavyFirst=True): cr.set_source_rgba(0.0, 0.0, 0.0, 1.0) cr.show_text(text) - # Draw lone electron pairs - for i in range (atom.lonePairs): - cr.new_sub_path() - if i == 0: - x1lp = x-2 - y1lp = y-8 - x2lp = x+2 - y2lp = y-12 - elif i == 1: - x1lp = x+12 - y1lp = y-8 - x2lp = x+8 - y2lp = y-12 - elif i == 2: - x1lp = x-2 - y1lp = y-1 - x2lp = x+2 - y2lp = y+3 - self.__drawLine(cr, x1lp, y1lp, x2lp, y2lp) + # Draw lone electron pairs + # Draw them for nitrogen atoms only + if atom.symbol == 'N': + for i in range (atom.lonePairs): + cr.new_sub_path() + if i == 0: + x1lp = x-2 + y1lp = y-8 + x2lp = x+2 + y2lp = y-12 + elif i == 1: + x1lp = x+12 + y1lp = y-8 + x2lp = x+8 + y2lp = y-12 + elif i == 2: + x1lp = x-2 + y1lp = y-1 + x2lp = x+2 + y2lp = y+3 + self.__drawLine(cr, x1lp, y1lp, x2lp, y2lp) elif orientation[0] == 'l' or orientation[0] == 'r': # Draw charges first @@ -1289,24 +1291,26 @@ def __renderAtom(self, symbol, atom, x0, y0, cr, heavyFirst=True): cr.set_source_rgba(0.0, 0.0, 0.0, 1.0) cr.fill() # Draw lone electron pairs - for i in range (atom.lonePairs): - cr.new_sub_path() - if i == 0: - x1lp = x-2 - y1lp = y-8 - x2lp = x+2 - y2lp = y-12 - elif i == 1: - x1lp = x+12 - y1lp = y-8 - x2lp = x+8 - y2lp = y-12 - elif i == 2: - x1lp = x-2 - y1lp = y-1 - x2lp = x+2 - y2lp = y+3 - self.__drawLine(cr, x1lp, y1lp, x2lp, y2lp) + # Draw them for nitrogen atoms only + if atom.symbol == 'N': + for i in range (atom.lonePairs): + cr.new_sub_path() + if i == 0: + x1lp = x-2 + y1lp = y-8 + x2lp = x+2 + y2lp = y-12 + elif i == 1: + x1lp = x+12 + y1lp = y-8 + x2lp = x+8 + y2lp = y-12 + elif i == 2: + x1lp = x-2 + y1lp = y-1 + 
x2lp = x+2 + y2lp = y+3 + self.__drawLine(cr, x1lp, y1lp, x2lp, y2lp) # Update bounding rect to ensure atoms are included if boundingRect[0] < self.left: From 67d1cf692930efa130174ee7998934a53201318f Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Thu, 19 Dec 2013 17:18:41 -0500 Subject: [PATCH 37/39] Draw lone electron pairs only for nitrogen containing species --- rmgpy/molecule/draw.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index eb8829a7df..b82381845a 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -873,6 +873,12 @@ def render(self, cr, offset=None): coordinates = self.coordinates atoms = self.molecule.atoms symbols = self.symbols + + drawLonePairs = False + + for atom in atoms: + if atom.isNitrogen(): + drawLonePairs = True left = 0.0 top = 0.0 @@ -931,7 +937,7 @@ def render(self, cr, offset=None): heavyFirst = False cr.set_font_size(self.options['fontSizeNormal']) x0 += cr.text_extents(symbols[0])[2] / 2.0 - atomBoundingRect = self.__renderAtom(symbol, atom, x0, y0, cr, heavyFirst) + atomBoundingRect = self.__renderAtom(symbol, atom, x0, y0, cr, heavyFirst, drawLonePairs) # Add a small amount of whitespace on all sides padding = self.options['padding'] @@ -997,7 +1003,7 @@ def __renderBond(self, atom1, atom2, bond, cr): self.__drawLine(cr, x1 - du + dx, y1 - dv + dy, x2 - du - dx, y2 - dv - dy) self.__drawLine(cr, x1 + du + dx, y1 + dv + dy, x2 + du - dx, y2 + dv - dy) - def __renderAtom(self, symbol, atom, x0, y0, cr, heavyFirst=True): + def __renderAtom(self, symbol, atom, x0, y0, cr, heavyFirst=True, drawLonePairs=False): """ Render the `label` for an atom centered around the coordinates (`x0`, `y0`) onto the Cairo context `cr`. 
If `heavyFirst` is ``False``, then the order @@ -1250,8 +1256,8 @@ def __renderAtom(self, symbol, atom, x0, y0, cr, heavyFirst=True): cr.show_text(text) # Draw lone electron pairs - # Draw them for nitrogen atoms only - if atom.symbol == 'N': + # Draw them for nitrogen containing molecules only + if drawLonePairs: for i in range (atom.lonePairs): cr.new_sub_path() if i == 0: @@ -1292,7 +1298,7 @@ def __renderAtom(self, symbol, atom, x0, y0, cr, heavyFirst=True): cr.fill() # Draw lone electron pairs # Draw them for nitrogen atoms only - if atom.symbol == 'N': + if drawLonePairs: for i in range (atom.lonePairs): cr.new_sub_path() if i == 0: From 50480341f0f33ec11cb4df5b01c8635b84823653 Mon Sep 17 00:00:00 2001 From: Richard West Date: Thu, 19 Dec 2013 20:51:25 -1000 Subject: [PATCH 38/39] Make transport writing compatible with Python 2.6 The format syntax where you omit the field numbers was introduced in Python 2.7. --- rmgpy/chemkin.py | 6 +++--- rmgpy/rmg/model.py | 4 ---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/rmgpy/chemkin.py b/rmgpy/chemkin.py index ec4bb05aec..b7cf253e0e 100644 --- a/rmgpy/chemkin.py +++ b/rmgpy/chemkin.py @@ -1576,8 +1576,8 @@ def saveTransportFile(path, species): (from the chemkin TRANSPORT manual) """ with open(path, 'w') as f: - f.write("! {:15} {:8} {:9} {:9} {:9} {:9} {:9} {:9}\n".format('Species','Shape', 'LJ-depth', 'LJ-diam', 'DiplMom', 'Polzblty', 'RotRelaxNum','Data')) - f.write("! {:15} {:8} {:9} {:9} {:9} {:9} {:9} {:9}\n".format('Name','Index', 'epsilon/k_B', 'sigma', 'mu', 'alpha', 'Zrot','Source')) + f.write("! {0:15} {1:8} {2:9} {3:9} {4:9} {5:9} {6:9} {7:9}\n".format('Species','Shape', 'LJ-depth', 'LJ-diam', 'DiplMom', 'Polzblty', 'RotRelaxNum','Data')) + f.write("! 
{0:15} {1:8} {2:9} {3:9} {4:9} {5:9} {6:9} {7:9}\n".format('Name','Index', 'epsilon/k_B', 'sigma', 'mu', 'alpha', 'Zrot','Source')) for spec in species: if (not spec.transportData or len(spec.molecule) == 0): @@ -1596,7 +1596,7 @@ def saveTransportFile(path, species): shapeIndex = 2 if missingData: - f.write('! {:19s} {!r}\n'.format(label, spec.transportData)) + f.write('! {0:19s} {1!r}\n'.format(label, spec.transportData)) else: f.write('{0:19} {1:d} {2:9.3f} {3:9.3f} {4:9.3f} {5:9.3f} {6:9.3f} ! {7:s}\n'.format( label, diff --git a/rmgpy/rmg/model.py b/rmgpy/rmg/model.py index fb520c63b1..389dc57fbb 100644 --- a/rmgpy/rmg/model.py +++ b/rmgpy/rmg/model.py @@ -746,13 +746,9 @@ def enlarge(self, newObject): # Generate thermodynamics of new species logging.info('Generating thermodynamics for new species...') -<<<<<<< HEAD self.generateThermoDataForListOfSpecies(newSpeciesList) -======= for spec in newSpeciesList: - spec.generateThermoData(database, quantumMechanics=self.quantumMechanics) spec.generateTransportData(database) ->>>>>>> c31e9d05281c7a1f1a16d0236c2a90ae816090eb # Generate kinetics of new reactions logging.info('Generating kinetics for new reactions...') From b2f809f54f043eef3d0e011878beb07f30e52e96 Mon Sep 17 00:00:00 2001 From: keceli Date: Sat, 21 Dec 2013 03:10:55 -0500 Subject: [PATCH 39/39] Do not load solvation groups by default. There is a bug in pickling of the solvation groups, and there is no reason to load them by default. --- rmgpy/data/rmg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmgpy/data/rmg.py b/rmgpy/data/rmg.py index 0783043ac7..c8ed04fd2c 100644 --- a/rmgpy/data/rmg.py +++ b/rmgpy/data/rmg.py @@ -76,7 +76,7 @@ def load(self, kineticsDepositories=None, statmechLibraries=None, depository=True, - solvation=True, + solvation=False, ): """ Load the RMG database from the given `path` on disk, where `path`