diff --git a/.gitignore b/.gitignore index a0ebe13c06..76ecb493ad 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ # C-Compiled scripts *.pyc + +*.ipynb_checkpoints \ No newline at end of file diff --git a/scripts/convertKineticsLibraryToTrainingReactions.ipynb b/scripts/convertKineticsLibraryToTrainingReactions.ipynb new file mode 100644 index 0000000000..c5f605caa2 --- /dev/null +++ b/scripts/convertKineticsLibraryToTrainingReactions.ipynb @@ -0,0 +1,394 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert Kinetics Library to Training Reactions Script\n", + "\n", + "Specify the kinetics library name below and run the script. It automatically overwrites the training reactions files it needs to. Then you should commit those files.\n", + "\n", + "This script only trains safely. In other words, if a single match from an RMG family is found, a training reaction is created. Sometimes, there are no matches from RMG reaction families, or multiple matches. This indicates an error that requires manual fixing, and a printout is given in the script." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "libraryName = 'vinylCPD_H'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from rmgpy.data.rmg import RMGDatabase\n", + "from rmgpy.chemkin import saveChemkinFile, saveSpeciesDictionary\n", + "from rmgpy.rmg.model import Species\n", + "from rmgpy import settings\n", + "from convertKineticsLibraryToTrainingReactions import addAtomLabelsForReaction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## load lib_rxn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "database = RMGDatabase()\n", + "database.load(settings['database.directory'], kineticsFamilies='all', reactionLibraries = [libraryName], kineticsDepositories='all')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## generate fam_rxn, spec replacement and get reactionDict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "reactionDict = {}\n", + "kineticLibrary = database.kinetics.libraries[libraryName]\n", + "for index, entry in kineticLibrary.entries.iteritems():\n", + " lib_rxn = entry.item\n", + " lib_rxn.kinetics = entry.data \n", + " lib_rxn.index = entry.index\n", + " lib_rxn.kinetics.comment = entry.label # Assign the entry's label to the comment\n", + " # Let's make RMG try to generate this reaction from the families!\n", + " fam_rxn_list = []\n", + " rxt_mol_mutation_num = 1\n", + " pdt_mol_mutation_num = 1\n", + " for reactant in lib_rxn.reactants:\n", + " rxt_mol_mutation_num *= len(reactant.molecule)\n", + "\n", + " for product in lib_rxn.products:\n", + " pdt_mol_mutation_num *= len(product.molecule)\n", + "\n", + " for mutation_i in range(rxt_mol_mutation_num):\n", + " rxts_mol = [spc.molecule[mutation_i%(len(spc.molecule))] for spc in lib_rxn.reactants]\n", + " pdts_mol = [spc.molecule[0] for spc in lib_rxn.products]\n", + " fam_rxn_list.extend(database.kinetics.generateReactionsFromFamilies(\n", + " reactants=rxts_mol, products=pdts_mol))\n", + "\n", + "\n", + " if len(fam_rxn_list) == 1:\n", + " fam_rxn = fam_rxn_list[0]\n", + "\n", + " # danger: the fam_rxn may have switched the reactants with products\n", + " # fam_rxn is survived from def filterReactions\n", + " # so it's matched with lib_rxn only we have to \n", + " # determine the direction\n", + " lib_reactants = [r for r in lib_rxn.reactants] \n", + " fam_reactants = [r for r in fam_rxn.reactants]\n", + " for lib_reactant in lib_reactants:\n", + " for fam_reactant in fam_reactants:\n", + " if lib_reactant.isIsomorphic(fam_reactant):\n", + " fam_reactants.remove(fam_reactant)\n", + " break\n", + "\n", + " lib_products = [r for r in lib_rxn.products] \n", + " fam_products = [r for r in fam_rxn.products]\n", + " for lib_product in lib_products:\n", + " for fam_product in fam_products:\n", + " if lib_product.isIsomorphic(fam_product):\n", + " fam_products.remove(fam_product)\n", + " break\n", + "\n", + " forward = not (len(fam_reactants) != 0 or len(fam_products) != 0)\n", + " # find the labeled atoms using family and reactants & products from fam_rxn \n", + " addAtomLabelsForReaction(fam_rxn, database)\n", + " # species replacement so that labeledAtoms is retored\n", + " if forward:\n", + " lib_rxn.reactants = fam_rxn.reactants\n", + " lib_rxn.products = fam_rxn.products\n", + " else:\n", + " lib_rxn.reactants = fam_rxn.products\n", + " lib_rxn.products = fam_rxn.reactants\n", + " if fam_rxn.family in reactionDict:\n", + " reactionDict[fam_rxn.family].append(lib_rxn)\n", + " else:\n", + " reactionDict[fam_rxn.family] = [lib_rxn]\n", + "\n", + " elif len(fam_rxn_list) == 0:\n", + " print \"Sad :( There are no matches. This is a magic reaction or has chemistry that should be made into a new reaction family\"\n", + " print ''\n", + " print lib_rxn\n", + " print ''\n", + " print 'Reactant SMILES:'\n", + " for reactant in lib_rxn.reactants:\n", + " print reactant.molecule[0].toSMILES()\n", + " print 'Product SMILES:'\n", + " for product in lib_rxn.products:\n", + " print product.molecule[0].toSMILES()\n", + " print '==============='\n", + " else:\n", + " print \"There are multiple RMG matches for this reaction. You have to manually create this training reaction\"\n", + " print ''\n", + " print lib_rxn\n", + " print ''\n", + " print 'Reactant SMILES:'\n", + " for reactant in lib_rxn.reactants:\n", + " print reactant.molecule[0].toSMILES()\n", + " print 'Product SMILES'\n", + " for product in lib_rxn.products:\n", + " print product.molecule[0].toSMILES()\n", + " print ''\n", + " print 'The following families were matched:'\n", + " for rxn in fam_rxn_list:\n", + " print rxn.family\n", + " print '==============='\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for familyName in reactionDict:\n", + " print 'Adding training reactions for family: ' + familyName\n", + " kineticFamily = database.kinetics.families[familyName]\n", + " trainingDatabase = None\n", + " for depository in kineticFamily.depositories:\n", + " if depository.label.endswith('training'):\n", + " trainingDatabase = depository\n", + " break\n", + " reactions = reactionDict[familyName]\n", + " print 'reactions.py previously has {} rxns. Now adding {} new rxn(s).'.format(len(trainingDatabase.entries.values()), len(reactions))\n", + " print '================='\n", + " kineticFamily.saveTrainingReactions(reactions, shortDesc='Training reaction from kinetics library: {0}'.format(libraryName))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How saveTrainingReaction works\n", + "\n", + "This part of the script is commented and should not be run. It serves only to demonstrate how the code for saving the training reactions works.\n", + "\n", + "## get speciesDict\n", + "\n", + "### load existing species as an intial speciesDict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# import os\n", + "# from rmgpy.data.base import Database\n", + "\n", + "# training_path = os.path.join(settings['database.directory'], \\\n", + "# 'kinetics', 'families', 'R_Addition_MultipleBond', 'training')\n", + "\n", + "# dictionary_file = os.path.join(training_path, 'dictionary.txt')\n", + "\n", + "# # Load the existing set of the species of the training reactions\n", + "# speciesDict = Database().getSpecies(dictionary_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### for one family check uniqueness of each species in the lib_rxns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "scrolled": false + }, + "outputs": [], + "source": [ + "# familyName = 'R_Addition_MultipleBond'\n", + "# print 'Adding training reactions for family: ' + familyName\n", + "# kineticFamily = database.kinetics.families[familyName]\n", + "# reactions = reactionDict[familyName]\n", + "\n", + "# for rxn in reactions:\n", + "# for spec in (rxn.reactants + rxn.products):\n", + "# for ex_spec_label in speciesDict:\n", + "# ex_spec = speciesDict[ex_spec_label]\n", + "# if ex_spec.molecule[0].getFormula() != spec.molecule[0].getFormula():\n", + "# continue\n", + "# else:\n", + "# spec_labeledAtoms = spec.molecule[0].getLabeledAtoms()\n", + "# ex_spec_labeledAtoms = ex_spec.molecule[0].getLabeledAtoms()\n", + "# initialMap = {}\n", + "# try:\n", + "# for atomLabel in spec_labeledAtoms:\n", + "# initialMap[spec_labeledAtoms[atomLabel]] = ex_spec_labeledAtoms[atomLabel]\n", + "# except KeyError:\n", + "# # atom labels did not match, therefore not a match\n", + "# continue\n", + "# if spec.molecule[0].isIsomorphic(ex_spec.molecule[0],initialMap):\n", + "# spec.label = ex_spec.label\n", + "# break\n", + "# else:# no isomorphic existing species found\n", + "# spec_formula = spec.molecule[0].getFormula()\n", + "# if spec_formula not in speciesDict:\n", + "# spec.label = spec_formula\n", + "# else:\n", + "# index = 2\n", + "# while (spec_formula + '-{}'.format(index)) in speciesDict:\n", + "# index += 1\n", + "# spec.label = spec_formula + '-{}'.format(index)\n", + "# speciesDict[spec.label] = spec" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## save to files\n", + "\n", + "Save reactionDict to reactions.py and speciesDict to dictionary.txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# # try to append \n", + "# training_file = open(os.path.join(settings['database.directory'], 'kinetics', 'families', \\\n", + "# kineticFamily.label, 'training', 'reactions_test.py'), 'a')\n", + "\n", + "# training_file.write(\"\\n\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# # find the largest reaction index\n", + "# for depository in kineticFamily.depositories:\n", + "# if depository.label.endswith('training'):\n", + "# break\n", + "# else:\n", + "# logging.info('Could not find training depository in family {0}.'.format(kineticFamily.label))\n", + "# logging.info('Starting a new one')\n", + "# depository = KineticsDepository()\n", + "# kineticFamily.depositories.append(depository)\n", + "\n", + "# trainingDatabase = depository\n", + "# indices = [entry.index for entry in trainingDatabase.entries.values()]\n", + "# if indices:\n", + "# maxIndex = max(indices)\n", + "# else:\n", + "# maxIndex = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# # save reactions.py\n", + "# from rmgpy.data.base import Entry\n", + "# for i, reaction in enumerate(reactions): \n", + "# entry = Entry(\n", + "# index = maxIndex+i+1,\n", + "# label = str(reaction),\n", + "# item = reaction,\n", + "# data = reaction.kinetics,\n", + "# reference = None,\n", + "# referenceType = '',\n", + "# shortDesc = unicode(''),\n", + "# longDesc = unicode(''),\n", + "# rank = 3,\n", + "# )\n", + "# print reaction\n", + "# kineticFamily.saveEntry(training_file, entry)\n", + "\n", + "# training_file.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# # save dictionary.txt\n", + "# directory_test_file = os.path.join(training_path, 'directory_test.txt')\n", + "# with open(directory_test_file, 'w') as f:\n", + "# for label in speciesDict.keys():\n", + "# f.write(speciesDict[label].molecule[0].toAdjacencyList(label=label, removeH=False))\n", + "# f.write('\\n')\n", + "# f.close()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/scripts/fitPolycyclicThermoGroupsFromThermoLibrary.ipynb b/scripts/fitPolycyclicThermoGroupsFromThermoLibrary.ipynb new file mode 100644 index 0000000000..912bc738a2 --- /dev/null +++ b/scripts/fitPolycyclicThermoGroupsFromThermoLibrary.ipynb @@ -0,0 +1,414 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fit Polycyclic Thermo Groups From Thermo Library Script\n", + "\n", + "This script takes thermo libraries and fits new polycyclic groups from them. It saves the all of the polycyclic groups to the file `new_polycyclic.py`. This file can be used to overwrite the original polycyclics thermo groups file in `input/thermo/groups/polycyclic.py`.\n", + "\n", + "**IMPORTANT:** It averages any data that is found within the libraries, but will overwrite any old thermo data. If old data is trustworthy, this script must be modified\n", + " \n", + "Uncertainties for the groups are calculated as 2s, where s is the sample standard deviation\n", + "\n", + "Please fill in the list of thermo libraries in the next block below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# Fill in the list of thermo libraries to be used for fitting polycyclic thermo groups\n", + "thermoLibraries = ['vinylCPD_H','C3','C10H11','Fulvene_H','naphthalene_H']\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "import copy\n", + "import numpy\n", + "from IPython.display import Image, display\n", + "from rmgpy.data.thermo import *\n", + "from rmgpy.data.base import Entry\n", + "from rmgpy.data.rmg import RMGDatabase\n", + "from rmgpy import settings\n", + "from rmgpy.species import Species\n", + "from rmgpy.molecule import Molecule\n", + "from rmgpy.cantherm.output import prettify" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Variety of helper functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def extractPolycyclicGroups(molecule):\n", + " \"\"\"\n", + " Extract polycyclic functional groups from a real molecule\n", + " \"\"\"\n", + " struct = molecule.copy(deep=True)\n", + " # Saturate the structure if it is a radical\n", + " if struct.isRadical():\n", + " struct.saturate()\n", + " struct.deleteHydrogens()\n", + " \n", + " polyRings = struct.getPolycyclicRings()\n", + " groups = [convertCycleToGroup(ring) for ring in polyRings]\n", + " \n", + " return groups\n", + " \n", + "def convertCycleToGroup(cycle):\n", + " \"\"\"\n", + " This function converts a list of atoms in a cycle to a functional Group object\n", + " \"\"\"\n", + " from rmgpy.molecule.group import GroupAtom, GroupBond, Group\n", + " \n", + " # Create GroupAtom object for each atom in the cycle, label the first one in the cycle with a *\n", + " groupAtoms = {}\n", + " bonds = []\n", + " for atom in cycle:\n", + " groupAtoms[atom] = GroupAtom(atomType=[atom.atomType],\n", + " radicalElectrons=[0],\n", + " label='*' if cycle.index(atom)==0 else '')\n", + " \n", + " group = Group(atoms=groupAtoms.values()) \n", + " \n", + " # Create GroupBond for each bond between atoms in the cycle, but not outside of the cycle\n", + " for atom in cycle:\n", + " for bondedAtom, bond in atom.edges.iteritems():\n", + " if bondedAtom in cycle:\n", + " # create a group bond with the same bond order as in the original molecule,\n", + " # if it hasn't already been created\n", + " if not group.hasBond(groupAtoms[atom],groupAtoms[bondedAtom]):\n", + " group.addBond(GroupBond(groupAtoms[atom],groupAtoms[bondedAtom],order=[bond.order]))\n", + " else:\n", + " pass\n", + " \n", + " group.update()\n", + " \n", + " return group" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Thermo comparison and display functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def displayThermo(thermoData):\n", + " print 'H298 = {0} kcal/mol'.format(thermoData.H298.value_si/4184)\n", + " print 'S298 = {0} cal/mol*K'.format(thermoData.S298.value_si/4.184)\n", + "def compareThermoData(thermoData1, thermoData2):\n", + " delH = thermoData1.H298.value_si - thermoData2.H298.value_si\n", + " print 'Difference in H298 = {0} kcal/mol'.format(delH/4184)\n", + " delS = thermoData1.S298.value_si - thermoData2.S298.value_si\n", + " print 'Difference S298 = {0} cal/mol*K'.format(delS/4.184)\n", + " #Tdata = [300,500,1000,2000]\n", + " #for T in Tdata:\n", + " # delCp = thermoData1.getHeatCapacity(T) - thermoData2.getHeatCapacity(T)\n", + " # print 'Difference in Cp at {0} = {1} cal/mol*K'.format(T, delCp/4.184)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load all the thermo libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "database = RMGDatabase()\n", + "database.load(settings['database.directory'], thermoLibraries = thermoLibraries, kineticsFamilies='none', kineticsDepositories='none', reactionLibraries=[])\n", + "\n", + "thermoDatabase = database.thermo\n", + "thermoDatabase0 = copy.deepcopy(database.thermo)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "## Extract polycyclic groups from thermo libraries and create new ones" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fittingDictionary={}\n", + "for libraryName in thermoLibraries:\n", + " thermoLibrary = database.thermo.libraries[libraryName]\n", + " for label, entry in thermoLibrary.entries.iteritems():\n", + " molecule = entry.item\n", + " libraryThermoData = entry.data\n", + " if molecule.getAllPolycyclicVertices():\n", + " print label\n", + " species = Species(molecule=[molecule])\n", + " species.generateResonanceIsomers() \n", + " print 'Species has {0} resonance isomers'.format(len(species.molecule))\n", + " estimatedThermos = [thermoDatabase.estimateThermoViaGroupAdditivity(molecule) for molecule in species.molecule]\n", + " for estimatedThermo in estimatedThermos:\n", + " index = estimatedThermos.index(estimatedThermo)\n", + " ringGroups, polycyclicGroups = database.thermo.getRingGroupsFromComments(estimatedThermo)\n", + " \n", + " if len(polycyclicGroups) == 0:\n", + " raise Exception('Species {0} detected as polycyclic but estimated thermo contained no polycyclic groups: \\\n", + " you need to create a new polycyclic group'.format(label))\n", + "\n", + " elif len(polycyclicGroups) == 1:\n", + " polycyclicGroup = polycyclicGroups[0]\n", + " print 'Molecule {0} has a single polycyclic group match in thermo estimate.'.format(species.molecule[index].toSMILES())\n", + " # Draw the molecule in ipython notebook\n", + " display(species.molecule[index])\n", + " print 'Molecule SMILES: {0}'.format(species.molecule[index].toSMILES())\n", + " print 'Estimated thermo data:'\n", + " print prettify(repr(estimatedThermo))\n", + " displayThermo(estimatedThermo)\n", + "\n", + " withoutPolycyclicGroupThermo = removeThermoData(copy.deepcopy(estimatedThermo), polycyclicGroup.data)\n", + " newPolycyclicGroupThermo = removeThermoData(copy.deepcopy(libraryThermoData), withoutPolycyclicGroupThermo)\n", + "\n", + "\n", + " # Check to make sure that the polycyclic group is not generic\n", + " # If it is, create a new polycyclicGroup as the child\n", + " if polycyclicGroup.label == 'PolycyclicRing':\n", + " groups = extractPolycyclicGroups(species.molecule[index])\n", + " print groups[0].toAdjacencyList()\n", + " assert len(groups) == 1\n", + " # Create a new entry in the polycyclic groups with the same name as the thermo library entry\n", + " # Make sure it does not already exist\n", + " entryLabel = label\n", + " counter = 0\n", + " while entryLabel in thermoDatabase.groups['polycyclic'].entries:\n", + " counter += 1\n", + " entryLabel = entryLabel.split('-')[0]\n", + " entryLabel += '-{0}'.format(counter)\n", + "\n", + "\n", + " print 'Polycyclic group was found to be generic \"PolycyclicRing\". Creating new entry: {0}.'.format(entryLabel)\n", + " thermoDatabase.groups['polycyclic'].entries[entryLabel] = Entry(index = len(thermoDatabase.groups['polycyclic'].entries)+1,\n", + " label = entryLabel,\n", + " item = groups[0],\n", + " data = polycyclicGroup.data, # Use dummy thermo here so other estimates can find this group\n", + " parent = polycyclicGroup,\n", + " )\n", + "\n", + " # Set the new entry as the polycyclicGroup and make it a child of the generic group\n", + " polycyclicGroup = thermoDatabase.groups['polycyclic'].entries[entryLabel] \n", + " thermoDatabase.groups['polycyclic'].entries['PolycyclicRing'].children.append(polycyclicGroup)\n", + "\n", + "\n", + " else:\n", + " print 'Matched polycyclic group \"{0}\"'.format(polycyclicGroup.label)\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " # Add the new group value to the fitting dictionary\n", + " if polycyclicGroup not in fittingDictionary:\n", + " # Add a tuple containing fitted group data, the original library entry, and thermo library\n", + " fittingDictionary[polycyclicGroup]=[(newPolycyclicGroupThermo, entry, thermoLibrary)]\n", + " else:\n", + " fittingDictionary[polycyclicGroup].append((newPolycyclicGroupThermo, entry, thermoLibrary))\n", + "\n", + " elif len(polycyclicGroups) > 1:\n", + " print 'Species {0} has matched multiple polycyclic groups. \\\n", + " This cannot be fitted with a single molecule\\'s thermo data.'.format(label)\n", + " raise Exception\n", + " print '====================='" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fit the polycyclic groups and write to file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for polycyclicGroup, fittingGroups in fittingDictionary.iteritems():\n", + " print 'Original thermo data for polycyclic group: {0}'.format(polycyclicGroup.label)\n", + " if polycyclicGroup.data:\n", + " print prettify(repr(polycyclicGroup.data))\n", + " else:\n", + " print 'No data found. Was created as a new group.'\n", + " \n", + " thermoDataset = [fitTuple[0] for fitTuple in fittingGroups]\n", + " labels = [fitTuple[1].label for fitTuple in fittingGroups]\n", + " libraryLabels = [fitTuple[2].name for fitTuple in fittingGroups]\n", + " # Average the new group values to fit the original polycyclic group\n", + " fittedGroupData = averageThermoData([fitTuple[0] for fitTuple in fittingGroups])\n", + " #print fittedGroupData\n", + " #print fittingGroups\n", + " polycyclicGroup.data = fittedGroupData\n", + " polycyclicGroup.shortDesc = \"Fitted from thermo library values\"\n", + " \n", + " comment = ''\n", + " for i in range(len(labels)):\n", + " comment += \"Fitted from species {0} from {1} library.\\n\".format(labels[i],libraryLabels[i])\n", + " polycyclicGroup.longDesc = comment.strip()\n", + " \n", + " print 'Fitted thermo data for polycyclic group: {0}'.format(polycyclicGroup.label)\n", + " print prettify(repr(polycyclicGroup.data))\n", + " print polycyclicGroup.longDesc\n", + " print '===================================================================='\n", + " # At this point, save and overwrite the entire polycyclic thermo library\n", + "\n", + "thermoDatabase.groups['polycyclic'].save('new_polycyclic.py')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Benchmark the new groups by checking the estimates against library values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Test that the new group additivity values can estimate the old library ones.\n", + "\n", + "for libraryName in thermoLibraries:\n", + " thermoLibrary = database.thermo.libraries[libraryName]\n", + " for label, entry in thermoLibrary.entries.iteritems():\n", + " molecule = entry.item\n", + " libraryThermoData = entry.data\n", + "\n", + " if molecule.getAllPolycyclicVertices():\n", + " species = Species(molecule=[molecule])\n", + " species.generateResonanceIsomers()\n", + " print label\n", + " display (species.molecule[0])\n", + " print 'Species has {0} resonance isomer(s)'.format(len(species.molecule))\n", + " thermoDatabase.findCp0andCpInf(species, libraryThermoData)\n", + " \n", + " estimatedThermo = thermoDatabase.getThermoDataFromGroups(species)\n", + " originalEstimatedThermo = thermoDatabase0.getThermoDataFromGroups(species)\n", + " if libraryThermoData.isIdenticalTo(estimatedThermo):\n", + " print 'Library thermo data for species {0} matches the estimate from group additivity.\\n'.format(label)\n", + " \n", + " print 'Library thermo data:' \n", + " displayThermo(libraryThermoData)\n", + " print '' \n", + " \n", + " print 'Original estimated thermo data:' \n", + " ringGroups, polycyclicGroups = database.thermo.getRingGroupsFromComments(estimatedThermo)\n", + " print 'Polycyclic groups: {0}'.format(' '.join([grp.label for grp in polycyclicGroups]))\n", + " displayThermo(originalEstimatedThermo)\n", + " print ''\n", + " \n", + " print 'Comparison of library thermo with original estimated thermo'\n", + " compareThermoData(libraryThermoData,originalEstimatedThermo)\n", + " print ''\n", + " else:\n", + " print 'Library thermo data for species {0} does not match the estimate from group additivity\\n'.format(label)\n", + " \n", + " print 'Library thermo data:' \n", + " displayThermo(libraryThermoData)\n", + " print '' \n", + " \n", + " print 'Estimated thermo data:' \n", + " \n", + " ringGroups, polycyclicGroups = database.thermo.getRingGroupsFromComments(estimatedThermo)\n", + " print 'Polycyclic groups: {0}'.format(' '.join([grp.label for grp in polycyclicGroups]))\n", + " displayThermo(estimatedThermo)\n", + " print ''\n", + " \n", + " print 'Comparison of library thermo with estimated thermo'\n", + " compareThermoData(libraryThermoData,estimatedThermo)\n", + " print ''\n", + " \n", + " print 'Original estimated thermo data:' \n", + " \n", + " ringGroups, polycyclicGroups = database.thermo.getRingGroupsFromComments(originalEstimatedThermo)\n", + " print 'Polycyclic groups: {0}'.format(' '.join([grp.label for grp in polycyclicGroups]))\n", + " displayThermo(originalEstimatedThermo)\n", + " print ''\n", + " \n", + " print 'Comparison of library thermo with original estimated thermo'\n", + " compareThermoData(libraryThermoData,originalEstimatedThermo)\n", + " print ''\n", + " print '======================='" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/scripts/showNewTrainingRxnsImprovements.ipynb b/scripts/showNewTrainingRxnsImprovements.ipynb new file mode 100644 index 0000000000..91c49f88fe --- /dev/null +++ b/scripts/showNewTrainingRxnsImprovements.ipynb @@ -0,0 +1,282 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from rmgpy.data.rmg import RMGDatabase\n", + "from rmgpy.chemkin import saveChemkinFile, saveSpeciesDictionary\n", + "from rmgpy.rmg.model import Species, getFamilyLibraryObject, CoreEdgeReactionModel\n", + "from rmgpy import settings\n", + "from convertKineticsLibraryToTrainingReactions import addAtomLabelsForReaction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "database = RMGDatabase()\n", + "libraries = ['C3']\n", + "database.load(settings['database.directory'], kineticsFamilies='all', reactionLibraries = libraries, kineticsDepositories='all')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## step1: find fam_rxn for each lib_rxn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "reactionDict = {}\n", + "for libraryName in libraries:\n", + " kineticLibrary = database.kinetics.libraries[libraryName]\n", + " for index, entry in kineticLibrary.entries.iteritems():\n", + " lib_rxn = entry.item\n", + " lib_rxn.kinetics = entry.data \n", + " lib_rxn.index = entry.index\n", + " # Let's make RMG generate this reaction from the families!\n", + " fam_rxn_list = []\n", + " rxt_mol_mutation_num = 1\n", + " pdt_mol_mutation_num = 1\n", + " for reactant in lib_rxn.reactants:\n", + " rxt_mol_mutation_num *= len(reactant.molecule)\n", + "\n", + " for product in lib_rxn.products:\n", + " pdt_mol_mutation_num *= len(product.molecule)\n", + "\n", + " for mutation_i in range(rxt_mol_mutation_num):\n", + " rxts_mol = [spc.molecule[mutation_i%(len(spc.molecule))] for spc in lib_rxn.reactants]\n", + " pdts_mol = [spc.molecule[0] for spc in lib_rxn.products]\n", + " fam_rxn_list.extend(database.kinetics.generateReactionsFromFamilies(\n", + " reactants=rxts_mol, products=pdts_mol))\n", + "\n", + " if len(fam_rxn_list) == 1:\n", + " fam_rxn = fam_rxn_list[0] \n", + " lib_reactants = [r for r in lib_rxn.reactants] \n", + " fam_reactants = [r for r in fam_rxn.reactants]\n", + " for lib_reactant in lib_reactants:\n", + " for fam_reactant in fam_reactants:\n", + " if lib_reactant.isIsomorphic(fam_reactant):\n", + " fam_reactants.remove(fam_reactant)\n", + " break\n", + "\n", + " lib_products = [r for r in lib_rxn.products] \n", + " fam_products = [r for r in fam_rxn.products]\n", + " for lib_product in lib_products:\n", + " for fam_product in fam_products:\n", + " if lib_product.isIsomorphic(fam_product):\n", + " fam_products.remove(fam_product)\n", + " break\n", + "\n", + " forward = not (len(fam_reactants) != 0 or len(fam_products) != 0)\n", + " # find the labeled atoms using family and reactants & products from fam_rxn \n", + " addAtomLabelsForReaction(fam_rxn, database)\n", + " fam_rxn.index = lib_rxn.index\n", + " reactionDict[fam_rxn.family] = [fam_rxn]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from IPython.display import display\n", + "for fam_rxn in reactionDict['Intra_R_Add_Endocyclic']:\n", + " print fam_rxn.index\n", + " display(fam_rxn)\n", + " for spec in fam_rxn.reactants + fam_rxn.products:\n", + " print spec.molecule[0].toSMILES()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for index, entry in kineticLibrary.entries.iteritems():\n", + " if entry.index == fam_rxn.index:\n", + " lib_rxn = entry.item\n", + " lib_rxn.kinetics = entry.data \n", + " lib_rxn.index = entry.index\n", + " break\n", + "lib_rxn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "id(fam_rxn.reactants[0].molecule[0]), id(lib_rxn.reactants[0].molecule[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## step2: get fam_rxn's kinetics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before training RMG estimates fam_rxn's kinetics as $ A = 10^9, n = 0.19, E_a = 83.68 kJ/mol $ at [here](http://rmg.mit.edu/database/kinetics/families/Intra_R_Add_Endocyclic/rate_rules/reactant1=multiplicity%25202%250A1%2520%2520C%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B3%252CS%257D%2520%257B8%252CS%257D%2520%257B9%252CS%257D%250A2%2520%2520C%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B4%252CS%257D%2520%257B5%252CS%257D%2520%257B6%252CS%257D%250A3%2520%2520C%2520u1%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B7%252CD%257D%250A4%2520%2520C%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B10%252CT%257D%250A5%2520%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A6%2520%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A7%2520%2520C%2520u0%2520p0%2520c0%2520%257B3%252CD%257D%2520%257B11%252CS%257D%2520%257B12%252CS%257D%250A8%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A9%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A10%2520C%2520u0%2520p0%2520c0%2520%257B4%252CT%257D%2520%257B13%252CS%257D%250A11%2520H%2520u0%2520p0%2520c0%2520%257B7%252CS%257D%250A12%2520H%2520u0%2520p0%2520c0%2520%257B7%252CS%257D%250A13%2520H%2520u0%2520p0%2520c0%2520%257B10%252CS%257D%250A__product1=multiplicity%25202%250A1%2520%2520C%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B3%252CS%257D%2520%257B7%252CS%257D%2520%257B8%252CS%257D%250A2%2520%2520C%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B4%252CS%257D%2520%257B9%252CS%257D%2520%257B10%252CS%257D%250A3%2520%2520C%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%2520%257B5%252CS%257D%2520%257B6%252CD%257D%250A4%2520%2520C%2520u1%2520p0%2520c0%2520%257B2%252CS%257D%2520%257B5%252CD%257D%250A5%2520%2520C%2520u0%2520p0%2520c0%2520%257B3%252CS%257D%2520%257B4%252CD%257D%2520%257B11%252CS%257D%250A6%2520%2520C%2520u0%2520p0%2520c0%2520%257B3%252CD%257D%2520%257B12%252CS%257D%2520%257B13%252CS%257D%250A7%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A8%2520%2520H%2520u0%2520p0%2520c0%2520%257B1%252CS%257D%250A9%2520%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A10%2520H%2520u0%2520p0%2520c0%2520%257B2%252CS%257D%250A11%2520H%2520u0%2520p0%2520c0%2520%257B5%252CS%257D%250A12%2520H%2520u0%2520p0%2520c0%2520%257B6%252CS%257D%250A13%2520H%2520u0%2520p0%2520c0%2520%257B6%252CS%257D%250A)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## step3: after training get fam_rxn's kinetics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "cem = CoreEdgeReactionModel()\n", + "cem.kineticsEstimator = 'rate rules'\n", + "cem.verboseComments = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from rmgpy.kinetics.kineticsdata import KineticsData\n", + "from rmgpy.data.kinetics.family import TemplateReaction\n", + "from rmgpy.data.kinetics.depository import DepositoryReaction\n", + "\n", + "for idx, spec in enumerate(fam_rxn.reactants):\n", + " spec = Species(label=spec.label, molecule=spec.molecule)\n", + " spec.generateThermoData(database)\n", + " fam_rxn.reactants[idx] = spec\n", + "for idx, spec in enumerate(fam_rxn.products):\n", + " spec = Species(label=spec.label, molecule=spec.molecule)\n", + " spec.generateThermoData(database)\n", + " fam_rxn.products[idx] = spec\n", + "\n", + "family = getFamilyLibraryObject(fam_rxn.family)\n", + "\n", + "# If the reaction already has kinetics (e.g. from a library),\n", + "# assume the kinetics are satisfactory\n", + "if fam_rxn.kinetics is None:\n", + " # Set the reaction kinetics\n", + " kinetics, source, entry, isForward = cem.generateKinetics(fam_rxn)\n", + " fam_rxn.kinetics = kinetics\n", + " # Flip the reaction direction if the kinetics are defined in the reverse direction\n", + " if not isForward:\n", + " fam_rxn.reactants, fam_rxn.products = fam_rxn.products, fam_rxn.reactants\n", + " fam_rxn.pairs = [(p,r) for r,p in fam_rxn.pairs]\n", + " if family.ownReverse and hasattr(fam_rxn,'reverse'):\n", + " if not isForward:\n", + " fam_rxn.template = fam_rxn.reverse.template\n", + " # We're done with the \"reverse\" attribute, so delete it to save a bit of memory\n", + " delattr(fam_rxn,'reverse')\n", + "\n", + "# convert KineticsData to Arrhenius forms\n", + "if isinstance(fam_rxn.kinetics, KineticsData):\n", + " fam_rxn.kinetics = fam_rxn.kinetics.toArrhenius()\n", + "# correct barrier heights of estimated kinetics\n", + "if isinstance(fam_rxn,TemplateReaction) or isinstance(fam_rxn,DepositoryReaction): # i.e. not LibraryReaction\n", + " fam_rxn.fixBarrierHeight() # also converts ArrheniusEP to Arrhenius.\n", + "\n", + "if cem.pressureDependence and fam_rxn.isUnimolecular():\n", + " # If this is going to be run through pressure dependence code,\n", + " # we need to make sure the barrier is positive.\n", + " fam_rxn.fixBarrierHeight(forcePositive=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "fam_rxn.kinetics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## step4: compare with lib_rxn's kinetics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "lib_rxn.kinetics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion: it improves the kinetics by factor of 10,000 at 673 for this reaction" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}