Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
507a06b
[Feature] add COMETAdapter
Mar 8, 2017
93113bc
COMETAdapter changes - PepXML to idXML error not fixed
Mar 22, 2017
f4efe76
COMET Adapter now functional and removes pep.xml and .pin if debug ==0
Mar 24, 2017
c1f7480
[NOP] documentation
mwalzer May 26, 2015
831b4de
[FIX] refactoring and cleanup of TopPerc
mwalzer May 26, 2015
9dd939c
[NOP] fileheader and cmake files
mwalzer Jun 1, 2015
b8aa4ed
[FEATURE] added a replacement feature for msgf+ mhc ligand identifica…
mwalzer Jun 23, 2015
b442dc8
[Feature] refactored TopPerc a little bit more
mwalzer Oct 21, 2015
72f81c1
[NOP] added SE PIN info for TopPerc
mwalzer Nov 24, 2015
ac297da
[FEATURE] added comet precolator input prep
mwalzer Nov 27, 2015
43d6a98
[FIX] changed scan referencing
mwalzer Nov 27, 2015
541600a
[FIX] fixed metavaluename for matched ions in comet usage, removed to…
mwalzer Nov 27, 2015
c209934
[NOP] minor naming consistency correction
mwalzer Nov 27, 2015
6557da3
[FEATURE] added mascot feature set - not even close to mascotpercolat…
mwalzer Nov 28, 2015
4fcc2e7
[FIX] some refactoring and fixing the result storage
mwalzer May 29, 2016
1c5bfc4
[FIX] multi engine percolating like in my pyOpenMS script, but little…
mwalzer May 29, 2016
4dbd067
[FIX] minimal set multifeature for prepareMULTIpin
mwalzer May 30, 2016
5ac0593
[FIX] removed mapping bugs, improved on logging informative quality
mwalzer May 30, 2016
0f27ffb
[FEAT] added concat pin for topperc
mwalzer Jun 11, 2016
329876e
Successful build, used scan identifiers as SpecId for all search engines
MatthewThe Jul 12, 2016
1b78721
PercolatorAdapter working
MatthewThe Jul 13, 2016
be54960
Created PSMFeatureExtractor util
MatthewThe Jul 14, 2016
a541c36
Set up merging of several idXML files. Added options for peptide and …
MatthewThe Jul 14, 2016
b05e056
Fixed some issues for xtandem and multi search engine merge
MatthewThe Jul 21, 2016
346bbed
[FIX] fixed after merge to reflect renamed precursor_mass_tolerance
mwalzer Nov 24, 2016
d1c7878
[NOP] doc and code duplicate removal
mwalzer Nov 29, 2016
376c212
[FIX] fixed faulty getScanMergeKey_ function, added debug information
mwalzer Nov 29, 2016
e165306
[FIX] fixed refactoring introduced glitches
mwalzer Dec 12, 2016
e00a3e3
[FIX] fixing one comet feature fallback
mwalzer Mar 21, 2017
0f15bf5
COMET Adapter XTandem parts replaced by COMET, header still required …
Mar 24, 2017
192a204
[DOC] percolator related tools documentation
mwalzer Mar 25, 2017
fccbca3
[TEST] added TopPerc test suite as start
mwalzer Mar 26, 2017
4783b49
[TEST] fixed some test, some extended
mwalzer Mar 26, 2017
726dcfb
[TEST] activated rest of the test and moved unused operators in prote…
mwalzer Mar 26, 2017
bf28aec
[NOP] typos and docu clarifications
mwalzer Mar 26, 2017
338a505
[FIX] added warning, 'incomplete' MSGF PSM is encountered
mwalzer Mar 26, 2017
cde5e72
[FIX] fixed merge conflict with knime_package_support.cmake
mwalzer Mar 26, 2017
5ac5dd3
[RENAME, DOC] renamed TopPerc, some more documentation to its use
mwalzer Mar 26, 2017
f39a28e
[NOP,FIX] corrected some style issues; replaced the other find with h…
mwalzer Mar 27, 2017
8319404
Julianus changes and pin file option
Mar 27, 2017
5f1ebf5
PercolatorAdapter has been changed by Timo reverted to proper parsing…
Mar 28, 2017
31f3fc4
Merge branch 'COMET_and_Percolator' into fix/PercolatorAdapter
Leon-Bichmann Mar 30, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@ OpenMS.config
OpenMS.creator
OpenMS.files
OpenMS.includes
.DS_Store
src/.DS_Store
src/openms/.DS_Store
4 changes: 4 additions & 0 deletions cmake/knime_package_support.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ add_custom_target(
COMMAND ${CMAKE_COMMAND} -D SCRIPT_DIR=${SCRIPT_DIRECTORY} -DTOOLNAME=MSGFPlusAdapter -DPARAM=executable -D CTD_PATH=${CTD_PATH} -P ${SCRIPT_DIRECTORY}remove_parameter_from_ctd.cmake
# LuciPhorAdapter
COMMAND ${CMAKE_COMMAND} -D SCRIPT_DIR=${SCRIPT_DIRECTORY} -DTOOLNAME=LuciphorAdapter -DPARAM=executable -D CTD_PATH=${CTD_PATH} -P ${SCRIPT_DIRECTORY}remove_parameter_from_ctd.cmake
# PercolatorAdapter
COMMAND ${CMAKE_COMMAND} -D SCRIPT_DIR=${SCRIPT_DIRECTORY} -DTOOLNAME=PercolatorAdapter -DPARAM=percolator_executable -D CTD_PATH=${CTD_PATH} -P ${SCRIPT_DIRECTORY}remove_parameter_from_ctd.cmake
# FidoAdapter
COMMAND ${CMAKE_COMMAND} -D SCRIPT_DIR=${SCRIPT_DIRECTORY} -DTOOLNAME=FidoAdapter -DPARAM=fido_executable -D CTD_PATH=${CTD_PATH} -P ${SCRIPT_DIRECTORY}remove_parameter_from_ctd.cmake
COMMAND ${CMAKE_COMMAND} -D SCRIPT_DIR=${SCRIPT_DIRECTORY} -DTOOLNAME=FidoAdapter -DPARAM=fidocp_executable -D CTD_PATH=${CTD_PATH} -P ${SCRIPT_DIRECTORY}remove_parameter_from_ctd.cmake
Expand Down Expand Up @@ -265,6 +267,8 @@ elseif(NOT EXISTS ${SEARCH_ENGINES_DIRECTORY}/Fido)
message(FATAL_ERROR "The given search engine directory seems to have an invalid layout (Fido is missing). ${FOLDER_STRUCTURE_MESSAGE}")
elseif(NOT EXISTS ${SEARCH_ENGINES_DIRECTORY}/LuciPHOr2)
message(FATAL_ERROR "The given search engine directory seems to have an invalid layout (LuciPHOr2 is missing). ${FOLDER_STRUCTURE_MESSAGE}")
elseif(NOT EXISTS ${SEARCH_ENGINES_DIRECTORY}/Percolator)
message(FATAL_ERROR "The given search engine directory seems to have an invalid layout (Percolator is missing). Please check use the one from the SVN.")
elseif(NOT APPLE AND NOT EXISTS ${SEARCH_ENGINES_DIRECTORY}/MyriMatch)
message(FATAL_ERROR "The given search engine directory seems to have an invalid layout (MyriMatch is missing). ${FOLDER_STRUCTURE_MESSAGE}")
endif()
Expand Down
1 change: 1 addition & 0 deletions doc/doxygen/public/TOPP.doxygen
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@
- @subpage TOPP_PeptideIndexer - Refreshes the protein references for all peptide hits.
- @subpage TOPP_PhosphoScoring - Scores potential phosphorylation sites in order to localize the most probable sites.
- @subpage TOPP_ProteinInference - Infer proteins from a list of (high-confidence) peptides.
- @subpage TOPP_PercolatorAdapter - Applies the percolator algorithm to protein/peptide identifications.

<b>Targeted Experiments</b>
- @subpage TOPP_InclusionExclusionListCreator - Creates inclusion and/or exclusion lists for LC-MS/MS experiments.
Expand Down
2 changes: 2 additions & 0 deletions doc/doxygen/public/UTILS.doxygen
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@
- @subpage UTILS_RNPxl - Tool for RNP cross linking experiment analysis.
- @subpage UTILS_SequenceCoverageCalculator - Prints information about idXML files.
- @subpage UTILS_SpecLibCreator - Creates an MSP-formatted spectral library.
- @subpage UTILS_PSMFeatureExtractor - Creates search engine specific features for PercolatorAdapter input.


<b>Quantitation</b>
- @subpage UTILS_ERPairFinder - Evaluate pair ratios on enhanced resolution (zoom) scans.
Expand Down
Binary file added src/.DS_Store
Binary file not shown.
Binary file added src/openms/.DS_Store
Binary file not shown.
203 changes: 203 additions & 0 deletions src/openms/include/OpenMS/ANALYSIS/ID/PercolatorFeatureSetHelper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
// --------------------------------------------------------------------------
// OpenMS -- Open-Source Mass Spectrometry
// --------------------------------------------------------------------------
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
// ETH Zurich, and Freie Universitaet Berlin 2002-2015.
//
// This software is released under a three-clause BSD license:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of any author or any participating institution
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
// For a full list of authors, refer to the file AUTHORS.
// --------------------------------------------------------------------------
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// --------------------------------------------------------------------------
// $Maintainer: Mathias Walzer $
// $Authors: Mathias Walzer, Matthew The $
// --------------------------------------------------------------------------

// Include guard named after this header (PercolatorFeatureSetHelper.h); it matches
// the comment on the closing #endif and no longer reuses the former TopPerc guard name.
#ifndef OPENMS_ANALYSIS_ID_PERCOLATORFEATURESETHELPER_H
#define OPENMS_ANALYSIS_ID_PERCOLATORFEATURESETHELPER_H

#include <vector>
#include <iostream>
#include <cmath>
#include <string>
#include <map>
#include <algorithm>
#include <limits>

#include <OpenMS/CONCEPT/Types.h>
#include <OpenMS/KERNEL/StandardTypes.h>
#include <OpenMS/FORMAT/FileHandler.h>
#include <OpenMS/DATASTRUCTURES/DataValue.h>

#include <boost/lexical_cast.hpp>

namespace OpenMS
{
/**
@brief Percolator feature set and integration helper

This class contains functions to handle (compute, aggregate, integrate)
Percolator features. This includes the calculation or extraction of
Percolator features for the specific search engine usage, preparation for
PercolatorAdapter usage and result reintegration and in the case of
multiple search engine incorporation of different features.
*/

class OPENMS_DLLAPI PercolatorFeatureSetHelper
{

public:
/**
* @brief concatMULTISEPeptideIds
* @param all_peptide_ids PeptideIdentification vector to append to
* @param new_peptide_ids PeptideIdentification vector to be appended
* @param search_engine search engine to depend on for feature creation
*
* Appends a vector of PeptideIdentification to another and registers concatenation Percolator features depending on given search engine.
*/
static void concatMULTISEPeptideIds(std::vector<PeptideIdentification>& all_peptide_ids, std::vector<PeptideIdentification>& new_peptide_ids, String search_engine);

/**
* @brief mergeMULTISEPeptideIds
* @param all_peptide_ids PeptideIdentification vector to be merged into
* @param new_peptide_ids PeptideIdentification vector to merge
* @param search_engine search engine to create features from their scores
*
* Merges a vector of PeptideIdentification into another and registers merge Percolator features depending on given search engine.
*/
static void mergeMULTISEPeptideIds(std::vector<PeptideIdentification>& all_peptide_ids, std::vector<PeptideIdentification>& new_peptide_ids, String search_engine);

/**
* @brief mergeMULTISEProteinIds
* @param all_protein_ids ProteinIdentification vector to be merged into
* @param new_protein_ids ProteinIdentification vector to merge
*
* Concatenates SearchParameter of multiple search engine runs and merges PeptideEvidences, registers the created Percolator features
*/
static void mergeMULTISEProteinIds(std::vector<ProteinIdentification>& all_protein_ids, std::vector<ProteinIdentification>& new_protein_ids);


/**
* @brief addMSGFFeatures
* @param peptide_ids PeptideIdentification vector to create Percolator features in
* @param feature_set register of added features
*
* Creates and adds MSGF+ specific Percolator features and registers them in feature_set
*/
static void addMSGFFeatures(std::vector<PeptideIdentification>& peptide_ids, StringList& feature_set);

/**
* @brief addXTANDEMFeatures
* @param peptide_ids PeptideIdentification vector to create Percolator featrues in
* @param feature_set register of added features
*
* Creates and adds X!Tandem specific Percolator features and registers them in feature_set
*/
static void addXTANDEMFeatures(std::vector<PeptideIdentification>& peptide_ids, StringList& feature_set);

/**
* @brief addCOMETFeatures
* @param peptide_ids PeptideIdentification vector to create Percolator featrues in
* @param feature_set register of added features
*
* Creates and adds Comet specific Percolator features and registers them in feature_set
*/
static void addCOMETFeatures(std::vector<PeptideIdentification>& peptide_ids, StringList& feature_set);

/**
* @brief addMASCOTFeatures
* @param peptide_ids PeptideIdentification vector to create Percolator featrues in
* @param feature_set register of added features
*
* Creates and adds Mascot specific Percolator features and registers them in feature_set
*/
static void addMASCOTFeatures(std::vector<PeptideIdentification>& peptide_ids, StringList& feature_set);

/**
* @brief addMULTISEFeatures
* @param peptide_ids PeptideIdentification vector to create Percolator featrues in
* @param search_engines_used the list of search engines to be considered
* @param feature_set register of added features
* @param complete_only will only add features for PeptideIdentifications where all given search engines identified something
* @param limits_imputation
*
* Adds multiple search engine specific Percolator features and registers them in feature_set
*/
static void addMULTISEFeatures(std::vector<PeptideIdentification>& peptide_ids, StringList& search_engines_used, StringList& feature_set, bool complete_only = true, bool limits_imputation = false);

/**
* @brief addCONCATSEFeatures
* @param peptide_id_list PeptideIdentification vector to create Percolator featrues in
* @param search_engines_used the list of search engines to be considered
* @param feature_set register of added features
*
* Adds multiple search engine specific Percolator features and registers them in feature_set
*/
static void addCONCATSEFeatures(std::vector<PeptideIdentification>& peptide_id_list, StringList& search_engines_used, StringList& feature_set);

/**
* @brief checkExtraFeatures
* @param psms the vector of PeptideHit to be checked
* @param extra_features the list of requested extra features
*
* checks and removes requested extra Percolator features that are actually unavailable (to compute)
*/
static void checkExtraFeatures(const std::vector<PeptideHit> &psms, StringList& extra_features);


protected:
PercolatorFeatureSetHelper();
virtual ~PercolatorFeatureSetHelper();

/// Rescales the fragment features to penalize features calculated by few ions, adapted from MSGFtoPercolator
static double rescaleFragmentFeature_(double featureValue, int NumMatchedMainIons);

/// helper functin for assigning the frequently occurring feature delta score
static void assignDeltaScore_(std::vector<PeptideHit>& hits, String score_ref, String output_ref);

/// gets the scan identifer to merge by
static String getScanMergeKey_(std::vector<PeptideIdentification>::iterator it, std::vector<PeptideIdentification>::iterator start);

/// For accession dependent sorting of ProteinHits
struct lq_ProteinHit
{
inline bool operator() (const ProteinHit& h1, const ProteinHit& h2)
{
return (h1.getAccession() < h2.getAccession());
}
};

/// For accession dependent sorting of PeptideEvidences
struct lq_PeptideEvidence
{
inline bool operator() (const PeptideEvidence& h1, const PeptideEvidence& h2)
{
return (h1.getProteinAccession() < h2.getProteinAccession());
}
};

};

} //namespace OpenMS

#endif //OPENMS_ANALYSIS_ID_PERCOLATORFEATURESETHELPER_H

1 change: 1 addition & 0 deletions src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ MetaboliteSpectralMatching.h
PeptideProteinResolution.h
ProtonDistributionModel.h
PeptideIndexing.h
PercolatorFeatureSetHelper.h
)

### add path to the filenames
Expand Down
1 change: 1 addition & 0 deletions src/openms/include/OpenMS/FORMAT/FileTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ namespace OpenMS
MRM, ///< SpectraST MRM List
PSMS, ///< Percolator tab-delimited output (PSM level)
PARAMXML, ///< internal format for writing and reading parameters (also used as part of CTD)
PIN, ///< Percolator tab-delimited input (PSM level)
SIZE_OF_TYPE ///< No file type. Simply stores the number of types
};

Expand Down
Loading