diff --git a/pyprophet/export.py b/pyprophet/export.py index 094613e9..0fdb86fc 100644 --- a/pyprophet/export.py +++ b/pyprophet/export.py @@ -1,6 +1,8 @@ import pandas as pd import numpy as np import sqlite3 +import duckdb +from duckdb_extensions import extension_importer import click import os @@ -8,9 +10,12 @@ from .data_handling import write_scores_sql_command from .report import plot_scores +## ensure proper extension installed +extension_importer.import_extension("sqlite_scanner") def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_transition_pep, ipf, ipf_max_peptidoform_pep, max_rs_peakgroup_qvalue, peptide, max_global_peptide_qvalue, protein, max_global_protein_qvalue): + condb = duckdb.connect(infile) con = sqlite3.connect(infile) # output for merged but not scored pyprophet input @@ -29,7 +34,7 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t score_sql = ", " + score_sql # add comma at the beginning to fit to statement score_sql = score_sql[:-2] # remove additional space and comma from the end of the string - data = pd.read_sql_query(""" + data = condb.sql(""" SELECT RUN.ID AS id_run, PEPTIDE.ID AS id_peptide, @@ -40,8 +45,8 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t FEATURE.EXP_RT AS RT, FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, FEATURE.DELTA_RT AS delta_rt, - PRECURSOR.LIBRARY_RT AS assay_RT, - FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_RT, + PRECURSOR.LIBRARY_RT AS assay_iRT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, FEATURE.ID AS id, PRECURSOR.CHARGE AS Charge, PRECURSOR.PRECURSOR_MZ AS mz, @@ -59,7 +64,7 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID ORDER BY transition_group_id - """ % score_sql, con) + """ % score_sql).df() else: @@ -292,11 +297,11 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t # Execute main SQLite query click.echo("Info: Reading peak group-level results.") con.executescript(idx_query) # Add indices - data = pd.read_sql_query(query, con) + data = condb.sql(query).df() # Augment OpenSWATH results with IPF scores if ipf_present and ipf=='augmented': - data_augmented = pd.read_sql_query(query_augmented, con) + data_augmented = condb.sql(query_augmented).df() data_augmented = data_augmented.groupby('id').apply(lambda x: pd.Series({'ipf_FullUniModPeptideName': ";".join(x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_FullUniModPeptideName']), 'ipf_precursor_peakgroup_pep': x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_precursor_peakgroup_pep'].values[0], 'ipf_peptidoform_pep': x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_peptidoform_pep'].values[0], 'ipf_peptidoform_m_score': x[x['ipf_peptidoform_pep'] == np.min(x['ipf_peptidoform_pep'])]['ipf_peptidoform_m_score'].values[0]})).reset_index(level='id') @@ -317,7 +322,7 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t SELECT FEATURE_TRANSITION.FEATURE_ID AS id, GROUP_CONCAT(AREA_INTENSITY,';') AS aggr_Peak_Area, GROUP_CONCAT(APEX_INTENSITY,';') AS aggr_Peak_Apex, - GROUP_CONCAT(TRANSITION.ID || "_" || TRANSITION.TYPE || TRANSITION.ORDINAL || "_" || TRANSITION.CHARGE,';') AS aggr_Fragment_Annotation + GROUP_CONCAT(TRANSITION.ID || '_' || TRANSITION.TYPE || TRANSITION.ORDINAL || '_' || TRANSITION.CHARGE,';') AS aggr_Fragment_Annotation FROM FEATURE_TRANSITION INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID INNER JOIN SCORE_TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = SCORE_TRANSITION.TRANSITION_ID AND FEATURE_TRANSITION.FEATURE_ID = SCORE_TRANSITION.FEATURE_ID @@ -335,14 +340,14 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t SELECT FEATURE_ID AS id, GROUP_CONCAT(AREA_INTENSITY,';') AS aggr_Peak_Area, GROUP_CONCAT(APEX_INTENSITY,';') AS aggr_Peak_Apex, - GROUP_CONCAT(TRANSITION.ID || "_" || TRANSITION.TYPE || TRANSITION.ORDINAL || "_" || TRANSITION.CHARGE,';') AS aggr_Fragment_Annotation + GROUP_CONCAT(TRANSITION.ID || '_' || TRANSITION.TYPE || TRANSITION.ORDINAL || '_' || TRANSITION.CHARGE,';') AS aggr_Fragment_Annotation FROM FEATURE_TRANSITION INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID GROUP BY FEATURE_ID ''' click.echo("Info: Reading transition-level results.") con.executescript(idx_transition_query) # Add indices - data_transition = pd.read_sql_query(transition_query, con) + data_transition = condb.sql(transition_query).df() data = pd.merge(data, data_transition, how='left', on=['id']) # Append concatenated protein identifier @@ -353,13 +358,13 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_peptide_id ON PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID); ''') - data_protein = pd.read_sql_query(''' + data_protein = condb.sql(''' SELECT PEPTIDE_ID AS id_peptide, GROUP_CONCAT(PROTEIN.PROTEIN_ACCESSION,';') AS ProteinName FROM PEPTIDE_PROTEIN_MAPPING INNER JOIN PROTEIN ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = PROTEIN.ID GROUP BY PEPTIDE_ID; - ''', con) + ''').df() data = pd.merge(data, data_protein, how='inner', on=['id_peptide']) # Append peptide error-rate control @@ -369,32 +374,32 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t if peptide_present and peptide: click.echo("Info: Reading peptide-level results.") - data_peptide_run = pd.read_sql_query(''' + data_peptide_run = condb.sql(''' SELECT RUN_ID AS id_run, PEPTIDE_ID AS id_peptide, QVALUE AS m_score_peptide_run_specific FROM SCORE_PEPTIDE WHERE CONTEXT == 'run-specific'; - ''', con) + ''').df() if len(data_peptide_run.index) > 0: data = pd.merge(data, data_peptide_run, how='inner', on=['id_run','id_peptide']) - data_peptide_experiment = pd.read_sql_query(''' + data_peptide_experiment = condb.sql(''' SELECT RUN_ID AS id_run, PEPTIDE_ID AS id_peptide, QVALUE AS m_score_peptide_experiment_wide FROM SCORE_PEPTIDE WHERE CONTEXT == 'experiment-wide'; - ''', con) + ''').df() if len(data_peptide_experiment.index) > 0: data = pd.merge(data, data_peptide_experiment, on=['id_run','id_peptide']) - data_peptide_global = pd.read_sql_query(''' + data_peptide_global = condb.sql(''' SELECT PEPTIDE_ID AS id_peptide, QVALUE AS m_score_peptide_global FROM SCORE_PEPTIDE WHERE CONTEXT == 'global'; - ''', con) + ''').df() if len(data_peptide_global.index) > 0: data = pd.merge(data, data_peptide_global[data_peptide_global['m_score_peptide_global'] < max_global_peptide_qvalue], on=['id_peptide']) @@ -411,7 +416,7 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t CREATE INDEX IF NOT EXISTS idx_score_protein_protein_id ON SCORE_PROTEIN (PROTEIN_ID); CREATE INDEX IF NOT EXISTS idx_score_protein_run_id ON SCORE_PROTEIN (RUN_ID); ''') - data_protein_run = pd.read_sql_query(''' + data_protein_run = condb.sql(''' SELECT RUN_ID AS id_run, PEPTIDE_ID AS id_peptide, MIN(QVALUE) AS m_score_protein_run_specific @@ -420,7 +425,7 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t WHERE CONTEXT == 'run-specific' GROUP BY RUN_ID, PEPTIDE_ID; - ''', con) + ''').df() if len(data_protein_run.index) > 0: data = pd.merge(data, data_protein_run, how='inner', on=['id_run','id_peptide']) @@ -430,7 +435,7 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t CREATE INDEX IF NOT EXISTS idx_score_protein_protein_id ON SCORE_PROTEIN (PROTEIN_ID); CREATE INDEX IF NOT EXISTS idx_score_protein_run_id ON SCORE_PROTEIN (RUN_ID); ''') - data_protein_experiment = pd.read_sql_query(''' + data_protein_experiment = condb.sql(''' SELECT RUN_ID AS id_run, PEPTIDE_ID AS id_peptide, MIN(QVALUE) AS m_score_protein_experiment_wide @@ -439,7 +444,7 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t WHERE CONTEXT == 'experiment-wide' GROUP BY RUN_ID, PEPTIDE_ID; - ''', con) + ''').df() if len(data_protein_experiment.index) > 0: data = pd.merge(data, data_protein_experiment, how='inner', on=['id_run','id_peptide']) @@ -448,14 +453,14 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_peptide_id ON PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID); CREATE INDEX IF NOT EXISTS idx_score_protein_protein_id ON SCORE_PROTEIN (PROTEIN_ID); ''') - data_protein_global = pd.read_sql_query(''' + data_protein_global = condb.sql(''' SELECT PEPTIDE_ID AS id_peptide, MIN(QVALUE) AS m_score_protein_global FROM PEPTIDE_PROTEIN_MAPPING INNER JOIN SCORE_PROTEIN ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = SCORE_PROTEIN.PROTEIN_ID WHERE CONTEXT == 'global' GROUP BY PEPTIDE_ID; - ''', con) + ''').df() if len(data_protein_global.index) > 0: data = pd.merge(data, data_protein_global[data_protein_global['m_score_protein_global'] < max_global_protein_qvalue], how='inner', on=['id_peptide']) @@ -478,14 +483,16 @@ def export_tsv(infile, outfile, format, outcsv, transition_quantification, max_t data.to_csv(outfile, sep=sep, index=True) con.close() + condb.close() def export_score_plots(infile): con = sqlite3.connect(infile) + condb = duckdb.connect(infile) if check_sqlite_table(con, "SCORE_MS2"): outfile = infile.split(".osw")[0] + "_ms2_score_plots.pdf" - table_ms2 = pd.read_sql_query(''' + table_ms2 = condb.sql(''' SELECT *, RUN_ID || '_' || PRECURSOR_ID AS GROUP_ID FROM FEATURE_MS2 @@ -512,12 +519,12 @@ def export_score_plots(infile): ORDER BY RUN_ID, PRECURSOR.ID ASC, FEATURE.EXP_RT ASC; -''', con) +''').df() plot_scores(table_ms2, outfile) if check_sqlite_table(con, "SCORE_MS1"): outfile = infile.split(".osw")[0] + "_ms1_score_plots.pdf" - table_ms1 = pd.read_sql_query(''' + table_ms1 = condb.sql(''' SELECT *, RUN_ID || '_' || PRECURSOR_ID AS GROUP_ID FROM FEATURE_MS1 @@ -537,12 +544,12 @@ def export_score_plots(infile): ORDER BY RUN_ID, PRECURSOR.ID ASC, FEATURE.EXP_RT ASC; -''', con) +''').df() plot_scores(table_ms1, outfile) if check_sqlite_table(con, "SCORE_TRANSITION"): outfile = infile.split(".osw")[0] + "_transition_score_plots.pdf" - table_transition = pd.read_sql_query(''' + table_transition = condb.sql(''' SELECT TRANSITION.DECOY AS DECOY, FEATURE_TRANSITION.*, PRECURSOR.CHARGE AS VAR_PRECURSOR_CHARGE, @@ -568,7 +575,8 @@ def export_score_plots(infile): PRECURSOR.ID, FEATURE.EXP_RT, TRANSITION.ID; -''', con) +''').df() plot_scores(table_transition, outfile) con.close() + condb.close() diff --git a/pyprophet/export_compound.py b/pyprophet/export_compound.py index 98e4b692..56ab122e 100644 --- a/pyprophet/export_compound.py +++ b/pyprophet/export_compound.py @@ -1,12 +1,16 @@ -import pandas as pd +import duckdb +from duckdb_extensions import extension_importer import sqlite3 from .data_handling import check_sqlite_table from .data_handling import write_scores_sql_command -from .report import plot_scores + +## ensure proper extension installed +extension_importer.import_extension("sqlite_scanner") def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue): con = sqlite3.connect(infile) + condb = duckdb.connect(infile) # output for merged but not scored pyprophet input @@ -25,7 +29,7 @@ def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue score_sql = ", " + score_sql # add comma at the beginning to fit to statement score_sql = score_sql[:-2] # remove additional space and comma from the end of the string - data = pd.read_sql_query(""" + data = condb.sql(""" SELECT RUN.ID AS id_run, COMPOUND.ID AS id_compound, @@ -36,8 +40,8 @@ def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue FEATURE.EXP_RT AS RT, FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, FEATURE.DELTA_RT AS delta_rt, - PRECURSOR.LIBRARY_RT AS assay_RT, - FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_RT, + PRECURSOR.LIBRARY_RT AS assay_iRT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, FEATURE.ID AS id, COMPOUND.SUM_FORMULA AS sum_formula, COMPOUND.COMPOUND_NAME AS compound_name, @@ -58,10 +62,10 @@ def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID ORDER BY transition_group_id - """ % score_sql, con) + """ % score_sql).df() elif check_sqlite_table(con, "SCORE_MS1"): # MS1 scoring performend - data = pd.read_sql_query(""" + data = condb.sql(""" SELECT RUN.ID AS id_run, COMPOUND.ID AS id_compound, @@ -72,8 +76,8 @@ def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue FEATURE.EXP_RT AS RT, FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, FEATURE.DELTA_RT AS delta_rt, - PRECURSOR.LIBRARY_RT AS assay_RT, - FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_RT, + PRECURSOR.LIBRARY_RT AS assay_iRT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, FEATURE.ID AS id, COMPOUND.SUM_FORMULA AS sum_formula, COMPOUND.COMPOUND_NAME AS compound_name, @@ -99,9 +103,9 @@ def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue WHERE SCORE_MS1.QVALUE < %s ORDER BY transition_group_id, peak_group_rank; - """ % max_rs_peakgroup_qvalue, con) + """ % max_rs_peakgroup_qvalue).df() else: # MS2 or MS1MS2 scoring performend - data = pd.read_sql_query(""" + data = condb.sql(""" SELECT RUN.ID AS id_run, COMPOUND.ID AS id_compound, @@ -112,8 +116,8 @@ def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue FEATURE.EXP_RT AS RT, FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, FEATURE.DELTA_RT AS delta_rt, - PRECURSOR.LIBRARY_RT AS assay_RT, - FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_RT, + PRECURSOR.LIBRARY_RT AS assay_iRT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, FEATURE.ID AS id, COMPOUND.SUM_FORMULA AS sum_formula, COMPOUND.COMPOUND_NAME AS compound_name, @@ -139,9 +143,10 @@ def export_compound_tsv(infile, outfile, format, outcsv, max_rs_peakgroup_qvalue WHERE SCORE_MS2.QVALUE < %s ORDER BY transition_group_id, peak_group_rank; - """ % max_rs_peakgroup_qvalue, con) + """ % max_rs_peakgroup_qvalue).df() con.close() + condb.close() if outcsv: sep = "," diff --git a/pyprophet/export_parquet.py b/pyprophet/export_parquet.py index 1f89fa54..cd1afd71 100644 --- a/pyprophet/export_parquet.py +++ b/pyprophet/export_parquet.py @@ -5,6 +5,9 @@ from duckdb_extensions import extension_importer import re +## ensure proper extension installed +extension_importer.import_extension("sqlite_scanner") + def getPeptideProteinScoreTable(conndb, level): if level == 'peptide': id = 'PEPTIDE_ID' @@ -45,7 +48,6 @@ def export_to_parquet(infile, outfile, transitionLevel=False, onlyFeatures=False Return: None ''' - extension_importer.import_extension("sqlite_scanner") condb = duckdb.connect(infile) con = sqlite3.connect(infile) diff --git a/pyprophet/filter.py b/pyprophet/filter.py index ba443292..8f1990fc 100644 --- a/pyprophet/filter.py +++ b/pyprophet/filter.py @@ -1,3 +1,5 @@ +import duckdb +from duckdb_extensions import extension_importer import pandas as pd import numpy as np import sqlite3 @@ -7,6 +9,9 @@ from .data_handling import check_sqlite_table +## ensure proper extension installed +extension_importer.import_extension("sqlite_scanner") + # Filter a sqMass chromatogram file by given input labels def filter_chrom_by_labels(infile, outfile, labels): if len(labels) == 0: @@ -94,6 +99,7 @@ def get_ids_stmt(keep_ids): def filter_sqmass(sqmassfiles, infile=None, max_precursor_pep=0.7, max_peakgroup_pep=0.7, max_transition_pep=0.7, keep_naked_peptides=[], remove_decoys=True): if infile is not None: con = sqlite3.connect(infile) + condb = duckdb.connect(infile) if remove_decoys: decoy_query = " AND DECOY=0" @@ -107,7 +113,7 @@ def filter_sqmass(sqmassfiles, infile=None, max_precursor_pep=0.7, max_peakgroup if infile is not None: if check_sqlite_table(con, 'SCORE_MS1') and check_sqlite_table(con, 'SCORE_MS2') and check_sqlite_table(con, 'SCORE_TRANSITION'): - transitions = pd.read_sql_query(''' + transitions = condb.sql(''' SELECT TRANSITION_ID AS transition_id FROM PRECURSOR INNER JOIN FEATURE ON PRECURSOR.ID = FEATURE.PRECURSOR_ID @@ -120,10 +126,10 @@ def filter_sqmass(sqmassfiles, infile=None, max_precursor_pep=0.7, max_peakgroup AND SCORE_TRANSITION.PEP <= {2} AND FILENAME LIKE '%{3}%' {4}; - '''.format(max_precursor_pep, max_peakgroup_pep, max_transition_pep, sqm_in.split(".sqMass")[0], decoy_query), con)['transition_id'].values + '''.format(max_precursor_pep, max_peakgroup_pep, max_transition_pep, sqm_in.split(".sqMass")[0], decoy_query)).df()['transition_id'].values elif check_sqlite_table(con, 'SCORE_MS1') and check_sqlite_table(con, 'SCORE_MS2') and not check_sqlite_table(con, 'SCORE_TRANSITION'): - transitions = pd.read_sql_query(''' + transitions = condb.sql(''' SELECT TRANSITION_ID AS transition_id FROM PRECURSOR INNER JOIN FEATURE ON PRECURSOR.ID = FEATURE.PRECURSOR_ID @@ -135,10 +141,10 @@ def filter_sqmass(sqmassfiles, infile=None, max_precursor_pep=0.7, max_peakgroup AND SCORE_MS2.PEP <= {1} AND FILENAME LIKE '%{2}%' {3}; - '''.format(max_precursor_pep, max_peakgroup_pep, sqm_in.split(".sqMass")[0], decoy_query), con)['transition_id'].values + '''.format(max_precursor_pep, max_peakgroup_pep, sqm_in.split(".sqMass")[0], decoy_query)).df()['transition_id'].values elif not check_sqlite_table(con, 'SCORE_MS1') and check_sqlite_table(con, 'SCORE_MS2') and not check_sqlite_table(con, 'SCORE_TRANSITION'): - transitions = pd.read_sql_query(''' + transitions = condb.sql(''' SELECT TRANSITION_ID AS transition_id FROM PRECURSOR INNER JOIN FEATURE ON PRECURSOR.ID = FEATURE.PRECURSOR_ID @@ -148,22 +154,23 @@ def filter_sqmass(sqmassfiles, infile=None, max_precursor_pep=0.7, max_peakgroup WHERE SCORE_MS2.PEP <= {0} AND FILENAME LIKE '%{1}%' {2}; - '''.format(max_peakgroup_pep, sqm_in.split(".sqMass")[0], decoy_query), con)['transition_id'].values + '''.format(max_peakgroup_pep, sqm_in.split(".sqMass")[0], decoy_query)).df()['transition_id'].values else: raise click.ClickException("Conduct scoring on MS1, MS2 and/or transition-level before filtering.") elif len(keep_naked_peptides) != 0: - con = sqlite3.connect(sqm_in) - transitions = pd.read_sql_query(f''' + condb = duckdb.connect(sqm_in) + transitions = condb.sql(f''' SELECT NATIVE_ID FROM CHROMATOGRAM INNER JOIN PRECURSOR ON PRECURSOR.CHROMATOGRAM_ID = CHROMATOGRAM.ID - WHERE PRECURSOR.PEPTIDE_SEQUENCE IN ('{"','".join(keep_naked_peptides)}') ''', con)['NATIVE_ID'].values - con.close() + WHERE PRECURSOR.PEPTIDE_SEQUENCE IN ('{"','".join(keep_naked_peptides)}') ''').df()['NATIVE_ID'].values + condb.close() else: raise click.ClickException("Please provide either an associated OSW file to filter based on scoring or a list of peptides to keep.") filter_chrom_by_labels(sqm_in, sqm_out, transitions) + con.close() def filter_osw(oswfiles, remove_decoys=True, omit_tables=[], max_gene_fdr=None, max_protein_fdr=None, max_peptide_fdr=None, max_ms2_fdr=None, keep_naked_peptides=[], run_ids=[]): diff --git a/pyprophet/ipf.py b/pyprophet/ipf.py index abbdcacb..638fb164 100644 --- a/pyprophet/ipf.py +++ b/pyprophet/ipf.py @@ -1,14 +1,18 @@ +import duckdb +from duckdb_extensions import extension_importer import pandas as pd import numpy as np -import scipy as sp import sqlite3 -import sys import click from scipy.stats import rankdata from .data_handling import check_sqlite_table from shutil import copyfile +## ensure proper extension installed +extension_importer.import_extension("sqlite_scanner") + + def compute_model_fdr(data_in): data = np.asarray(data_in) @@ -27,6 +31,7 @@ def compute_model_fdr(data_in): return fdr +# NOTE: For testing purposes cannot use duckdb as order in sql statements is not guaranteed def read_pyp_peakgroup_precursor(path, ipf_max_peakgroup_pep, ipf_ms1_scoring, ipf_ms2_scoring): click.echo("Info: Reading precursor-level data.") # precursors are restricted according to ipf_max_peakgroup_pep to exclude very poor peak groups @@ -47,7 +52,7 @@ def read_pyp_peakgroup_precursor(path, ipf_max_peakgroup_pep, ipf_ms1_scoring, i CREATE INDEX IF NOT EXISTS idx_score_transition_transition_id ON SCORE_TRANSITION (TRANSITION_ID); ''') - data = pd.read_sql_query(''' + data = pd.read_sql(''' SELECT FEATURE.ID AS FEATURE_ID, SCORE_MS2.PEP AS MS2_PEAKGROUP_PEP, NULL AS MS1_PRECURSOR_PEP, @@ -79,7 +84,7 @@ def read_pyp_peakgroup_precursor(path, ipf_max_peakgroup_pep, ipf_ms1_scoring, i CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID); ''') - data = pd.read_sql_query(''' + data = pd.read_sql(''' SELECT FEATURE.ID AS FEATURE_ID, SCORE_MS2.PEP AS MS2_PEAKGROUP_PEP, SCORE_MS1.PEP AS MS1_PRECURSOR_PEP, @@ -108,7 +113,7 @@ def read_pyp_peakgroup_precursor(path, ipf_max_peakgroup_pep, ipf_ms1_scoring, i CREATE INDEX IF NOT EXISTS idx_score_transition_transition_id ON SCORE_TRANSITION (TRANSITION_ID); ''') - data = pd.read_sql_query(''' + data = pd.read_sql(''' SELECT FEATURE.ID AS FEATURE_ID, SCORE_MS2.PEP AS MS2_PEAKGROUP_PEP, SCORE_MS1.PEP AS MS1_PRECURSOR_PEP, @@ -140,7 +145,7 @@ def read_pyp_peakgroup_precursor(path, ipf_max_peakgroup_pep, ipf_ms1_scoring, i CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID); ''') - data = pd.read_sql_query(''' + data = pd.read_sql(''' SELECT FEATURE.ID AS FEATURE_ID, SCORE_MS2.PEP AS MS2_PEAKGROUP_PEP, NULL AS MS1_PRECURSOR_PEP, @@ -154,7 +159,6 @@ def read_pyp_peakgroup_precursor(path, ipf_max_peakgroup_pep, ipf_ms1_scoring, i data.columns = [col.lower() for col in data.columns] con.close() - return data @@ -162,6 +166,7 @@ def read_pyp_transition(path, ipf_max_transition_pep, ipf_h0): click.echo("Info: Reading peptidoform-level data.") # only the evidence is restricted to ipf_max_transition_pep, the peptidoform-space is complete con = sqlite3.connect(path) + condb = duckdb.connect(path) con.executescript(''' CREATE INDEX IF NOT EXISTS idx_transition_peptide_mapping_transition_id ON TRANSITION_PEPTIDE_MAPPING (TRANSITION_ID); @@ -171,7 +176,7 @@ def read_pyp_transition(path, ipf_max_transition_pep, ipf_h0): ''') # transition-level evidence - evidence = pd.read_sql_query(''' + evidence = condb.sql(''' SELECT FEATURE_ID, TRANSITION_ID, PEP @@ -180,11 +185,11 @@ def read_pyp_transition(path, ipf_max_transition_pep, ipf_h0): WHERE TRANSITION.TYPE!='' AND TRANSITION.DECOY=0 AND PEP < %s; - ''' % ipf_max_transition_pep, con) + ''' % ipf_max_transition_pep).df() evidence.columns = [col.lower() for col in evidence.columns] # transition-level bitmask - bitmask = pd.read_sql_query(''' + bitmask = condb.sql(''' SELECT DISTINCT TRANSITION.ID AS TRANSITION_ID, PEPTIDE_ID, 1 AS BMASK @@ -193,11 +198,11 @@ def read_pyp_transition(path, ipf_max_transition_pep, ipf_h0): INNER JOIN TRANSITION_PEPTIDE_MAPPING ON TRANSITION.ID = TRANSITION_PEPTIDE_MAPPING.TRANSITION_ID WHERE TRANSITION.TYPE!='' AND TRANSITION.DECOY=0; -''', con) +''').df() bitmask.columns = [col.lower() for col in bitmask.columns] # potential peptidoforms per feature - num_peptidoforms = pd.read_sql_query(''' + num_peptidoforms = condb.sql(''' SELECT FEATURE_ID, COUNT(DISTINCT PEPTIDE_ID) AS NUM_PEPTIDOFORMS FROM SCORE_TRANSITION @@ -207,11 +212,11 @@ def read_pyp_transition(path, ipf_max_transition_pep, ipf_h0): AND TRANSITION.DECOY=0 GROUP BY FEATURE_ID ORDER BY FEATURE_ID; -''', con) +''').df() num_peptidoforms.columns = [col.lower() for col in num_peptidoforms.columns] # peptidoform space per feature - peptidoforms = pd.read_sql_query(''' + peptidoforms = condb.sql(''' SELECT DISTINCT FEATURE_ID, PEPTIDE_ID FROM SCORE_TRANSITION @@ -220,10 +225,11 @@ def read_pyp_transition(path, ipf_max_transition_pep, ipf_h0): WHERE TRANSITION.TYPE!='' AND TRANSITION.DECOY=0 ORDER BY FEATURE_ID; -''', con) +''').df() peptidoforms.columns = [col.lower() for col in peptidoforms.columns] con.close() + condb.close() # add h0 (peptide_id: -1) to peptidoform-space if necessary if ipf_h0: diff --git a/pyprophet/levels_contexts.py b/pyprophet/levels_contexts.py index edc36028..803e690e 100644 --- a/pyprophet/levels_contexts.py +++ b/pyprophet/levels_contexts.py @@ -1,15 +1,17 @@ -import sys import os import click import pandas as pd -import numpy as np import sqlite3 +import duckdb +from duckdb_extensions import extension_importer from .stats import error_statistics, lookup_values_from_error_table, final_err_table, summary_err_table from .report import save_report from shutil import copyfile from .data_handling import check_sqlite_table +## ensure proper extension installed +extension_importer.import_extension("sqlite_scanner") def statistics_report(data, outfile, context, analyte, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette): @@ -45,6 +47,7 @@ def statistics_report(data, outfile, context, analyte, parametric, pfdr, pi0_lam def infer_genes(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette): con = sqlite3.connect(infile) + condb = duckdb.connect(infile) if not check_sqlite_table(con, "SCORE_MS2"): raise click.ClickException("Apply scoring to MS2-level data before running gene-level scoring.") @@ -69,7 +72,7 @@ def infer_genes(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_meth CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID); ''') - data = pd.read_sql_query(''' + data = condb.sql(''' SELECT %s AS RUN_ID, %s AS GROUP_ID, GENE.ID AS GENE_ID, @@ -131,6 +134,7 @@ def infer_genes(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_meth def infer_proteins(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette): con = sqlite3.connect(infile) + condb = duckdb.connect(infile) if not check_sqlite_table(con, "SCORE_MS2"): raise click.ClickException("Apply scoring to MS2-level data before running protein-level scoring.") @@ -141,7 +145,7 @@ def infer_proteins(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_m group_id = 'PROTEIN.ID' else: run_id = 'RUN_ID' - group_id = 'RUN_ID || "_" || PROTEIN.ID' + group_id = "RUN_ID || '_' || PROTEIN.ID" con.executescript(''' CREATE INDEX IF NOT EXISTS idx_peptide_protein_mapping_protein_id ON PEPTIDE_PROTEIN_MAPPING (PROTEIN_ID); @@ -155,13 +159,13 @@ def infer_proteins(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_m CREATE INDEX IF NOT EXISTS idx_score_ms2_feature_id ON SCORE_MS2 (FEATURE_ID); ''') - data = pd.read_sql_query(''' + data = condb.sql(''' SELECT %s AS RUN_ID, %s AS GROUP_ID, PROTEIN.ID AS PROTEIN_ID, PRECURSOR.DECOY AS DECOY, - SCORE, - "%s" AS CONTEXT + MAX(SCORE), + '%s' AS CONTEXT FROM PROTEIN INNER JOIN (SELECT PEPTIDE_PROTEIN_MAPPING.PEPTIDE_ID AS PEPTIDE_ID, @@ -178,12 +182,12 @@ def infer_proteins(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_m INNER JOIN PRECURSOR ON PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID = PRECURSOR.ID INNER JOIN FEATURE ON PRECURSOR.ID = FEATURE.PRECURSOR_ID INNER JOIN SCORE_MS2 ON FEATURE.ID = SCORE_MS2.FEATURE_ID -GROUP BY GROUP_ID -HAVING MAX(SCORE) -ORDER BY SCORE DESC -''' % (run_id, group_id, context), con) +GROUP BY GROUP_ID, RUN_ID, PROTEIN.ID, PRECURSOR.DECOY +ORDER BY MAX(SCORE) DESC +''' % (run_id, group_id, context)).df().rename(columns={"max(SCORE)": "score"}) else: raise click.ClickException("Unspecified context selected.") + condb.close() data.columns = [col.lower() for col in data.columns] con.close() diff --git a/pyprophet/runner.py b/pyprophet/runner.py index 5f0ef209..e9e32007 100644 --- a/pyprophet/runner.py +++ b/pyprophet/runner.py @@ -13,6 +13,8 @@ from .report import save_report from .data_handling import is_sqlite_file, check_sqlite_table from shutil import copyfile +import duckdb +from duckdb_extensions import extension_importer try: profile @@ -34,6 +36,8 @@ def read_tsv(infile): return(table) def read_osw(infile, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn): + extension_importer.import_extension("sqlite_scanner") + condb = duckdb.connect(infile) con = sqlite3.connect(infile) if level == "ms2" or level == "ms1ms2": @@ -47,7 +51,7 @@ def read_osw(infile, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_m CREATE INDEX IF NOT EXISTS idx_feature_ms2_feature_id ON FEATURE_MS2 (FEATURE_ID); ''') - table = pd.read_sql_query(''' + table = condb.sql(''' SELECT *, RUN_ID || '_' || PRECURSOR_ID AS GROUP_ID FROM FEATURE_MS2 @@ -72,7 +76,7 @@ def read_osw(infile, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_m ORDER BY RUN_ID, PRECURSOR.ID ASC, FEATURE.EXP_RT ASC; -''', con) +''').df() elif level == "ms1": if not check_sqlite_table(con, "FEATURE_MS1"): raise click.ClickException("MS1-level feature table not present in file.") @@ -84,7 +88,7 @@ def read_osw(infile, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_m CREATE INDEX IF NOT EXISTS idx_feature_ms1_feature_id ON FEATURE_MS1 (FEATURE_ID); ''') - table = pd.read_sql_query(''' + table = condb.sql(''' SELECT *, RUN_ID || '_' || PRECURSOR_ID AS GROUP_ID FROM FEATURE_MS1 @@ -102,7 +106,7 @@ def read_osw(infile, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_m ORDER BY RUN_ID, PRECURSOR.ID ASC, FEATURE.EXP_RT ASC; -''', con) +''').df() elif level == "transition": if not check_sqlite_table(con, "SCORE_MS2"): raise click.ClickException("Transition-level scoring for IPF requires prior MS2 or MS1MS2-level scoring. Please run 'pyprophet score --level=ms2' or 'pyprophet score --level=ms1ms2' on this file first.") @@ -119,7 +123,7 @@ def read_osw(infile, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_m CREATE INDEX IF NOT EXISTS idx_feature_transition_transition_id ON FEATURE_TRANSITION (TRANSITION_ID); ''') - table = pd.read_sql_query(''' + table = condb.sql(''' SELECT TRANSITION.DECOY AS DECOY, FEATURE_TRANSITION.*, PRECURSOR.CHARGE AS PRECURSOR_CHARGE, @@ -148,7 +152,7 @@ def read_osw(infile, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_m PRECURSOR.ID, FEATURE.EXP_RT, TRANSITION.ID; -''' % (ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn), con) +''' % (ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn)).df() else: raise click.ClickException("Unspecified data level selected.") @@ -192,6 +196,7 @@ def read_osw(infile, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_m table = table.rename(index=str, columns={'precursor_charge': 'var_precursor_charge', 'product_charge': 'var_product_charge', 'transition_count': 'var_transition_count'}) con.close() + condb.close() return(table) # Check for auto main score selection diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_compound_0.out b/tests/_regtest_outputs/test_pyprophet_export.test_compound_0.out index d0b12cb6..8b0ffa0d 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_compound_0.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_compound_0.out @@ -1,14 +1,14 @@ - Adducts Charge Intensity RT aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_RT assay_rt compound_name d_score decoy delta_RT delta_rt filename id leftWidth m_score mz peak_group_rank rightWidth run_id sum_formula transition_group_id -0 M+Na+ 1 71301.300 475.327 90356.0 2.8526e+05 475.718 475.7176 Isoprothiolane 4.7040 0 -0.391 -0.3906 /Users/alka/Documents/work/projects/OpenSWATH_... 6732968656885689606 472.409 0.0081 313.0539 1 478.992 2408145804652532658 C12H18O4S2 3 -1 M+Na+ 1 19160.700 475.303 31671.0 9.9949e+04 475.718 475.7184 Isoprothiolane 3.5835 0 -0.415 -0.4154 /Users/alka/Documents/work/projects/OpenSWATH_... 6921299947704833207 472.408 0.0231 313.0539 1 479.814 1007350642398073598 C12H18O4S2 3 -2 M+Na+ 1 9308.450 475.253 17956.0 5.0582e+04 475.718 475.7180 Isoprothiolane 3.9997 0 -0.465 -0.4650 /Users/alka/Documents/work/projects/OpenSWATH_... 5388675400702073898 472.408 0.0200 313.0539 1 479.814 214379053049545951 C12H18O4S2 3 -3 M+Na+ 1 147.925 481.026 746.0 6.9487e+02 475.718 475.7176 Isoprothiolane 2.9949 0 5.308 5.3084 /Users/alka/Documents/work/projects/OpenSWATH_... 7508128245913369262 479.815 0.0442 313.0539 2 483.106 2408145804652532658 C12H18O4S2 3 -4 M+H+ 1 949.648 453.263 1705988.0 4.6946e+06 453.946 453.9459 Triadimefon_decoy 3.4093 1 -0.683 -0.6829 /Users/alka/Documents/work/projects/OpenSWATH_... 671489720159068151 451.012 0.0266 294.1004 1 461.710 3140099155998074833 C14H16ClN3O2 6 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 M+H+ 1 1788.530 455.453 5767.0 1.6677e+04 455.082 455.0823 Boscalid 4.3525 0 0.371 0.3707 /Users/alka/Documents/work/projects/OpenSWATH_... 4911838227523744331 452.659 0.0110 343.0399 1 460.888 6344784885616328495 C18H12Cl2N2O 104 -96 M+Na+ 1 57212.200 502.251 97318.0 3.1908e+05 501.656 501.6563 Picoxystrobin 3.1734 0 0.595 0.5947 /Users/alka/Documents/work/projects/OpenSWATH_... 995120417245308422 499.563 0.0319 390.0924 1 506.969 3140099155998074833 C18H16F3NO4 108 -97 M+Na+ 1 28159.500 502.291 32319.0 8.4656e+04 501.656 501.6562 Picoxystrobin 4.2856 0 0.635 0.6348 /Users/alka/Documents/work/projects/OpenSWATH_... 6098343142262325573 499.564 0.0110 390.0924 1 505.324 2408145804652532658 C18H16F3NO4 108 -98 M+Na+ 1 11019.300 502.517 7266.0 2.0389e+04 501.656 501.6556 Picoxystrobin 3.0175 0 0.861 0.8614 /Users/alka/Documents/work/projects/OpenSWATH_... 3727023492293302868 499.564 0.0430 390.0924 1 506.147 1007350642398073598 C18H16F3NO4 108 -99 M+Na+ 1 5384.930 502.411 4580.0 1.1208e+04 501.656 501.6560 Picoxystrobin 3.3909 0 0.755 0.7550 /Users/alka/Documents/work/projects/OpenSWATH_... 7379182434544455482 499.564 0.0281 390.0924 1 505.324 214379053049545951 C18H16F3NO4 108 + Adducts Charge Intensity RT aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt compound_name d_score decoy delta_iRT delta_rt filename id leftWidth m_score mz peak_group_rank rightWidth run_id sum_formula transition_group_id +0 M+Na+ 1 71301.300 475.327 90356.0 2.8526e+05 475.718 475.7176 Isoprothiolane 4.7040 0 -0.391 -0.3906 /Users/alka/Documents/work/projects/OpenSWATH_... 6732968656885689606 472.409 0.0081 313.0539 1 478.992 2408145804652532658 C12H18O4S2 3 +1 M+Na+ 1 19160.700 475.303 31671.0 9.9949e+04 475.718 475.7184 Isoprothiolane 3.5835 0 -0.415 -0.4154 /Users/alka/Documents/work/projects/OpenSWATH_... 6921299947704833207 472.408 0.0231 313.0539 1 479.814 1007350642398073598 C12H18O4S2 3 +2 M+Na+ 1 9308.450 475.253 17956.0 5.0582e+04 475.718 475.7180 Isoprothiolane 3.9997 0 -0.465 -0.4650 /Users/alka/Documents/work/projects/OpenSWATH_... 5388675400702073898 472.408 0.0200 313.0539 1 479.814 214379053049545951 C12H18O4S2 3 +3 M+Na+ 1 147.925 481.026 746.0 6.9487e+02 475.718 475.7176 Isoprothiolane 2.9949 0 5.308 5.3084 /Users/alka/Documents/work/projects/OpenSWATH_... 7508128245913369262 479.815 0.0442 313.0539 2 483.106 2408145804652532658 C12H18O4S2 3 +4 M+H+ 1 949.648 453.263 1705988.0 4.6946e+06 453.946 453.9459 Triadimefon_decoy 3.4093 1 -0.683 -0.6829 /Users/alka/Documents/work/projects/OpenSWATH_... 671489720159068151 451.012 0.0266 294.1004 1 461.710 3140099155998074833 C14H16ClN3O2 6 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 M+H+ 1 300818.000 455.202 802215.0 2.1763e+06 455.082 455.0819 Boscalid 5.2355 0 0.120 0.1201 /Users/alka/Documents/work/projects/OpenSWATH_... 1499846308261616358 452.658 0.0054 343.0399 1 460.887 3140099155998074833 C18H12Cl2N2O 104 +96 M+Na+ 1 57212.200 502.251 97318.0 3.1908e+05 501.656 501.6563 Picoxystrobin 3.1734 0 0.595 0.5947 /Users/alka/Documents/work/projects/OpenSWATH_... 995120417245308422 499.563 0.0319 390.0924 1 506.969 3140099155998074833 C18H16F3NO4 108 +97 M+Na+ 1 28159.500 502.291 32319.0 8.4656e+04 501.656 501.6562 Picoxystrobin 4.2856 0 0.635 0.6348 /Users/alka/Documents/work/projects/OpenSWATH_... 6098343142262325573 499.564 0.0110 390.0924 1 505.324 2408145804652532658 C18H16F3NO4 108 +98 M+Na+ 1 11019.300 502.517 7266.0 2.0389e+04 501.656 501.6556 Picoxystrobin 3.0175 0 0.861 0.8614 /Users/alka/Documents/work/projects/OpenSWATH_... 3727023492293302868 499.564 0.0430 390.0924 1 506.147 1007350642398073598 C18H16F3NO4 108 +99 M+Na+ 1 5384.930 502.411 4580.0 1.1208e+04 501.656 501.6560 Picoxystrobin 3.3909 0 0.755 0.7550 /Users/alka/Documents/work/projects/OpenSWATH_... 7379182434544455482 499.564 0.0281 390.0924 1 505.324 214379053049545951 C18H16F3NO4 108 [100 rows x 23 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_compound_1.out b/tests/_regtest_outputs/test_pyprophet_export.test_compound_1.out index 38f654d6..d53ef999 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_compound_1.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_compound_1.out @@ -1,14 +1,14 @@ - Adducts Charge Intensity RT aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_RT assay_rt compound_name d_score decoy delta_RT delta_rt filename id leftWidth m_score mz peak_group_rank rightWidth run_id sum_formula transition_group_id -0 M+H+ 1 6225.5100 112.742 73086.0 314988.6293 117.983 117.9827 Methamidophos 2.9820 0 -5.241 -5.2406 /Users/alka/Documents/work/projects/OpenSWATH_... 6629051171722998253 108.687 0.0033 142.0086 1 121.853 3140099155998074833 C2H8NO2PS 0 -1 M+H+ 1 758.0860 117.364 15110.0 49705.2486 117.983 117.9831 Methamidophos 3.0012 0 -0.619 -0.6191 /Users/alka/Documents/work/projects/OpenSWATH_... 8877729345763666378 114.448 0.0033 142.0086 1 121.854 2408145804652532658 C2H8NO2PS 0 -2 M+H+ 1 198.6610 114.092 2967.0 12507.2948 117.983 117.9832 Methamidophos 1.9660 0 -3.891 -3.8912 /Users/alka/Documents/work/projects/OpenSWATH_... 85932679098247514 110.333 0.0223 142.0086 1 119.385 1007350642398073598 C2H8NO2PS 0 -3 M+H+ 1 165.0120 117.559 2930.0 7454.3224 117.983 117.9831 Methamidophos 2.4779 0 -0.424 -0.4241 /Users/alka/Documents/work/projects/OpenSWATH_... 8707546894788545451 115.270 0.0086 142.0086 1 120.208 214379053049545951 C2H8NO2PS 0 -4 M+H+ 1 31.2721 115.696 505.0 1301.6900 117.983 117.9831 Methamidophos 2.0155 0 -2.287 -2.2871 /Users/alka/Documents/work/projects/OpenSWATH_... 8419023093353902826 112.802 0.0203 142.0086 1 120.208 6344784885616328495 C2H8NO2PS 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 M+H+ 1 133.2040 456.633 4737.0 13662.9190 456.737 456.7369 Molinate 1.6615 0 -0.104 -0.1039 /Users/alka/Documents/work/projects/OpenSWATH_... 6094669438926273156 453.482 0.0353 188.1104 1 460.065 6344784885616328495 C9H17NOS 25 -96 M+H+ 1 37115.3000 143.845 20587.0 65994.6621 146.160 146.1605 Acephate 2.8177 0 -2.315 -2.3155 /Users/alka/Documents/work/projects/OpenSWATH_... 1446287227778771923 140.780 0.0045 184.0192 1 149.832 3140099155998074833 C4H10NO3PS 27 -97 M+H+ 1 8140.1400 145.778 6678.0 18505.5670 146.160 146.1603 Acephate 2.8313 0 -0.382 -0.3823 /Users/alka/Documents/work/projects/OpenSWATH_... 726467501366503692 143.249 0.0045 184.0192 1 149.010 2408145804652532658 C4H10NO3PS 27 -98 M+H+ 1 1688.6600 144.121 1632.0 4300.0667 146.160 146.1601 Acephate 2.5718 0 -2.039 -2.0391 /Users/alka/Documents/work/projects/OpenSWATH_... 829341238503978341 141.603 0.0072 184.0192 1 147.363 1007350642398073598 C4H10NO3PS 27 -99 M+H+ 1 907.0830 145.815 738.0 1741.9189 146.160 146.1597 Acephate 1.8453 0 -0.345 -0.3447 /Users/alka/Documents/work/projects/OpenSWATH_... 4292801597472146447 143.249 0.0263 184.0192 1 149.009 214379053049545951 C4H10NO3PS 27 + Adducts Charge Intensity RT aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt compound_name d_score decoy delta_iRT delta_rt filename id leftWidth m_score mz peak_group_rank rightWidth run_id sum_formula transition_group_id +0 M+H+ 1 6225.5100 112.742 73086.0 314988.6293 117.983 117.9827 Methamidophos 2.9820 0 -5.241 -5.2406 /Users/alka/Documents/work/projects/OpenSWATH_... 6629051171722998253 108.687 0.0033 142.0086 1 121.853 3140099155998074833 C2H8NO2PS 0 +1 M+H+ 1 758.0860 117.364 15110.0 49705.2486 117.983 117.9831 Methamidophos 3.0012 0 -0.619 -0.6191 /Users/alka/Documents/work/projects/OpenSWATH_... 8877729345763666378 114.448 0.0033 142.0086 1 121.854 2408145804652532658 C2H8NO2PS 0 +2 M+H+ 1 198.6610 114.092 2967.0 12507.2948 117.983 117.9832 Methamidophos 1.9660 0 -3.891 -3.8912 /Users/alka/Documents/work/projects/OpenSWATH_... 85932679098247514 110.333 0.0223 142.0086 1 119.385 1007350642398073598 C2H8NO2PS 0 +3 M+H+ 1 165.0120 117.559 2930.0 7454.3224 117.983 117.9831 Methamidophos 2.4779 0 -0.424 -0.4241 /Users/alka/Documents/work/projects/OpenSWATH_... 8707546894788545451 115.270 0.0086 142.0086 1 120.208 214379053049545951 C2H8NO2PS 0 +4 M+H+ 1 31.2721 115.696 505.0 1301.6900 117.983 117.9831 Methamidophos 2.0155 0 -2.287 -2.2871 /Users/alka/Documents/work/projects/OpenSWATH_... 8419023093353902826 112.802 0.0203 142.0086 1 120.208 6344784885616328495 C2H8NO2PS 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 M+H+ 1 133.2040 456.633 4737.0 13662.9190 456.737 456.7369 Molinate 1.6615 0 -0.104 -0.1039 /Users/alka/Documents/work/projects/OpenSWATH_... 6094669438926273156 453.482 0.0353 188.1104 1 460.065 6344784885616328495 C9H17NOS 25 +96 M+H+ 1 37115.3000 143.845 20587.0 65994.6621 146.160 146.1605 Acephate 2.8177 0 -2.315 -2.3155 /Users/alka/Documents/work/projects/OpenSWATH_... 1446287227778771923 140.780 0.0045 184.0192 1 149.832 3140099155998074833 C4H10NO3PS 27 +97 M+H+ 1 8140.1400 145.778 6678.0 18505.5670 146.160 146.1603 Acephate 2.8313 0 -0.382 -0.3823 /Users/alka/Documents/work/projects/OpenSWATH_... 726467501366503692 143.249 0.0045 184.0192 1 149.010 2408145804652532658 C4H10NO3PS 27 +98 M+H+ 1 1688.6600 144.121 1632.0 4300.0667 146.160 146.1601 Acephate 2.5718 0 -2.039 -2.0391 /Users/alka/Documents/work/projects/OpenSWATH_... 829341238503978341 141.603 0.0072 184.0192 1 147.363 1007350642398073598 C4H10NO3PS 27 +99 M+H+ 1 907.0830 145.815 738.0 1741.9189 146.160 146.1597 Acephate 1.8453 0 -0.345 -0.3447 /Users/alka/Documents/work/projects/OpenSWATH_... 4292801597472146447 143.249 0.0263 184.0192 1 149.009 214379053049545951 C4H10NO3PS 27 [100 rows x 23 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_compound_unscored.out b/tests/_regtest_outputs/test_pyprophet_export.test_compound_unscored.out index 42d43160..bc405802 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_compound_unscored.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_compound_unscored.out @@ -1,14 +1,14 @@ - Adducts Charge Intensity RT aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_RT assay_rt compound_name decoy delta_RT delta_rt filename id leftWidth mz rightWidth run_id sum_formula transition_group_id var_ms1_im_ms1_delta_score var_ms1_isotope_correlation_score var_ms1_isotope_overlap_score var_ms1_massdev_score var_ms1_mi_combined_score var_ms1_mi_contrast_score var_ms1_mi_score var_ms1_xcorr_coelution var_ms1_xcorr_coelution_combined var_ms1_xcorr_coelution_contrast var_ms1_xcorr_shape var_ms1_xcorr_shape_combined var_ms1_xcorr_shape_contrast var_ms2_bseries_score var_ms2_dotprod_score var_ms2_elution_model_fit_score var_ms2_im_delta_score var_ms2_im_xcorr_coelution var_ms2_im_xcorr_shape var_ms2_intensity_score var_ms2_isotope_correlation_score var_ms2_isotope_overlap_score var_ms2_library_corr var_ms2_library_dotprod var_ms2_library_manhattan var_ms2_library_rmsd var_ms2_library_rootmeansquare var_ms2_library_sangle var_ms2_log_sn_score var_ms2_manhattan_score var_ms2_massdev_score var_ms2_massdev_score_weighted var_ms2_mi_ratio_score var_ms2_mi_score var_ms2_mi_weighted_score var_ms2_norm_rt_score var_ms2_sonar_lag var_ms2_sonar_log_diff var_ms2_sonar_log_sn var_ms2_sonar_log_trend var_ms2_sonar_rsq var_ms2_sonar_shape var_ms2_xcorr_coelution var_ms2_xcorr_coelution_weighted var_ms2_xcorr_shape var_ms2_xcorr_shape_weighted var_ms2_yseries_score -0 M+H+ 1 6225.5100 112.742 73086.0000 314988.6293 117.983 117.9827 Methamidophos 0 -5.241 -5.2406 /Users/alka/Documents/work/projects/OpenSWATH_... 6629051171722998253 108.687 142.0086 121.853 3140099155998074833 C2H8NO2PS 0 NaN 0.9999 0.0014 5.1551 NaN NaN NaN 0.0000 16.5692 18.6667 0.9377 0.4422 0.2955 0.0 0.4198 NaN NaN NaN NaN 0.8171 0.9986 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 2.8562 1.4708 0.6429 0.3704 NaN NaN NaN 5.5165e-03 NaN NaN NaN NaN NaN NaN 19.8653 15.4097 0.1667 0.0369 0.0 -1 M+H+ 1 9.1700 123.499 457.0000 680.6210 117.983 117.9830 Methamidophos 0 5.516 5.5160 /Users/alka/Documents/work/projects/OpenSWATH_... 5754119136871178291 121.853 142.0086 125.145 3140099155998074833 C2H8NO2PS 0 NaN 0.0000 0.3990 2.1159 NaN NaN NaN 4.1909 4.7088 4.8749 0.4609 0.2621 0.1932 0.0 0.1824 NaN NaN NaN NaN 0.0012 0.5902 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 0.0000 1.6955 3.1363 1.8072 NaN NaN NaN 5.8063e-03 NaN NaN NaN NaN NaN NaN 4.9663 3.8524 0.1667 0.0369 0.0 -2 M+H+ 1 758.0860 117.364 15110.0000 49705.2486 117.983 117.9831 Methamidophos 0 -0.619 -0.6191 /Users/alka/Documents/work/projects/OpenSWATH_... 8877729345763666378 114.448 142.0086 121.854 2408145804652532658 C2H8NO2PS 0 NaN 0.0000 0.0017 6.1092 NaN NaN NaN 1.3367 9.3543 10.3531 0.8274 0.3707 0.2022 0.0 0.4061 NaN NaN NaN NaN 0.7938 0.9959 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 3.6594 1.4708 1.7058 0.9830 NaN NaN NaN 6.5173e-04 NaN NaN NaN NaN NaN NaN 11.1742 8.6680 0.1667 0.0369 0.0 -3 M+H+ 1 198.6610 114.092 2967.0000 12507.2948 117.983 117.9832 Methamidophos 0 -3.891 -3.8912 /Users/alka/Documents/work/projects/OpenSWATH_... 85932679098247514 110.333 142.0086 119.385 1007350642398073598 C2H8NO2PS 0 NaN 0.9908 0.0090 9.8699 NaN NaN NaN 11.5249 12.9492 13.4061 0.4405 0.2372 0.1487 0.0 0.2990 NaN NaN NaN NaN 0.7852 0.8089 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 2.5903 1.6034 3.4600 1.9938 NaN NaN NaN 4.0960e-03 NaN NaN NaN NaN NaN NaN 13.6574 10.5942 0.1667 0.0369 0.0 -4 M+H+ 1 165.0120 117.559 2930.0000 7454.3224 117.983 117.9831 Methamidophos 0 -0.424 -0.4241 /Users/alka/Documents/work/projects/OpenSWATH_... 8707546894788545451 115.270 142.0086 120.208 214379053049545951 C2H8NO2PS 0 NaN 0.0000 0.0103 1.2196 NaN NaN NaN 7.3341 8.2174 8.4327 0.4717 0.2578 0.1760 0.0 0.3276 NaN NaN NaN NaN 0.7501 0.9725 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 3.3673 1.6283 2.0336 1.1719 NaN NaN NaN 4.4643e-04 NaN NaN NaN NaN NaN NaN 8.6911 6.7417 0.1667 0.0369 0.0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 M+Na+ 1 6.1690 461.710 62.0000 50.9950 475.718 475.7176 Isoprothiolane_decoy 1 -14.008 -14.0076 /Users/alka/Documents/work/projects/OpenSWATH_... 1964030463263715205 460.065 313.0539 463.356 6344784885616328495 C12H18O4S2 4 NaN 0.0000 0.0000 159.7169 NaN NaN NaN 3.7247 3.5925 3.4444 0.1667 0.1201 0.0579 0.0 0.4601 NaN NaN NaN NaN 0.0290 -0.4941 0.0 -0.5000 0.3184 1.5837 0.5991 0.6960 1.4453 0.0000 1.3431 6.0865 14.5563 NaN NaN NaN 1.4745e-02 NaN NaN NaN NaN NaN NaN 3.7247 2.9692 0.1667 0.0103 0.0 -96 M+Na+ 1 9.8760 472.007 0.0000 0.0000 475.718 475.7183 Isoprothiolane_decoy 1 -3.711 -3.7113 /Users/alka/Documents/work/projects/OpenSWATH_... 2859267736951343070 471.061 313.0539 472.707 5621422513449725403 C12H18O4S2 4 NaN -0.3997 0.0000 29.1235 NaN NaN NaN 2.0000 2.3868 2.0000 0.0000 0.1190 0.0000 0.0 0.5684 NaN NaN NaN NaN 0.0606 -0.3470 0.0 -0.9897 0.4499 1.1673 0.5315 0.5647 1.3934 1.4663 1.0597 13.0114 25.3056 NaN NaN NaN 3.9066e-03 NaN NaN NaN NaN NaN NaN 2.1499 1.9383 0.4167 0.0308 0.0 -97 M+Na+ 1 21.3980 468.593 39.4751 51.0260 475.718 475.7180 Isoprothiolane_decoy 1 -7.125 -7.1250 /Users/alka/Documents/work/projects/OpenSWATH_... 7551648122635593597 466.124 313.0539 471.061 5621422513449725403 C12H18O4S2 4 NaN 0.9682 0.6667 56.4466 NaN NaN NaN 8.6911 8.5100 8.5556 0.1667 0.1175 0.0519 0.0 0.5610 NaN NaN NaN NaN 0.1313 0.5428 0.0 1.0000 0.8929 0.8327 0.1352 0.1434 0.1780 0.0000 1.0697 13.8422 17.2104 NaN NaN NaN 7.5000e-03 NaN NaN NaN NaN NaN NaN 8.6911 2.5514 0.1667 0.6355 0.0 -98 M+Na+ 1 9.0475 486.697 0.0000 0.0000 475.718 475.7184 Isoprothiolane_decoy 1 10.979 10.9786 /Users/alka/Documents/work/projects/OpenSWATH_... 1499988340730146690 485.051 313.0539 488.342 5621422513449725403 C12H18O4S2 4 NaN 0.0000 0.0000 159.7169 NaN NaN NaN 5.0000 5.8530 5.0000 0.0000 0.0476 0.0000 0.0 0.4831 NaN NaN NaN NaN 0.0555 -0.4200 0.0 1.0000 0.8929 0.8327 0.1352 0.1434 0.1780 1.2993 1.1348 24.0979 12.6527 NaN NaN NaN 1.1556e-02 NaN NaN NaN NaN NaN NaN 6.2079 1.8224 0.1667 0.6355 0.0 -99 M+Na+ 1 44.4405 475.691 3081.0000 8395.4228 475.718 475.7179 Isoprothiolane_decoy 1 -0.027 -0.0269 /Users/alka/Documents/work/projects/OpenSWATH_... 1898773645914670705 473.231 313.0539 478.991 5621422513449725403 C12H18O4S2 4 NaN 0.2104 0.0000 0.3133 NaN NaN NaN 0.0000 5.3096 5.9138 0.8544 0.5499 0.3979 0.0 0.5071 NaN NaN NaN NaN 0.2726 -0.2917 0.0 0.9245 0.9266 0.4163 0.1176 0.1252 0.2708 0.0000 1.0414 58.9369 65.1163 NaN NaN NaN 2.8275e-05 NaN NaN NaN NaN NaN NaN 7.3364 1.5093 0.4733 0.7816 0.0 + Adducts Charge Intensity RT aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt compound_name decoy delta_iRT delta_rt filename id leftWidth mz rightWidth run_id sum_formula transition_group_id var_ms1_im_ms1_delta_score var_ms1_isotope_correlation_score var_ms1_isotope_overlap_score var_ms1_massdev_score var_ms1_mi_combined_score var_ms1_mi_contrast_score var_ms1_mi_score var_ms1_xcorr_coelution var_ms1_xcorr_coelution_combined var_ms1_xcorr_coelution_contrast var_ms1_xcorr_shape var_ms1_xcorr_shape_combined var_ms1_xcorr_shape_contrast var_ms2_bseries_score var_ms2_dotprod_score var_ms2_elution_model_fit_score var_ms2_im_delta_score var_ms2_im_xcorr_coelution var_ms2_im_xcorr_shape var_ms2_intensity_score var_ms2_isotope_correlation_score var_ms2_isotope_overlap_score var_ms2_library_corr var_ms2_library_dotprod var_ms2_library_manhattan var_ms2_library_rmsd var_ms2_library_rootmeansquare var_ms2_library_sangle var_ms2_log_sn_score var_ms2_manhattan_score var_ms2_massdev_score var_ms2_massdev_score_weighted var_ms2_mi_ratio_score var_ms2_mi_score var_ms2_mi_weighted_score var_ms2_norm_rt_score var_ms2_sonar_lag var_ms2_sonar_log_diff var_ms2_sonar_log_sn var_ms2_sonar_log_trend var_ms2_sonar_rsq var_ms2_sonar_shape var_ms2_xcorr_coelution var_ms2_xcorr_coelution_weighted var_ms2_xcorr_shape var_ms2_xcorr_shape_weighted var_ms2_yseries_score +0 M+H+ 1 6225.5100 112.742 73086.0000 314988.6293 117.983 117.9827 Methamidophos 0 -5.241 -5.2406 /Users/alka/Documents/work/projects/OpenSWATH_... 6629051171722998253 108.687 142.0086 121.853 3140099155998074833 C2H8NO2PS 0 NaN 0.9999 0.0014 5.1551 NaN NaN NaN 0.0000 16.5692 18.6667 0.9377 0.4422 0.2955 0.0 0.4198 NaN NaN NaN NaN 0.8171 0.9986 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 2.8562 1.4708 0.6429 0.3704 NaN NaN NaN 5.5165e-03 NaN NaN NaN NaN NaN NaN 19.8653 15.4097 0.1667 0.0369 0.0 +1 M+H+ 1 9.1700 123.499 457.0000 680.6210 117.983 117.9830 Methamidophos 0 5.516 5.5160 /Users/alka/Documents/work/projects/OpenSWATH_... 5754119136871178291 121.853 142.0086 125.145 3140099155998074833 C2H8NO2PS 0 NaN 0.0000 0.3990 2.1159 NaN NaN NaN 4.1909 4.7088 4.8749 0.4609 0.2621 0.1932 0.0 0.1824 NaN NaN NaN NaN 0.0012 0.5902 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 0.0000 1.6955 3.1363 1.8072 NaN NaN NaN 5.8063e-03 NaN NaN NaN NaN NaN NaN 4.9663 3.8524 0.1667 0.0369 0.0 +2 M+H+ 1 758.0860 117.364 15110.0000 49705.2486 117.983 117.9831 Methamidophos 0 -0.619 -0.6191 /Users/alka/Documents/work/projects/OpenSWATH_... 8877729345763666378 114.448 142.0086 121.854 2408145804652532658 C2H8NO2PS 0 NaN 0.0000 0.0017 6.1092 NaN NaN NaN 1.3367 9.3543 10.3531 0.8274 0.3707 0.2022 0.0 0.4061 NaN NaN NaN NaN 0.7938 0.9959 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 3.6594 1.4708 1.7058 0.9830 NaN NaN NaN 6.5173e-04 NaN NaN NaN NaN NaN NaN 11.1742 8.6680 0.1667 0.0369 0.0 +3 M+H+ 1 198.6610 114.092 2967.0000 12507.2948 117.983 117.9832 Methamidophos 0 -3.891 -3.8912 /Users/alka/Documents/work/projects/OpenSWATH_... 85932679098247514 110.333 142.0086 119.385 1007350642398073598 C2H8NO2PS 0 NaN 0.9908 0.0090 9.8699 NaN NaN NaN 11.5249 12.9492 13.4061 0.4405 0.2372 0.1487 0.0 0.2990 NaN NaN NaN NaN 0.7852 0.8089 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 2.5903 1.6034 3.4600 1.9938 NaN NaN NaN 4.0960e-03 NaN NaN NaN NaN NaN NaN 13.6574 10.5942 0.1667 0.0369 0.0 +4 M+H+ 1 165.0120 117.559 2930.0000 7454.3224 117.983 117.9831 Methamidophos 0 -0.424 -0.4241 /Users/alka/Documents/work/projects/OpenSWATH_... 8707546894788545451 115.270 142.0086 120.208 214379053049545951 C2H8NO2PS 0 NaN 0.0000 0.0103 1.2196 NaN NaN NaN 7.3341 8.2174 8.4327 0.4717 0.2578 0.1760 0.0 0.3276 NaN NaN NaN NaN 0.7501 0.9725 0.0 -0.4959 0.4383 1.4720 0.5386 0.5975 1.2820 3.3673 1.6283 2.0336 1.1719 NaN NaN NaN 4.4643e-04 NaN NaN NaN NaN NaN NaN 8.6911 6.7417 0.1667 0.0369 0.0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 M+Na+ 1 6.1690 461.710 62.0000 50.9950 475.718 475.7176 Isoprothiolane_decoy 1 -14.008 -14.0076 /Users/alka/Documents/work/projects/OpenSWATH_... 1964030463263715205 460.065 313.0539 463.356 6344784885616328495 C12H18O4S2 4 NaN 0.0000 0.0000 159.7169 NaN NaN NaN 3.7247 3.5925 3.4444 0.1667 0.1201 0.0579 0.0 0.4601 NaN NaN NaN NaN 0.0290 -0.4941 0.0 -0.5000 0.3184 1.5837 0.5991 0.6960 1.4453 0.0000 1.3431 6.0865 14.5563 NaN NaN NaN 1.4745e-02 NaN NaN NaN NaN NaN NaN 3.7247 2.9692 0.1667 0.0103 0.0 +96 M+Na+ 1 9.8760 472.007 0.0000 0.0000 475.718 475.7183 Isoprothiolane_decoy 1 -3.711 -3.7113 /Users/alka/Documents/work/projects/OpenSWATH_... 2859267736951343070 471.061 313.0539 472.707 5621422513449725403 C12H18O4S2 4 NaN -0.3997 0.0000 29.1235 NaN NaN NaN 2.0000 2.3868 2.0000 0.0000 0.1190 0.0000 0.0 0.5684 NaN NaN NaN NaN 0.0606 -0.3470 0.0 -0.9897 0.4499 1.1673 0.5315 0.5647 1.3934 1.4663 1.0597 13.0114 25.3056 NaN NaN NaN 3.9066e-03 NaN NaN NaN NaN NaN NaN 2.1499 1.9383 0.4167 0.0308 0.0 +97 M+Na+ 1 21.3980 468.593 39.4751 51.0260 475.718 475.7180 Isoprothiolane_decoy 1 -7.125 -7.1250 /Users/alka/Documents/work/projects/OpenSWATH_... 7551648122635593597 466.124 313.0539 471.061 5621422513449725403 C12H18O4S2 4 NaN 0.9682 0.6667 56.4466 NaN NaN NaN 8.6911 8.5100 8.5556 0.1667 0.1175 0.0519 0.0 0.5610 NaN NaN NaN NaN 0.1313 0.5428 0.0 1.0000 0.8929 0.8327 0.1352 0.1434 0.1780 0.0000 1.0697 13.8422 17.2104 NaN NaN NaN 7.5000e-03 NaN NaN NaN NaN NaN NaN 8.6911 2.5514 0.1667 0.6355 0.0 +98 M+Na+ 1 9.0475 486.697 0.0000 0.0000 475.718 475.7184 Isoprothiolane_decoy 1 10.979 10.9786 /Users/alka/Documents/work/projects/OpenSWATH_... 1499988340730146690 485.051 313.0539 488.342 5621422513449725403 C12H18O4S2 4 NaN 0.0000 0.0000 159.7169 NaN NaN NaN 5.0000 5.8530 5.0000 0.0000 0.0476 0.0000 0.0 0.4831 NaN NaN NaN NaN 0.0555 -0.4200 0.0 1.0000 0.8929 0.8327 0.1352 0.1434 0.1780 1.2993 1.1348 24.0979 12.6527 NaN NaN NaN 1.1556e-02 NaN NaN NaN NaN NaN NaN 6.2079 1.8224 0.1667 0.6355 0.0 +99 M+Na+ 1 44.4405 475.691 3081.0000 8395.4228 475.718 475.7179 Isoprothiolane_decoy 1 -0.027 -0.0269 /Users/alka/Documents/work/projects/OpenSWATH_... 1898773645914670705 473.231 313.0539 478.991 5621422513449725403 C12H18O4S2 4 NaN 0.2104 0.0000 0.3133 NaN NaN NaN 0.0000 5.3096 5.9138 0.8544 0.5499 0.3979 0.0 0.5071 NaN NaN NaN NaN 0.2726 -0.2917 0.0 0.9245 0.9266 0.4163 0.1176 0.1252 0.2708 0.0000 1.0414 58.9369 65.1163 NaN NaN NaN 2.8275e-05 NaN NaN NaN NaN NaN NaN 7.3364 1.5093 0.4733 0.7816 0.0 [100 rows x 67 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_unscored.out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_unscored.out index 30555765..c37f5846 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_unscored.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_unscored.out @@ -1,14 +1,14 @@ - Charge Intensity RT aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_RT assay_rt decoy delta_RT delta_rt filename id leftWidth mz rightWidth run_id transition_group_id var_ms1_isotope_correlation_score var_ms1_isotope_overlap_score var_ms1_massdev_score var_ms1_xcorr_coelution var_ms1_xcorr_shape var_ms2_bseries_score var_ms2_dotprod_score var_ms2_elution_model_fit_score var_ms2_intensity_score var_ms2_isotope_correlation_score var_ms2_isotope_overlap_score var_ms2_library_corr var_ms2_library_dotprod var_ms2_library_manhattan var_ms2_library_rmsd var_ms2_library_rootmeansquare var_ms2_library_sangle var_ms2_log_sn_score var_ms2_manhattan_score var_ms2_massdev_score var_ms2_massdev_score_weighted var_ms2_norm_rt_score var_ms2_sonar_lag var_ms2_sonar_log_diff var_ms2_sonar_log_sn var_ms2_sonar_log_trend var_ms2_sonar_rsq var_ms2_sonar_shape var_ms2_xcorr_coelution var_ms2_xcorr_coelution_weighted var_ms2_xcorr_shape var_ms2_xcorr_shape_weighted var_ms2_yseries_score -0 2 207283.0 2661.55 117220.7482 854645.0 26.5 2595.5788 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz -4409520928686189639 2640.5100 728.8795 2705.3701 -8670811102654834151 0 0.9835 0.1247 1.3707 0.0000 0.9907 9.0 0.7708 NaN 0.7811 0.9962 0.0000 0.9987 0.9978 0.0659 0.0239 0.0262 0.0725 4.7388 0.7451 0.3398 0.1793 0.0194 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9936 0.9958 11.0 -1 2 6385.0 2605.74 8790.7812 104006.0 26.5 2595.5733 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 260819276075322832 2575.6399 728.8795 2623.4399 -8670811102654834151 0 0.9555 0.2667 5.4202 5.1430 0.6532 2.0 0.7610 NaN 0.0241 0.9216 0.1104 0.8271 0.9764 0.2223 0.0995 0.1102 0.3579 1.3130 0.7675 4.5391 3.5103 0.0032 NaN NaN NaN NaN NaN NaN 7.0474 2.3104 0.7806 0.8341 6.0 -2 2 5180.0 2832.77 10419.7435 241873.0 26.5 2595.5778 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 5163914660633416481 2811.2000 728.8795 2855.5801 -8670811102654834151 0 0.6123 0.4707 8.9907 4.0083 0.5985 2.0 0.7923 NaN 0.0195 0.8418 0.0911 0.9916 0.9960 0.0958 0.0387 0.0426 0.1243 0.6699 0.6863 4.7328 2.9948 0.0690 NaN NaN NaN NaN NaN NaN 4.3568 2.0950 0.6909 0.6974 6.0 -3 2 2693.0 2795.06 4036.5600 25862.3 26.5 2595.5754 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 6932937885234622359 2790.7200 728.8795 2811.2000 -8670811102654834151 0 0.1872 2.4435 1.8505 4.0083 0.6422 4.0 0.7883 NaN 0.0101 0.6804 0.1794 0.4554 0.9481 0.3084 0.1494 0.1882 0.6202 0.6284 0.6986 5.4811 3.8885 0.0581 NaN NaN NaN NaN NaN NaN 1.6487 0.9186 0.7955 0.7971 6.0 -4 2 3838.0 2708.53 5750.4716 73215.2 26.5 2595.5750 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 8534214264242363560 2705.3701 728.8795 2736.0901 -8670811102654834151 0 -0.3692 0.7498 7.1610 6.7500 0.4827 3.0 0.8181 NaN 0.0145 0.7660 0.1334 0.8344 0.9736 0.2367 0.1055 0.1166 0.3772 0.6034 0.6468 2.5636 1.1471 0.0330 NaN NaN NaN NaN NaN NaN 3.4656 0.9347 0.6790 0.7379 5.0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 10959.0 2538.15 10810.2698 56553.9 16.3 2245.2307 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz -5430403952310232561 2520.5100 612.3184 2544.4099 -8670811102654834151 19 0.7016 0.5650 8.3246 1.8165 0.5954 2.0 0.6942 NaN 0.0121 0.4590 0.6792 -0.1524 0.9216 0.3502 0.1629 0.1915 0.6979 0.6856 0.7628 4.0046 4.4541 0.0856 NaN NaN NaN NaN NaN NaN 1.4718 0.6660 0.7586 0.6855 6.0 -96 2 20746.0 2120.97 10568.0806 85676.6 16.3 2245.2373 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz -610141049182829192 2100.6001 612.3184 2141.5701 -8670811102654834151 19 0.5016 0.9312 12.3692 1.8165 0.6723 4.0 0.7176 NaN 0.0230 0.6551 0.4036 0.5606 0.9754 0.2021 0.0921 0.1010 0.3719 0.7732 0.7428 4.7941 2.9011 0.0354 NaN NaN NaN NaN NaN NaN 2.5491 1.1289 0.8015 0.7928 5.0 -97 2 48058.0 2291.53 6480.8607 75465.1 16.3 2245.2345 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 2043199813358518344 2267.8799 612.3184 2298.6101 -8670811102654834151 19 0.7397 3.0841 3.4962 5.3116 0.4407 5.0 0.6916 NaN 0.0533 0.7474 0.0866 -0.3525 0.8138 0.5478 0.2791 0.3394 1.0605 1.2194 0.8070 2.2562 2.3997 0.0141 NaN NaN NaN NaN NaN NaN 4.4101 1.9926 0.7236 0.7141 7.0 -98 2 16553.0 2317.38 9656.7598 89588.0 16.3 2245.2345 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 6262215160571261022 2302.0200 612.3184 2332.7400 -8670811102654834151 19 0.4212 0.8821 2.1071 7.7080 0.4296 4.0 0.7153 NaN 0.0183 0.0829 0.2014 -0.0107 0.9622 0.2794 0.1337 0.1370 0.5120 0.2548 0.7596 1.9435 1.6447 0.0216 NaN NaN NaN NaN NaN NaN 0.7830 0.1896 0.7600 0.7459 7.0 -99 2 597887.0 2230.18 269150.5777 1192530.0 16.3 2245.2318 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 6870255268859409918 2213.2600 612.3184 2247.3999 -8670811102654834151 19 0.9939 0.1300 2.0203 0.0000 0.9990 8.0 0.7684 NaN 0.6626 0.9928 0.0000 0.8658 0.9884 0.1569 0.0719 0.0725 0.2613 4.7826 0.7659 1.6355 1.5429 0.0037 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9992 0.9993 8.0 + Charge Intensity RT aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt decoy delta_iRT delta_rt filename id leftWidth mz rightWidth run_id transition_group_id var_ms1_isotope_correlation_score var_ms1_isotope_overlap_score var_ms1_massdev_score var_ms1_xcorr_coelution var_ms1_xcorr_shape var_ms2_bseries_score var_ms2_dotprod_score var_ms2_elution_model_fit_score var_ms2_intensity_score var_ms2_isotope_correlation_score var_ms2_isotope_overlap_score var_ms2_library_corr var_ms2_library_dotprod var_ms2_library_manhattan var_ms2_library_rmsd var_ms2_library_rootmeansquare var_ms2_library_sangle var_ms2_log_sn_score var_ms2_manhattan_score var_ms2_massdev_score var_ms2_massdev_score_weighted var_ms2_norm_rt_score var_ms2_sonar_lag var_ms2_sonar_log_diff var_ms2_sonar_log_sn var_ms2_sonar_log_trend var_ms2_sonar_rsq var_ms2_sonar_shape var_ms2_xcorr_coelution var_ms2_xcorr_coelution_weighted var_ms2_xcorr_shape var_ms2_xcorr_shape_weighted var_ms2_yseries_score +0 2 207283.0 2661.55 117220.7482 854645.0 26.5 2595.5788 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz -4409520928686189639 2640.5100 728.8795 2705.3701 -8670811102654834151 0 0.9835 0.1247 1.3707 0.0000 0.9907 9.0 0.7708 NaN 0.7811 0.9962 0.0000 0.9987 0.9978 0.0659 0.0239 0.0262 0.0725 4.7388 0.7451 0.3398 0.1793 0.0194 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9936 0.9958 11.0 +1 2 5180.0 2832.77 10419.7435 241873.0 26.5 2595.5778 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 5163914660633416481 2811.2000 728.8795 2855.5801 -8670811102654834151 0 0.6123 0.4707 8.9907 4.0083 0.5985 2.0 0.7923 NaN 0.0195 0.8418 0.0911 0.9916 0.9960 0.0958 0.0387 0.0426 0.1243 0.6699 0.6863 4.7328 2.9948 0.0690 NaN NaN NaN NaN NaN NaN 4.3568 2.0950 0.6909 0.6974 6.0 +2 2 2693.0 2795.06 4036.5600 25862.3 26.5 2595.5754 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 6932937885234622359 2790.7200 728.8795 2811.2000 -8670811102654834151 0 0.1872 2.4435 1.8505 4.0083 0.6422 4.0 0.7883 NaN 0.0101 0.6804 0.1794 0.4554 0.9481 0.3084 0.1494 0.1882 0.6202 0.6284 0.6986 5.4811 3.8885 0.0581 NaN NaN NaN NaN NaN NaN 1.6487 0.9186 0.7955 0.7971 6.0 +3 2 6385.0 2605.74 8790.7812 104006.0 26.5 2595.5733 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 260819276075322832 2575.6399 728.8795 2623.4399 -8670811102654834151 0 0.9555 0.2667 5.4202 5.1430 0.6532 2.0 0.7610 NaN 0.0241 0.9216 0.1104 0.8271 0.9764 0.2223 0.0995 0.1102 0.3579 1.3130 0.7675 4.5391 3.5103 0.0032 NaN NaN NaN NaN NaN NaN 7.0474 2.3104 0.7806 0.8341 6.0 +4 2 3838.0 2708.53 5750.4716 73215.2 26.5 2595.5750 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 8534214264242363560 2705.3701 728.8795 2736.0901 -8670811102654834151 0 -0.3692 0.7498 7.1610 6.7500 0.4827 3.0 0.8181 NaN 0.0145 0.7660 0.1334 0.8344 0.9736 0.2367 0.1055 0.1166 0.3772 0.6034 0.6468 2.5636 1.1471 0.0330 NaN NaN NaN NaN NaN NaN 3.4656 0.9347 0.6790 0.7379 5.0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 597887.0 2230.18 269150.5777 1192530.0 16.3 2245.2318 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 6870255268859409918 2213.2600 612.3184 2247.3999 -8670811102654834151 19 0.9939 0.1300 2.0203 0.0000 0.9990 8.0 0.7684 NaN 0.6626 0.9928 0.0000 0.8658 0.9884 0.1569 0.0719 0.0725 0.2613 4.7826 0.7659 1.6355 1.5429 0.0037 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9992 0.9993 8.0 +96 2 48058.0 2291.53 6480.8607 75465.1 16.3 2245.2345 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 2043199813358518344 2267.8799 612.3184 2298.6101 -8670811102654834151 19 0.7397 3.0841 3.4962 5.3116 0.4407 5.0 0.6916 NaN 0.0533 0.7474 0.0866 -0.3525 0.8138 0.5478 0.2791 0.3394 1.0605 1.2194 0.8070 2.2562 2.3997 0.0141 NaN NaN NaN NaN NaN NaN 4.4101 1.9926 0.7236 0.7141 7.0 +97 2 16553.0 2317.38 9656.7598 89588.0 16.3 2245.2345 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 6262215160571261022 2302.0200 612.3184 2332.7400 -8670811102654834151 19 0.4212 0.8821 2.1071 7.7080 0.4296 4.0 0.7153 NaN 0.0183 0.0829 0.2014 -0.0107 0.9622 0.2794 0.1337 0.1370 0.5120 0.2548 0.7596 1.9435 1.6447 0.0216 NaN NaN NaN NaN NaN NaN 0.7830 0.1896 0.7600 0.7459 7.0 +98 2 20746.0 2120.97 10568.0806 85676.6 16.3 2245.2373 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz -610141049182829192 2100.6001 612.3184 2141.5701 -8670811102654834151 19 0.5016 0.9312 12.3692 1.8165 0.6723 4.0 0.7176 NaN 0.0230 0.6551 0.4036 0.5606 0.9754 0.2021 0.0921 0.1010 0.3719 0.7732 0.7428 4.7941 2.9011 0.0354 NaN NaN NaN NaN NaN NaN 2.5491 1.1289 0.8015 0.7928 5.0 +99 2 10959.0 2538.15 10810.2698 56553.9 16.3 2245.2307 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz -5430403952310232561 2520.5100 612.3184 2544.4099 -8670811102654834151 19 0.7016 0.5650 8.3246 1.8165 0.5954 2.0 0.6942 NaN 0.0121 0.4590 0.6792 -0.1524 0.9216 0.3502 0.1629 0.1915 0.6979 0.6856 0.7628 4.0046 4.4541 0.0856 NaN NaN NaN NaN NaN NaN 1.4718 0.6660 0.7586 0.6855 6.0 [100 rows x 50 columns]