11import os
2- import pickle
3- from shutil import copyfile
4- import sqlite3
5- from typing import Literal , Tuple
62import re
3+ import sqlite3
4+ from typing import Tuple
5+
6+ import click
77import duckdb
8- import pandas as pd
98import numpy as np
10- import click
9+ import pandas as pd
1110from loguru import logger
11+
12+ from ..._config import ExportIOConfig
13+ from .._base import BaseOSWReader , BaseOSWWriter
1214from ..util import (
1315 check_sqlite_table ,
14- check_duckdb_table ,
15- unimod_to_codename ,
16- write_scores_sql_command ,
17- load_sqlite_scanner ,
1816 get_table_columns ,
1917 get_table_columns_with_types ,
18+ load_sqlite_scanner ,
19+ unimod_to_codename ,
20+ write_scores_sql_command ,
2021)
21- from .._base import BaseOSWReader , BaseOSWWriter
22- from ..._config import ExportIOConfig
2322
2423
2524class OSWReader (BaseOSWReader ):
@@ -167,10 +166,67 @@ def _check_alignment_presence(self, con):
167166 con , "SCORE_ALIGNMENT"
168167 )
169168
169+ def _has_im_boundaries (self , con ) -> bool :
170+ """Return True if the FEATURE table contains IM boundary columns.
171+
172+ Older OSW files may not have these columns; this helper centralises the
173+ PRAGMA check so callers don't duplicate the logic.
174+ """
175+ try :
176+ cols = [
177+ r [1 ] for r in con .execute ("PRAGMA table_info('FEATURE')" ).fetchall ()
178+ ]
179+ except Exception :
180+ return False
181+ return "EXP_IM_LEFTWIDTH" in cols and "EXP_IM_RIGHTWIDTH" in cols
182+
183+ def _has_im (self , con ) -> bool :
184+ """Return True if the FEATURE table contains the EXP_IM column.
185+
186+ Older OSW files may not have this column; centralise the PRAGMA
187+ check so callers don't duplicate the logic.
188+ """
189+ try :
190+ cols = [
191+ r [1 ] for r in con .execute ("PRAGMA table_info('FEATURE')" ).fetchall ()
192+ ]
193+ except Exception :
194+ return False
195+ return "EXP_IM" in cols
196+
170197 def _read_unscored_data (self , con ):
171198 """Read data from unscored files."""
172199 score_sql = self ._build_score_sql (con )
173200
201+ # IM columns may or may not be present; centralised checks
202+ has_im_boundaries = self ._has_im_boundaries (con )
203+ has_im = self ._has_im (con )
204+
205+ # Compose EXP_IM (or NULL) plus IM boundary columns (or NULLs)
206+ im_cols_sql = (
207+ (
208+ """FEATURE.EXP_IM AS EXP_IM,
209+ FEATURE.EXP_IM_LEFTWIDTH AS IM_leftWidth,
210+ FEATURE.EXP_IM_RIGHTWIDTH AS IM_rightWidth"""
211+ )
212+ if has_im and has_im_boundaries
213+ else (
214+ """FEATURE.EXP_IM AS EXP_IM,
215+ NULL AS IM_leftWidth,
216+ NULL AS IM_rightWidth"""
217+ )
218+ if has_im and not has_im_boundaries
219+ else (
220+ """NULL AS EXP_IM,
221+ FEATURE.EXP_IM_LEFTWIDTH AS IM_leftWidth,
222+ FEATURE.EXP_IM_RIGHTWIDTH AS IM_rightWidth"""
223+ )
224+ if (not has_im ) and has_im_boundaries
225+ else """NULL AS EXP_IM,
226+ NULL AS IM_leftWidth,
227+ NULL AS IM_rightWidth"""
228+ )
229+
174230 query = f"""
175231 SELECT
176232 RUN.ID AS id_run,
@@ -191,7 +247,8 @@ def _read_unscored_data(self, con):
191247 FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area,
192248 FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex,
193249 FEATURE.LEFT_WIDTH AS leftWidth,
194- FEATURE.RIGHT_WIDTH AS rightWidth
250+ FEATURE.RIGHT_WIDTH AS rightWidth,
251+ { im_cols_sql }
195252 { score_sql }
196253 FROM PRECURSOR
197254 INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID
@@ -224,6 +281,34 @@ def _read_peptidoform_data(self, con, cfg):
224281 """Read data with peptidoform IPF information."""
225282 score_ms1_pep , link_ms1 = self ._get_ms1_score_info (con )
226283
284+ # IM columns may or may not be present; centralised checks
285+ has_im_boundaries = self ._has_im_boundaries (con )
286+ has_im = self ._has_im (con )
287+
288+ im_cols_sql = (
289+ (
290+ """FEATURE.EXP_IM AS EXP_IM,
291+ FEATURE.EXP_IM_LEFTWIDTH AS IM_leftWidth,
292+ FEATURE.EXP_IM_RIGHTWIDTH AS IM_rightWidth,"""
293+ )
294+ if has_im and has_im_boundaries
295+ else (
296+ """FEATURE.EXP_IM AS EXP_IM,
297+ NULL AS IM_leftWidth,
298+ NULL AS IM_rightWidth,"""
299+ )
300+ if has_im and not has_im_boundaries
301+ else (
302+ """NULL AS EXP_IM,
303+ FEATURE.EXP_IM_LEFTWIDTH AS IM_leftWidth,
304+ FEATURE.EXP_IM_RIGHTWIDTH AS IM_rightWidth,"""
305+ )
306+ if (not has_im ) and has_im_boundaries
307+ else """NULL AS EXP_IM,
308+ NULL AS IM_leftWidth,
309+ NULL AS IM_rightWidth,"""
310+ )
311+
227312 query = f"""
228313 SELECT RUN.ID AS id_run,
229314 PEPTIDE.ID AS id_peptide,
@@ -247,6 +332,7 @@ def _read_peptidoform_data(self, con, cfg):
247332 FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex,
248333 FEATURE.LEFT_WIDTH AS leftWidth,
249334 FEATURE.RIGHT_WIDTH AS rightWidth,
335+ { im_cols_sql }
250336 { score_ms1_pep } AS ms1_pep,
251337 SCORE_MS2.PEP AS ms2_pep,
252338 SCORE_IPF.PRECURSOR_PEAKGROUP_PEP AS precursor_pep,
@@ -275,6 +361,34 @@ def _read_augmented_data(self, con, cfg):
275361 """Read standard data augmented with IPF information."""
276362 score_ms1_pep , link_ms1 = self ._get_ms1_score_info (con )
277363
364+ # IM columns may or may not be present; centralised checks
365+ has_im_boundaries = self ._has_im_boundaries (con )
366+ has_im = self ._has_im (con )
367+
368+ im_cols_sql = (
369+ (
370+ """FEATURE.EXP_IM AS EXP_IM,
371+ FEATURE.EXP_IM_LEFTWIDTH AS IM_leftWidth,
372+ FEATURE.EXP_IM_RIGHTWIDTH AS IM_rightWidth,"""
373+ )
374+ if has_im and has_im_boundaries
375+ else (
376+ """FEATURE.EXP_IM AS EXP_IM,
377+ NULL AS IM_leftWidth,
378+ NULL AS IM_rightWidth,"""
379+ )
380+ if has_im and not has_im_boundaries
381+ else (
382+ """NULL AS EXP_IM,
383+ FEATURE.EXP_IM_LEFTWIDTH AS IM_leftWidth,
384+ FEATURE.EXP_IM_RIGHTWIDTH AS IM_rightWidth,"""
385+ )
386+ if (not has_im ) and has_im_boundaries
387+ else """NULL AS EXP_IM,
388+ NULL AS IM_leftWidth,
389+ NULL AS IM_rightWidth,"""
390+ )
391+
278392 query = f"""
279393 SELECT RUN.ID AS id_run,
280394 PEPTIDE.ID AS id_peptide,
@@ -298,6 +412,7 @@ def _read_augmented_data(self, con, cfg):
298412 FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex,
299413 FEATURE.LEFT_WIDTH AS leftWidth,
300414 FEATURE.RIGHT_WIDTH AS rightWidth,
415+ { im_cols_sql }
301416 SCORE_MS2.RANK AS peak_group_rank,
302417 SCORE_MS2.SCORE AS d_score,
303418 SCORE_MS2.QVALUE AS m_score,
@@ -326,6 +441,17 @@ def _read_standard_data(self, con, cfg):
326441 # Check if we should attempt alignment integration
327442 use_alignment = cfg .use_alignment and self ._check_alignment_presence (con )
328443
444+ # IM boundary columns may or may not be present; centralised check
445+ has_im_boundaries = self ._has_im_boundaries (con )
446+
447+ im_cols_sql = (
448+ """FEATURE.EXP_IM_LEFTWIDTH AS IM_leftWidth,
449+ FEATURE.EXP_IM_RIGHTWIDTH AS IM_rightWidth,"""
450+ if has_im_boundaries
451+ else """NULL AS IM_leftWidth,
452+ NULL AS IM_rightWidth,"""
453+ )
454+
329455 # First, get features that pass MS2 QVALUE threshold
330456 query = f"""
331457 SELECT RUN.ID AS id_run,
@@ -350,6 +476,7 @@ def _read_standard_data(self, con, cfg):
350476 FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex,
351477 FEATURE.LEFT_WIDTH AS leftWidth,
352478 FEATURE.RIGHT_WIDTH AS rightWidth,
479+ { im_cols_sql }
353480 SCORE_MS2.RANK AS peak_group_rank,
354481 SCORE_MS2.SCORE AS d_score,
355482 SCORE_MS2.QVALUE AS m_score,
0 commit comments