// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
// SPDX-License-Identifier: BSD-3-Clause
//
// --------------------------------------------------------------------------
// $Maintainer: Hannes Roest $
// $Authors: Hannes Roest $
// --------------------------------------------------------------------------

// Consumers
#include <OpenMS/FORMAT/DATAACCESS/MSDataWritingConsumer.h>
#include <OpenMS/FORMAT/DATAACCESS/MSDataSqlConsumer.h>

// Files
#include <OpenMS/FORMAT/FileHandler.h>
#include <OpenMS/FORMAT/FileTypes.h>
#include <OpenMS/FORMAT/SwathFile.h>
#include <OpenMS/FORMAT/DATAACCESS/MSDataTransformingConsumer.h>
#include <OpenMS/ANALYSIS/OPENSWATH/SwathWindowLoader.h>
#include <OpenMS/ANALYSIS/OPENSWATH/SwathQC.h>
#include <OpenMS/ANALYSIS/OPENSWATH/TransitionTSVFile.h>
#include <OpenMS/ANALYSIS/OPENSWATH/TransitionPQPFile.h>
#include <OpenMS/ANALYSIS/OPENSWATH/OpenSwathOSWWriter.h>
#include <OpenMS/SYSTEM/File.h>

// Kernel and implementations
#include <OpenMS/KERNEL/MSExperiment.h>
#include <OpenMS/ANALYSIS/OPENSWATH/DATAACCESS/SpectrumAccessOpenMS.h>
#include <OpenMS/ANALYSIS/OPENSWATH/DATAACCESS/SpectrumAccessTransforming.h>
#include <OpenMS/ANALYSIS/OPENSWATH/DATAACCESS/SpectrumAccessOpenMSInMemory.h>
#include <OpenMS/OPENSWATHALGO/DATAACCESS/SwathMap.h>

// Helpers
#include <OpenMS/ANALYSIS/OPENSWATH/OpenSwathHelper.h>
#include <OpenMS/ANALYSIS/OPENSWATH/DATAACCESS/DataAccessHelper.h>
#include <OpenMS/ANALYSIS/OPENSWATH/DATAACCESS/SimpleOpenMSSpectraAccessFactory.h>

// Algorithms
#include <OpenMS/ANALYSIS/OPENSWATH/MRMRTNormalizer.h>
#include <OpenMS/ANALYSIS/OPENSWATH/ChromatogramExtractor.h>
#include <OpenMS/ANALYSIS/OPENSWATH/MRMFeatureFinderScoring.h>
#include <OpenMS/ANALYSIS/OPENSWATH/MRMTransitionGroupPicker.h>
#include <OpenMS/ANALYSIS/OPENSWATH/SwathMapMassCorrection.h>

#include <OpenMS/ANALYSIS/OPENSWATH/OpenSwathWorkflow.h>

#include <cassert>
#include <limits>

// #define OPENSWATH_WORKFLOW_DEBUG

using namespace OpenMS;

// OpenMS base classes
#include <OpenMS/APPLICATIONS/TOPPBase.h>
#include <OpenMS/APPLICATIONS/OpenSwathBase.h>
#include <OpenMS/CONCEPT/ProgressLogger.h>


#include <QDir>

//-------------------------------------------------------------
//Doxygen docu
//-------------------------------------------------------------

/**
@page TOPP_OpenSwathWorkflow OpenSwathWorkflow

@brief Complete workflow to run OpenSWATH

This implements the OpenSWATH workflow as described in Rost and Rosenberger
et al. (Nature Biotechnology, 2014) and provides a complete, integrated
analysis tool without the need to run multiple tools consecutively. See also
http://openswath.org/ for additional documentation.

It executes the following steps in order, which are implemented in @ref OpenMS::OpenSwathWorkflow "OpenSwathWorkflow":

<ul>
  <li>Reading of input files, which can be provided as one single mzML or multiple "split" mzMLs (one per SWATH)</li>
  <li>Computing the retention time transformation, mass-to-charge and ion mobility correction using calibrant peptides</li>
  <li>Reading of the transition list</li>
  <li>Extracting the specified transitions</li>
  <li>Scoring the peak groups in the extracted ion chromatograms (XIC)</li>
  <li>Reporting the peak groups and the chromatograms</li>
</ul>


See below or have a look at the INI file (via "OpenSwathWorkflow -write_ini myini.ini") for available parameters and more functionality.

<h3>Input: SWATH maps and assay library (transition list) </h3>
SWATH maps can be provided as mzML files, either as single file directly from
the machine (this assumes that the SWATH method has 1 MS1 and then n MS2
spectra which are ordered the same way for each cycle). E.g. a valid method
would be MS1, MS2 [400-425], MS2 [425-450], MS1, MS2 [400-425], MS2 [425-450]
while an invalid method would be MS1, MS2 [400-425], MS2 [425-450], MS1, MS2
[425-450], MS2 [400-425] where MS2 [xx-yy] indicates an MS2 scan with an
isolation window starting at xx and ending at yy. OpenSwathWorkflow will try
to read the SWATH windows from the data, if this is not possible please
provide a tab-separated list with the correct windows using the
-swath_windows_file parameter (this is recommended). Note that the software
expects extraction windows (i.e. which peptides to extract from
which window) which cannot have overlaps, otherwise peptides will be
extracted from two different windows.

Alternatively, a set of split files (n+1 mzML files) can be provided, each
containing one SWATH map (or MS1 map).

Since the file size can become rather large, it is recommended to not load the
whole file into memory but rather cache it somewhere on the disk using a
fast-access data format. This can be specified using the -readOptions cacheWorkingInMemory
parameter (this is recommended!).

The assay library (transition list) is provided through the @p -tr parameter and can be in one of the following formats:

  <ul>
    <li> @ref OpenMS::TraMLFile "TraML" </li>
    <li> @ref OpenMS::TransitionTSVFile "OpenSWATH TSV transition lists" </li>
    <li> @ref OpenMS::TransitionPQPFile "OpenSWATH PQP SQLite files" </li>
    <li> SpectraST MRM transition lists </li>
    <li> Skyline transition lists </li>
    <li> Spectronaut transition lists </li>
  </ul>

<h3>Parameters</h3>
The current parameters are optimized for 2 hour gradients on SCIEX 5600 /
6600 TripleTOF instruments with a peak width of around 30 seconds using iRT
peptides.  If your chromatography differs, please consider adjusting
@p -Scoring:TransitionGroupPicker:min_peak_width  to allow for smaller or larger
peaks and adjust the @p -rt_extraction_window to use a different extraction
window for the retention time. In m/z domain, consider adjusting
@p -mz_extraction_window to your instrument resolution, which can be in Th or
ppm.

Furthermore, if you wish to use MS1 information, use the @p -enable_ms1 flag
and provide an MS1 map in addition to the SWATH data.

If you encounter issues with peak picking, try to disable peak filtering by
setting @p -Scoring:TransitionGroupPicker:compute_peak_quality false which will
disable the filtering of peaks by chromatographic quality. Furthermore, you
can adjust the smoothing parameters for the peak picking, by adjusting
@p -Scoring:TransitionGroupPicker:PeakPickerChromatogram:sgolay_frame_length or using a
Gaussian smoothing based on your estimated peak width. Adjusting the signal
to noise threshold will make the peaks wider or smaller.

<h3>Output: Feature list and chromatograms </h3>
The output of the OpenSwathWorkflow is a feature list, either as FeatureXML
or a @ref OpenMS::OSWFile "OpenSWATH SQLite file" (use @p -out_features) while the latter is more memory
friendly and can be directly used as input to other tools such as pyProphet (a Python
re-implementation of mProphet) software tool, see Reiter et al (2011, Nature
Methods).
If you analyze large datasets, it is recommended to only use the @ref OpenMS::OSWFile "OSWFile format".
For downstream analysis (e.g. using pyProphet) the @ref OpenMS::OSWFile "OSWFile format" is recommended.

In addition, the extracted chromatograms can be written out using the
@p -out_chrom parameter.

<h4> Feature list output format </h4>

For more information on the feature tables in the @ref OpenMS::OSWFile "OpenSWATH SQLite file output", see @ref OpenMS::OpenSwathOSWWriter "the OpenSwathOSWWriter class".

<h3>Execution flow:</h3>

The overall execution flow for this tool is implemented in @ref OpenMS::OpenSwathWorkflow "OpenSwathWorkflow".

<B>The command line parameters of this tool are:</B>
@verbinclude TOPP_OpenSwathWorkflow.cli
<B>INI file documentation of this tool:</B>
@htmlinclude TOPP_OpenSwathWorkflow.html

*/

// We do not want this class to show up in the docu:
/// @cond TOPPCLASSES
class TOPPOpenSwathWorkflow
  : public TOPPOpenSwathBase
{
public:

  /// Constructs the tool: forwards the tool name ("OpenSwathWorkflow"), its one-line
  /// description and the list of literature references below to TOPPOpenSwathBase.
  /// NOTE(review): the third argument (true) is presumably the "official tool" flag of the
  /// base class -- confirm against the TOPPOpenSwathBase constructor.
  TOPPOpenSwathWorkflow()
    : TOPPOpenSwathBase("OpenSwathWorkflow", "Complete workflow to run OpenSWATH", true,
                        {
                          // each entry: authors, title, journal reference, DOI link
                          {"Roest, H.L. et al.",
                           "OpenSWATH enables automated, targeted analysis of data-independent acquisition MS data",
                           "Nature Biotechnology volume 32, pages 219–223 (2014)",
                           "https://doi.org/10.1038/nbt.2841"},
                          {"Rosenberger, G. et al.",
                           "Inference and quantification of peptidoforms in large sample cohorts by SWATH-MS",
                           "Nature Biotechnology volume 35, pages 781–788 (2017)",
                           "https://doi.org/10.1038/nbt.3908"},
                          {"Meier, F. et al.",
                           "diaPASEF: parallel accumulation–serial fragmentation combined with data-independent acquisition",
                           "Nature Methods volume 17, pages 1229–1236 (2020)",
                           "https://doi.org/10.1038/s41592-020-00998-0"}
                        })
  {
    // nothing else to do: all state is set up by the base class
  }

protected:

  /**
    @brief Registers all command line / INI parameters of the tool.

    The parameters are registered in thematic groups: input data and assay library,
    iRT calibration, output files, extraction windows (RT, m/z, ion mobility; for MS2,
    MS1 and iRT extraction), data loading / performance options, and finally the
    parameter subsections ("Scoring", "Library", "Calibration", ...) whose defaults are
    provided by getSubsectionDefaults_().

    The order of the registration calls is kept unchanged on purpose.
  */
  void registerOptionsAndFlags_() override
  {
    // input data
    registerInputFileList_("in", "<files>", StringList(), "Input files separated by blank");
    setValidFormats_("in", ListUtils::create<String>("mzML,mzXML,sqMass"));

    // assay library (transition list)
    registerInputFile_("tr", "<file>", "", "transition file ('TraML','tsv','pqp')");
    setValidFormats_("tr", ListUtils::create<String>("traML,tsv,pqp"));
    registerStringOption_("tr_type", "<type>", "", "input file type -- default: determined from file extension or content\n", false);
    setValidStrings_("tr_type", ListUtils::create<String>("traML,tsv,pqp"));

    // iRT calibration
    registerStringOption_("auto_irt", "<true|false>", "true",
                          "Whether to sample iRTs on‐the‐fly (true) from the input targeted transition file (instead of passing specific iRT files). This may be useful if standard iRTs (Biognosys iRT kit) were not spiked-in. If set to false, and no additional iRT files are provided via `-tr_irt` / `-tr_irt_nonlinear`, and no transformation is provided via `-rt_norm`, then no calibration is performed.", false, true);
    setValidStrings_("auto_irt", ListUtils::create<String>("true,false"));

    // SWATH window definition
    registerInputFile_("swath_windows_file", "<file>", "", "Optional, tab-separated file containing the SWATH windows for extraction: lower_offset upper_offset. Note that the first line is a header and will be skipped.", false);
    registerFlag_("sort_swath_maps", "Sort input SWATH files when matching to SWATH windows from swath_windows_file", true);

    // MS1 / IPF scoring switches
    registerStringOption_("enable_ms1", "<true|false>", "true", "Extract the precursor ion trace(s) and use for scoring if present", false, true);
    setValidStrings_("enable_ms1", ListUtils::create<String>("true,false"));

    registerStringOption_("enable_ipf", "<true|false>", "true", "Enable additional scoring of identification assays using IPF (see online documentation)", false, true);
    setValidStrings_("enable_ipf", ListUtils::create<String>("true,false"));

    // feature output
    registerOutputFile_("out_features", "<file>", "", "feature output file, either .osw (PyProphet-compatible SQLite file) or .featureXML", false);
    setValidFormats_("out_features", ListUtils::create<String>("osw,featureXML"));

    // this option describes the type of the *output* file 'out_features'
    registerStringOption_("out_features_type", "<type>", "", "output file type -- default: determined from file extension or content\n", false);
    setValidStrings_("out_features_type", {"osw","featureXML"});

    // chromatogram output
    registerOutputFile_("out_chrom", "<file>", "", "Also output all computed chromatograms output in mzML (chrom.mzML) or sqMass (SQLite format)", false, true);
    setValidFormats_("out_chrom", ListUtils::create<String>("mzML,sqMass"));

    // additional QC data
    registerOutputFile_("out_qc", "<file>", "", "Optional QC meta data (charge distribution in MS1). Only works with mzML input files.", false, true);
    setValidFormats_("out_qc", ListUtils::create<String>("json"));


    // misc options
    registerDoubleOption_("min_upper_edge_dist", "<double>", 0.0, "Minimal distance to the upper edge of a Swath window to still consider a precursor, in Thomson", false, true);
    registerFlag_("pasef", "data is PASEF data");

    // RT, mz and IM windows
    registerStringOption_("estimate_extraction_windows", "<all|none|rt[,mz][,im]>", "all", "Choose which extraction windows to estimate during iRT calibration. 'all' = estimate RT, m/z, and IM windows; 'none' = use user-set windows; or a comma-separated list from {rt,mz,im}.", false);
    registerDoubleOption_("rt_estimation_padding_factor", "<double>", 1.3, "A padding factor to multiply the estimated RT window by. For example, a factor of 1.3 will add a 30% padding to the estimated RT window, so if the estimated RT window is 144, then 43 will be added for a total estimated RT window of 187 seconds. A factor of 1.0 will not add any padding to the estimated window.", false);
    setMinFloat_("rt_estimation_padding_factor", 1.0);
    registerDoubleOption_("im_estimation_padding_factor", "<double>", 1.0, "A padding factor to multiply the estimated ion_mobility window by. For example, a factor of 1.3 will add a 30% padding to the estimated ion_mobility window, so if the estimated ion_mobility window is 0.03, then 0.009 will be added for a total estimated ion_mobility window of 0.039. A factor of 1.0 will not add any padding to the estimated window.", false);
    setMinFloat_("im_estimation_padding_factor", 1.0);
    registerDoubleOption_("mz_estimation_padding_factor", "<double>", 1.0, "A padding factor to multiply the estimated m/z window by. For example, a factor of 1.3 will add a 30% padding to the estimated m/z window, so if the estimated m/z window is 18, then 5.4 will be added for a total estimated m/z window of 23.4. A factor of 1.0 will not add any padding to the estimated window.", false);
    setMinFloat_("mz_estimation_padding_factor", 1.0);

    registerDoubleOption_("rt_extraction_window", "<double>", 600.0, "Only extract RT around this value (-1 means extract over the whole range, a value of 600 means to extract around +/- 300 s of the expected elution).", false);
    registerDoubleOption_("extra_rt_extraction_window", "<double>", 0.0, "Output an XIC with a RT-window by this much larger (e.g. to visually inspect a larger area of the chromatogram)", false, true);
    setMinFloat_("extra_rt_extraction_window", 0.0);
    registerDoubleOption_("ion_mobility_window", "<double>", -1, "Extraction window in ion mobility dimension (in 1/k0 or milliseconds depending on library). This is the full window size, e.g. a value of 10 milliseconds would extract 5 milliseconds on either side. -1 means extract over the whole range or ion mobility is not present. (Default for diaPASEF data: 0.06 1/k0)", false);
    registerDoubleOption_("mz_extraction_window", "<double>", 50, "Extraction window in Thomson or ppm (see mz_extraction_window_unit)", false);
    setMinFloat_("mz_extraction_window", 0.0);
    registerStringOption_("mz_extraction_window_unit", "<name>", "ppm", "Unit for mz extraction", false, true);
    setValidStrings_("mz_extraction_window_unit", ListUtils::create<String>("Th,ppm"));

    // MS1 mz windows and ion mobility
    registerDoubleOption_("mz_extraction_window_ms1", "<double>", 50, "Extraction window used in MS1 in Thomson or ppm (see mz_extraction_window_ms1_unit)", false);
    setMinFloat_("mz_extraction_window_ms1", 0.0);
    registerStringOption_("mz_extraction_window_ms1_unit", "<name>", "ppm", "Unit of the MS1 m/z extraction window", false, true);
    setValidStrings_("mz_extraction_window_ms1_unit", ListUtils::create<String>("ppm,Th"));
    registerDoubleOption_("im_extraction_window_ms1", "<double>", -1, "Extraction window in ion mobility dimension for MS1 (in 1/k0 or milliseconds depending on library). -1 means this is not ion mobility data.", false);

    registerStringOption_("use_ms1_ion_mobility", "<name>", "true", "Also perform precursor extraction using the same ion mobility window as for fragment ion extraction", false, true);
    setValidStrings_("use_ms1_ion_mobility", ListUtils::create<String>("true,false"));

    registerStringOption_("matching_window_only", "<name>", "false", "Assume the input data is targeted / PRM-like data with potentially overlapping DIA windows. Will only attempt to extract each assay from the *best* matching DIA window (instead of all matching windows).", false, true);
    setValidStrings_("matching_window_only", ListUtils::create<String>("true,false"));

    // iRT mz and IM windows
    registerDoubleOption_("irt_mz_extraction_window", "<double>", 50, "Extraction window used for iRT and m/z correction in Thomson or ppm (see irt_mz_extraction_window_unit)", false, true);
    setMinFloat_("irt_mz_extraction_window", 0.0);
    registerStringOption_("irt_mz_extraction_window_unit", "<name>", "ppm", "Unit for mz extraction", false, true);
    setValidStrings_("irt_mz_extraction_window_unit", ListUtils::create<String>("Th,ppm"));
    registerDoubleOption_("irt_im_extraction_window", "<double>", -1, "Ion mobility extraction window used for iRT (in 1/K0 or milliseconds depending on library). -1 means do not perform ion mobility calibration", false, true);
    registerDoubleOption_("irt_nonlinear_rt_extraction_window", "<double>", 600.0, "Only extract RT around this value for non linear iRT calibration (-1 means extract over the whole range, a value of 600 means to extract around +/- 300 s of the expected elution).", false, true);
    setMinFloat_("irt_nonlinear_rt_extraction_window", -1.0); // means extract over the whole range

    // quality requirements for the RT peptide regression
    registerDoubleOption_("min_rsq", "<double>", 0.95, "Minimum r-squared of RT peptides regression", false, true);
    registerDoubleOption_("min_coverage", "<double>", 0.6, "Minimum relative amount of RT peptides to keep", false, true);

    registerFlag_("split_file_input", "The input files each contain one single SWATH (alternatively: all SWATH are in separate files)", true);
    registerFlag_("use_elution_model_score", "Turn on elution model score (EMG fit to peak)", true);

    // data loading strategy
    registerStringOption_("readOptions", "<name>", "normal", "Whether to run OpenSWATH directly on the input data, cache data to disk first or to perform a datareduction step first. If you choose cache, make sure to also set tempDirectory", false, true);
    setValidStrings_("readOptions", ListUtils::create<String>("normal,cache,cacheWorkingInMemory,workingInMemory"));

    registerStringOption_("mz_correction_function", "<name>", "none", "Use the retention time normalization peptide MS2 masses to perform a mass correction (linear, weighted by intensity linear or quadratic) of all spectra.", false, true);
    setValidStrings_("mz_correction_function", ListUtils::create<String>("none,regression_delta_ppm,unweighted_regression,weighted_regression,quadratic_regression,weighted_quadratic_regression,weighted_quadratic_regression_delta_ppm,quadratic_regression_delta_ppm"));

    registerStringOption_("tempDirectory", "<tmp>", File::getTempDirectory(), "Temporary directory to store cached files for example", false, true);

    registerStringOption_("extraction_function", "<name>", "tophat", "Function used to extract the signal", false, true);
    setValidStrings_("extraction_function", ListUtils::create<String>("tophat,bartlett"));

    // batching / threading
    registerIntOption_("batchSize", "<number>", 1000, "The batch size of chromatograms to process (0 means to only have one batch, sensible values are around 250-1000)", false, true);
    setMinInt_("batchSize", 0);
    registerIntOption_("outer_loop_threads", "<number>", -1, "How many threads should be used for the outer loop (-1 use all threads, use 4 to analyze 4 SWATH windows in memory at once).", false, true);

    registerIntOption_("ms1_isotopes", "<number>", 3, "The number of MS1 isotopes used for extraction", false, true);
    setMinInt_("ms1_isotopes", 0);

    // subsections; their default parameters come from getSubsectionDefaults_()
    registerSubsection_("Scoring", "Scoring parameters section");
    registerSubsection_("Library", "Library parameters section");

    registerSubsection_("Calibration", "Parameters for calibrant iRT peptides for RT normalization and mass / ion mobility correction.");
    registerSubsection_("Calibration:RTNormalization", "Parameters for the RTNormalization for iRT peptides. This specifies how the RT alignment is performed and how outlier detection is applied. Outlier detection can be done iteratively (by default) which removes one outlier per iteration or using the RANSAC algorithm.");
    registerSubsection_("Calibration:MassIMCorrection", "Parameters for the m/z and ion mobility calibration.");

    // debugging output
    registerTOPPSubsection_("Debugging", "Debugging");
    registerOutputFile_("Debugging:irt_mzml", "<file>", "", "Chromatogram mzML containing the iRT peptides", false);
    setValidFormats_("Debugging:irt_mzml", ListUtils::create<String>("mzML"));
    registerOutputFile_("Debugging:irt_trafo", "<file>", "", "Transformation file for RT transform", false);
    setValidFormats_("Debugging:irt_trafo", ListUtils::create<String>("trafoXML"));
  }

  /**
    @brief Provides the default parameters for the subsections registered in registerOptionsAndFlags_().

    Handled subsections:
     - "Scoring": defaults of MRMFeatureFinderScoring, adjusted for this tool (some entries
       are overwritten, some are removed) \n
     - "Library": defaults of TransitionTSVFile \n
     - "Calibration": iRT sampling parameters and optional iRT / RT normalization files \n
     - "Calibration:RTNormalization": RT alignment and outlier detection parameters \n
     - "Calibration:MassIMCorrection": defaults of SwathMapMassCorrection \n

    @param name Name of the subsection.
    @return The default parameters of the requested subsection.
    @throws Exception::InvalidValue if @p name is not one of the subsections listed above.
  */
  Param getSubsectionDefaults_(const String& name) const override
  {
    if (name == "Scoring")
    {
      // set sensible default parameters
      Param feature_finder_param = MRMFeatureFinderScoring().getDefaults();
      feature_finder_param.remove("rt_extraction_window");
      feature_finder_param.setValue("stop_report_after_feature", 5);
      feature_finder_param.setValue("rt_normalization_factor", 100.0); // for iRT peptides between 0 and 100 (more or less)
      feature_finder_param.setValue("Scores:use_ms1_mi", "true");
      feature_finder_param.setValue("Scores:use_mi_score", "true");

      // Transition group picker
      feature_finder_param.setValue("TransitionGroupPicker:min_peak_width", -1.0);
      feature_finder_param.setValue("TransitionGroupPicker:recalculate_peaks", "true");
      feature_finder_param.setValue("TransitionGroupPicker:compute_peak_quality", "false");
      feature_finder_param.setValue("TransitionGroupPicker:minimal_quality", -1.5);
      feature_finder_param.setValue("TransitionGroupPicker:background_subtraction", "none");
      feature_finder_param.setValue("TransitionGroupPicker:compute_peak_shape_metrics", "false");
      feature_finder_param.remove("TransitionGroupPicker:stop_after_intensity_ratio");

      // Peak Picker
      feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:use_gauss", "false");
      feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:sgolay_polynomial_order", 3);
      feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:sgolay_frame_length", 11);
      feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:peak_width", -1.0);
      feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:remove_overlapping_peaks", "true");
      feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:write_sn_log_messages", "false"); // no log messages
      // TODO it seems that the legacy method produces slightly larger peaks, e.g. it will not cut off peaks too early
      // however the same can be achieved by using a relatively low SN cutoff in the -Scoring:TransitionGroupPicker:PeakPickerChromatogram:signal_to_noise 0.5
      feature_finder_param.setValue("TransitionGroupPicker:recalculate_peaks_max_z", 0.75);
      feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:method", "corrected");
      feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:signal_to_noise", 0.1);
      feature_finder_param.setValue("TransitionGroupPicker:PeakPickerChromatogram:gauss_width", 30.0);
      feature_finder_param.setValue("uis_threshold_sn", -1);
      feature_finder_param.setValue("uis_threshold_peak_area", 0);
      feature_finder_param.remove("TransitionGroupPicker:PeakPickerChromatogram:sn_win_len");
      feature_finder_param.remove("TransitionGroupPicker:PeakPickerChromatogram:sn_bin_count");
      feature_finder_param.remove("TransitionGroupPicker:PeakPickerChromatogram:stop_after_feature");

      // EMG Scoring - turn off by default since it is very CPU-intensive
      // (the subsection entry is removed here; the tool registers its own 'use_elution_model_score' flag)
      feature_finder_param.remove("Scores:use_elution_model_score");
      feature_finder_param.setValue("EMGScoring:max_iteration", 10);
      feature_finder_param.remove("EMGScoring:interpolation_step");
      feature_finder_param.remove("EMGScoring:tolerance_stdev_bounding_box");
      feature_finder_param.remove("EMGScoring:deltaAbsError");

      // remove these parameters
      feature_finder_param.remove("EMGScoring:statistics:mean");
      feature_finder_param.remove("EMGScoring:statistics:variance");
      return feature_finder_param;
    }

    if (name == "Library")
    {
      return TransitionTSVFile().getDefaults();
    }

    if (name == "Calibration")
    {
      Param p;

      // sampling of iRT peptides from the library (linear calibration)
      p.setValue("irt_bins", 100, "Number of RT bins for sampling. (When `auto_irt` is set to 'true')");
      p.setMinInt("irt_bins", 5);
      p.setValue("irt_peptides_per_bin",  5, "Peptides sampled per bin. (When `auto_irt` is set to 'true')");
      p.setMinInt("irt_peptides_per_bin", 1);
      p.setValue("irt_seed",  5489, "RNG seed (0 = non‐deterministic). (When `auto_irt` is set to 'true')");
      p.setMinInt("irt_seed", 0);

      // sampling of iRT peptides from the library (additional nonlinear calibration)
      p.setValue("irt_bins_nonlinear",  2000, "Number of RT bins for sampling. (When `auto_irt` is set to 'true')");
      p.setMinInt("irt_bins_nonlinear", 5);
      p.setValue("irt_peptides_per_bin_nonlinear",  50, "Peptides sampled per bin for additional nonlinear calibration. If 0, nonlinear calibration will not be performed. (When `auto_irt` is set to 'true')");
      p.setMinInt("irt_peptides_per_bin_nonlinear", 0);

      // explicit iRT transition files; the descriptions refer to the top-level `auto_irt` option
      p.setValue("tr_irt", "", "transition file ('TraML') for linear iRTs. Takes precedent even when `auto_irt` is set to 'true'");

      p.setValue("tr_irt_nonlinear", "", "additional nonlinear transition file ('TraML'). Takes precedent even when `auto_irt` is set to 'true'");

      // priority peptides for sampling
      p.setValue("tr_irt_priority_sampling", "", "Optional custom transition file (TSV format only) containing additional priority peptides for iRT sampling. These peptides will be prioritized alongside the built-in irtkit and cirtkit peptides when `auto_irt` is enabled. Useful for including project-specific or custom iRT peptides.");

      // pre-computed RT transformation
      p.setValue("rt_norm", "", "RT normalization file (how to map the RTs of this run to the ones stored in the library). If set, tr_irt may be omitted.");

      return p;
    }

    if (name == "Calibration:RTNormalization")
    {
      Param p;

      // alignment model
      p.setValue("alignmentMethod", "linear", "How to perform the alignment to the normalized RT space using anchor points. 'linear': perform linear regression (for few anchor points). 'interpolated': Interpolate between anchor points (for few, noise-free anchor points). 'lowess' Use local regression (for many, noisy anchor points). 'b_spline' use b splines for smoothing.");
      p.setValidStrings("alignmentMethod", {"linear","interpolated","lowess","b_spline"});
      p.setValue("lowess:auto_span", "true", "If true, or if 'span' is 0, automatically select LOWESS span by cross-validation.");
      p.setValidStrings("lowess:auto_span", {"true","false"});
      p.setValue("lowess:span", 0.05, "Span parameter for lowess");
      p.setMinFloat("lowess:span", 0.0);
      p.setMaxFloat("lowess:span", 1.0);
      p.setValue("lowess:auto_span_min", 0.15,"Lower bound for auto-selected span.");
      p.setMinFloat("lowess:auto_span_min", 0.001);
      p.setValue("lowess:auto_span_max", 0.80,"Upper bound for auto-selected span.");
      p.setMaxFloat("lowess:auto_span_max", 0.99);
      p.setValue("lowess:auto_span_grid", "0.005,0.01,0.05,0.15,0.25,0.30,0.50,0.70,0.90", "Optional explicit grid of span candidates in (0,1]. Comma-separated list, e.g. '0.2,0.3,0.5'.  If empty, a default grid is used.");
      p.setValue("b_spline:num_nodes", 5, "Number of nodes for b spline");
      p.setMinInt("b_spline:num_nodes", 0);

      // outlier detection
      p.setValue("outlierMethod", "iter_residual", "Which outlier detection method to use (valid: 'iter_residual', 'iter_jackknife', 'ransac', 'none'). Iterative methods remove one outlier at a time. Jackknife approach optimizes for maximum r-squared improvement while 'iter_residual' removes the datapoint with the largest residual error (removal by residual is computationally cheaper, use this with lots of peptides).");
      p.setValidStrings("outlierMethod", {"iter_residual","iter_jackknife","ransac","none"});

      p.setValue("useIterativeChauvenet", "false", "Whether to use Chauvenet's criterion when using iterative methods. This should be used if the algorithm removes too many datapoints but it may lead to true outliers being retained.");
      p.setValidStrings("useIterativeChauvenet", {"true","false"});

      p.setValue("RANSACMaxIterations", 1000, "Maximum iterations for the RANSAC outlier detection algorithm.");
      p.setValue("RANSACMaxPercentRTThreshold", 3, "Maximum threshold in RT dimension for the RANSAC outlier detection algorithm (in percent of the total gradient). Default is set to 3% which is around +/- 4 minutes on a 120 gradient.");
      p.setValue("RANSACSamplingSize", 10, "Sampling size of data points per iteration for the RANSAC outlier detection algorithm.");

      // peptide selection and RT coverage requirements
      p.setValue("estimateBestPeptides", "false", "Whether the algorithms should try to choose the best peptides based on their peak shape for normalization. Use this option you do not expect all your peptides to be detected in a sample and too many 'bad' peptides enter the outlier removal step (e.g. due to them being endogenous peptides or using a less curated list of peptides).");
      p.setValidStrings("estimateBestPeptides", {"true","false"});

      p.setValue("InitialQualityCutoff", 0.5, "The initial overall quality cutoff for a peak to be scored (range ca. -2 to 2)");
      p.setValue("OverallQualityCutoff", 5.5, "The overall quality cutoff for a peak to go into the retention time estimation (range ca. 0 to 10)");
      p.setValue("NrRTBins", 10, "Number of RT bins to use to compute coverage. This option should be used to ensure that there is a complete coverage of the RT space (this should detect cases where only a part of the RT gradient is actually covered by normalization peptides)");
      p.setValue("MinPeptidesPerBin", 1, "Minimal number of peptides that are required for a bin to counted as 'covered'");
      p.setValue("MinBinsFilled", 8, "Minimal number of bins required to be covered");
      return p;
    }

    if (name == "Calibration:MassIMCorrection")
    {
      return SwathMapMassCorrection().getDefaults();
    }

    // none of the registered subsections matched
    throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Unknown subsection", name);
  }

  /**
    @brief Per-dimension switches for applying auto-estimated extraction windows.

    Plain aggregate with one flag per coordinate (retention time, m/z, ion mobility).
    A flag set to true means the automatically estimated extraction window should be
    applied for that coordinate. All flags start out as false.
  */
  struct EstimateWindowsChoice
  {
    bool rt = false; ///< retention time
    bool mz = false; ///< mass-to-charge
    bool im = false; ///< ion mobility
  };

  /**
    @brief Parse the user option for selecting estimated extraction windows.

    Interprets the option value as one of: \n
     - @c "all"  → enable all: RT, m/z, and ion mobility \n
     - @c "none" → enable none (default; keeps user-specified fixed windows) \n
     - a comma-separated list drawn from @c {"rt","mz","im"}, e.g. @c "rt,mz" \n

    Parsing is case-insensitive and tolerant of surrounding whitespace.
    Unknown tokens or an empty/malformed value raise an exception.

    @param estimate_windows_option_str  The option string (e.g. "all", "none", "rt,mz").
    @return An @c EstimateWindowsChoice with the requested flags set.
    @throws Exception::InvalidParameter
            If the string is empty/malformed or contains unknown tokens.
  */
  EstimateWindowsChoice parseEstimateExtractionWindows_(String estimate_windows_option_str)
  {
    EstimateWindowsChoice out;
    // The argument is a by-value copy, so it can be normalized in place:
    // strip surrounding whitespace and lower-case the whole value once.
    const String s = estimate_windows_option_str.trim().toLower();

    // Keywords that stand for the complete selection.
    if (s == "all")
    {
      out.rt = out.mz = out.im = true;
      return out;
    }
    if (s == "none")
    {
      return out; // all false, don't use estimated extraction windows
    }

    // Anything else is treated as a comma-separated list of coordinates.
    StringList toks;
    s.split(',', toks);

    // Tracks whether at least one real coordinate was named. Without this, a
    // value made up only of separators (e.g. "," or " , ") would be silently
    // accepted as "none" instead of being reported as malformed.
    bool any_selected = false;
    for (String t : toks)
    {
      // Tokens are already lower-case (the whole value was lower-cased above);
      // only the whitespace around each token still has to be removed.
      t.trim();
      if (t.empty())
      {
        continue; // tolerate stray separators such as "rt,,mz" or "rt,"
      }

      if (t == "rt")       { out.rt = true; }
      else if (t == "mz")  { out.mz = true; }
      else if (t == "im")  { out.im = true; }
      else
      {
        throw OpenMS::Exception::InvalidParameter(
          __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
          "estimate_extraction_windows: unknown token '" + t +
            "'. Allowed: all, none, or a comma-separated value from {rt,mz,im}.");
      }
      any_selected = true;
    }

    // Covers both an empty value (no tokens at all) and a value that consists
    // of separators/whitespace only.
    if (!any_selected)
    {
      throw OpenMS::Exception::InvalidParameter(
        __FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
        "estimate_extraction_windows: value is empty or malformed (expected all|none|rt[,mz][,im])");
    }
    return out;
  }

  /**
    @brief Validate an auto-estimated extraction window.

    A window is considered valid if it is finite and strictly greater than a
     small positive threshold. This guards against denormals (e.g., ~1e-310),
     zeros, negative values, and NaNs/Inf.

    Typical units:
      - RT window: seconds
      - m/z window: ppm
      - IM window: native instrument units

    @param v            The candidate window value.
    @param min_positive Minimum strictly-positive threshold (default: 1e-9).
                         Estimates <= this threshold are deemed invalid.
    @return True if the window is usable; false otherwise.
  */
  inline bool is_valid_win(double v, double min_positive = 1e-9) noexcept
  {
    // NaN and +/-Inf are rejected regardless of the threshold.
    if (!std::isfinite(v))
    {
      return false;
    }
    // Strict comparison: a value equal to the threshold is still rejected,
    // which also excludes zero, negative values and denormals.
    return v > min_positive;
  }

  /**
    @brief Validate, log, and optionally apply an auto-estimated extraction window.

    Behavior:
      - If @p applicable is false (e.g., no IM data), logs an INFO and leaves @p dst_param unchanged.
      - If the estimate is invalid, logs a WARN and leaves @p dst_param unchanged.
      - If the estimate is valid and @p commit is true, logs an INFO and assigns @p dst_param = @p estimate.
      - If the estimate is valid and @p commit is false, logs an INFO that reports the estimate and that the user value is kept.

    Typical usage:
      - RT window (seconds)
      - MS2 m/z window (ppm)
      - MS1 m/z window (ppm)
      - IM window (1/k0), only when applicable (e.g., PASEF/IM data)

    @param label       Human-readable label used in logs (e.g., "RT", "MS2 m/z (ppm)", "MS1 ion mobility (1/k0)").
    @param estimate    Auto-estimated window value to consider.
    @param dst_param   Destination parameter to update on success (by reference).
    @param user_value  The current/user-specified value (reported in logs).
    @param applicable  Whether this window is applicable for the current run/config.
                       If false, the value is not applied and a note is logged.
    @param commit      Whether to apply the estimate. If false, only logs the estimate vs. user value.
                       Default: true (backwards compatible).
  */
  void apply_window(const char* label,
                    double estimate,
                    double& dst_param,
                    const double user_value,
                    bool applicable = true,
                    bool commit = true)
  {
    // Exactly one of the four outcomes below is taken; only the last one
    // writes to dst_param.
    if (!applicable)
    {
      // Window does not apply to this run: report it and keep the user value.
      OPENMS_LOG_INFO << "[Estimated] " << label
                      << " window: not applicable; keeping user value "
                      << user_value << std::endl;
    }
    else if (!is_valid_win(estimate))
    {
      // Estimate failed validation: warn and keep the user value.
      OPENMS_LOG_WARN << "[Estimated] " << label
                      << " window estimate invalid (estimated=" << estimate
                      << "); keeping user value " << user_value << std::endl;
    }
    else if (!commit)
    {
      // Valid estimate, but the caller only wants it reported.
      OPENMS_LOG_INFO << "[Estimated] " << label
                      << " window estimated: " << estimate
                      << "; keeping user value " << user_value << std::endl;
    }
    else
    {
      // Valid estimate and the caller asked for it to be applied.
      OPENMS_LOG_INFO << "[Estimated] " << label
                      << " window applied: " << estimate
                      << " (was " << user_value << ")" << std::endl;
      dst_param = estimate;
    }
  }

  /**
    @brief Load priority peptide sequences from TSV files (irtkit and cirtkit)
    
    Loads peptide sequences from the specified TSV files and returns them as a
    set for quick lookup. Used to prioritize common iRT peptides during sampling.
    
    @param[in] tsv_files Vector of file paths to TSV files to load
    @param[in] tsv_reader_param Parameters for the TSV reader
    
    @return Set of unique peptide sequences from the loaded files
  */
  std::unordered_set<std::string> loadPriorityPeptideSequences(
    const std::vector<String>& tsv_files,
    const Param& tsv_reader_param)
  {
    std::unordered_set<std::string> priority_sequences;
    
    for (const auto& tsv_file : tsv_files)
    {
      if (tsv_file.empty() || !File::exists(tsv_file))
      {
        OPENMS_LOG_WARN << "Priority peptide file not found: " << tsv_file << std::endl;
        continue;
      }
      
      try
      {
        FileTypes::Type file_type = FileHandler::getType(tsv_file);
        OpenSwath::LightTargetedExperiment priority_exp = loadTransitionList(file_type, tsv_file, tsv_reader_param);
        
        for (const auto& compound : priority_exp.getCompounds())
        {
          if (!compound.sequence.empty())
          {
            priority_sequences.insert(compound.sequence);
          }
        }
        
        OPENMS_LOG_INFO << "Loaded " << priority_exp.getCompounds().size() 
                        << " compounds from priority file: " << tsv_file << std::endl;
      }
      catch (const Exception::BaseException& e)
      {
        OPENMS_LOG_WARN << "Failed to load priority peptide file " << tsv_file 
                        << ": " << e.what() << std::endl;
      }
    }
    
    OPENMS_LOG_INFO << "Total unique priority peptide sequences: " 
                    << priority_sequences.size() << std::endl;
    
    return priority_sequences;
  }

  ExitCodes main_(int, const char **) override
  {
    ///////////////////////////////////
    // Prepare Parameters
    ///////////////////////////////////
    StringList file_list = getStringList_("in");
    String tr_file = getStringOption_("tr");
    String out_features = getStringOption_("out_features");

    //tr_file input file type
    FileTypes::Type tr_type = FileTypes::nameToType(getStringOption_("tr_type"));
    if (tr_type == FileTypes::UNKNOWN)
    {
      tr_type = FileHandler::getType(tr_file);
      writeDebug_(String("Input file type (-tr): ") + FileTypes::typeToName(tr_type), 2);
    }

    if (tr_type == FileTypes::UNKNOWN)
    {
      writeLogError_("Error: Could not determine input file type for '-tr' !");
      return PARSE_ERROR;
    }

    //tr_file input file type
    FileTypes::Type out_features_type = FileTypes::nameToType(getStringOption_("out_features_type"));
    if (out_features_type == FileTypes::UNKNOWN)
    {
      out_features_type = FileHandler::getType(out_features);
      writeDebug_(String("Input file type (-out): ") + FileTypes::typeToName(out_features_type), 2);
    }

    if (out_features_type == FileTypes::UNKNOWN)
    {
      writeLogError_("Error: Could not determine input file type for '-out_features' !");
      return PARSE_ERROR;
    }

    String out_qc = getStringOption_("out_qc");

    bool auto_irt = (getStringOption_("auto_irt") == "true");

    Param irt_calibration_params = getParam_().copy("Calibration:", true);
    UInt irt_seed  = irt_calibration_params.getValue("irt_seed");
    UInt irt_bins_lin = irt_calibration_params.getValue("irt_bins");
    UInt irt_pep_lin  = irt_calibration_params.getValue("irt_peptides_per_bin");
    UInt irt_bins_nl  = irt_calibration_params.getValue("irt_bins_nonlinear");
    UInt irt_pep_nl   = irt_calibration_params.getValue("irt_peptides_per_bin_nonlinear");

    String irt_tr_file = irt_calibration_params.getValue("tr_irt").toString();
    String nonlinear_irt_tr_file = irt_calibration_params.getValue("tr_irt_nonlinear").toString();
    String priority_sampling_irt_tr_file = irt_calibration_params.getValue("tr_irt_priority_sampling").toString();
    String trafo_in = irt_calibration_params.getValue("rt_norm").toString();
    String swath_windows_file = getStringOption_("swath_windows_file");

    String out_chrom = getStringOption_("out_chrom");
    bool split_file = getFlag_("split_file_input");
    bool use_emg_score = getFlag_("use_elution_model_score");
    bool force = getFlag_("force");
    bool pasef = getFlag_("pasef");
    bool sort_swath_maps = getFlag_("sort_swath_maps");
    bool use_ms1_traces = getStringOption_("enable_ms1") == "true";
    bool enable_uis_scoring = getStringOption_("enable_ipf") == "true";
    int batchSize = (int)getIntOption_("batchSize");
    int outer_loop_threads = (int)getIntOption_("outer_loop_threads");
    int ms1_isotopes = (int)getIntOption_("ms1_isotopes");
    Size debug_level = (Size)getIntOption_("debug");

    double min_rsq = getDoubleOption_("min_rsq");
    double min_coverage = getDoubleOption_("min_coverage");

    Param debug_params = getParam_().copy("Debugging:", true);

    String readoptions = getStringOption_("readOptions");
    String mz_correction_function = getStringOption_("mz_correction_function");

    // make sure tmp is a directory with proper separator at the end (downstream methods simply do path + filename)
    // (do not use QDir::separator(), since its platform specific (/ or \) while absolutePath() will always use '/')
    String tmp_dir = String(QDir(getStringOption_("tempDirectory").c_str()).absolutePath()).ensureLastChar('/');

    ///////////////////////////////////
    // Parameter validation
    ///////////////////////////////////

    bool load_into_memory = false;
    if (readoptions == "cacheWorkingInMemory")
    {
      readoptions = "cache";
      load_into_memory = true;
    }
    else if (readoptions == "workingInMemory")
    {
      readoptions = "normal";
      load_into_memory = true;
    }

    bool is_sqmass_input  = (FileHandler::getTypeByFileName(file_list[0]) == FileTypes::SQMASS);
    if (is_sqmass_input && !load_into_memory)
    {
      std::cout << "When using sqMass input files, it is highly recommended to use the workingInMemory option as otherwise data access will be very slow." << std::endl;
    }

    if (trafo_in.empty() && irt_tr_file.empty() && !auto_irt)
    {
      std::cout << "Since neither rt_norm nor tr_irt nor auto_irt is set, OpenSWATH will " <<
        "not use RT-transformation (rather a null transformation will be applied)" << std::endl;
    }

    // -----------------------------------------------------------------
    // Validate auto_irt parameters
    // -----------------------------------------------------------------
    if (auto_irt)
    {
      // linear sampling must have at least one bin and one peptide per bin
      if (irt_bins_lin == 0)
      {
        writeLogError_("Parameter error: --irt_bins must be > 0 when auto_irt is enabled.");
        return PARSE_ERROR;
      }
      if (irt_pep_lin == 0)
      {
        writeLogError_("Parameter error: --irt_peptides_per_bin must be > 0 when auto_irt is enabled.");
        return PARSE_ERROR;
      }
    }
    
    // Validate priority iRT sampling file format if provided
    if (!priority_sampling_irt_tr_file.empty())
    {
      if (!File::exists(priority_sampling_irt_tr_file))
      {
        writeLogError_("Parameter error: Priority iRT file does not exist: " + priority_sampling_irt_tr_file);
        return PARSE_ERROR;
      }
      
      FileTypes::Type priority_file_type = FileHandler::getType(priority_sampling_irt_tr_file);
      if (priority_file_type != FileTypes::TSV)
      {
        writeLogError_("Parameter error: Priority iRT file must be in TSV format. Provided: " + 
                       FileTypes::typeToName(priority_file_type));
        return PARSE_ERROR;
      }
    }

    // Check swath window input
    if (!swath_windows_file.empty())
    {
      OPENMS_LOG_INFO << "Validate provided Swath windows file:" << std::endl;
      std::vector<double> swath_prec_lower;
      std::vector<double> swath_prec_upper;
      SwathWindowLoader::readSwathWindows(swath_windows_file, swath_prec_lower, swath_prec_upper);

      for (Size i = 0; i < swath_prec_lower.size(); i++)
      {
        OPENMS_LOG_DEBUG << "Read lower swath window " << swath_prec_lower[i] << " and upper window " << swath_prec_upper[i] << std::endl;
      }
    }

    double min_upper_edge_dist = getDoubleOption_("min_upper_edge_dist");
    bool use_ms1_im = getStringOption_("use_ms1_ion_mobility") == "true";
    bool prm = getStringOption_("matching_window_only") == "true";

    EstimateWindowsChoice use_est_window_choices = parseEstimateExtractionWindows_(getStringOption_("estimate_extraction_windows"));
    ChromExtractParams cp;
    cp.min_upper_edge_dist   = min_upper_edge_dist;
    cp.mz_extraction_window  = getDoubleOption_("mz_extraction_window");
    cp.ppm                   = getStringOption_("mz_extraction_window_unit") == "ppm";
    cp.rt_extraction_window  = getDoubleOption_("rt_extraction_window");
    cp.im_extraction_window  = getDoubleOption_("ion_mobility_window");
    cp.extraction_function   = getStringOption_("extraction_function");
    cp.extra_rt_extract      = getDoubleOption_("extra_rt_extraction_window");

    ChromExtractParams cp_irt = cp;
    cp_irt.rt_extraction_window = -1; // extract the whole RT range for iRT measurements
    cp_irt.mz_extraction_window = getDoubleOption_("irt_mz_extraction_window");
    cp_irt.im_extraction_window = getDoubleOption_("irt_im_extraction_window");

    if ( (cp_irt.im_extraction_window == -1) & (cp.im_extraction_window != -1) )
    {
      OPENMS_LOG_WARN << "Warning: -irt_im_extraction_window is not set, this will lead to no ion mobility calibration" << std::endl;
    }

    cp_irt.ppm                  = getStringOption_("irt_mz_extraction_window_unit") == "ppm";

    ChromExtractParams cp_ms1 = cp;
    cp_ms1.mz_extraction_window  = getDoubleOption_("mz_extraction_window_ms1");
    cp_ms1.ppm                   = getStringOption_("mz_extraction_window_ms1_unit") == "ppm";
    cp_ms1.im_extraction_window  = (use_ms1_im) ? getDoubleOption_("im_extraction_window_ms1") : -1;

    Param feature_finder_param = getParam_().copy("Scoring:", true);
    feature_finder_param.setValue("use_ms1_ion_mobility", getStringOption_("use_ms1_ion_mobility"));

    Param tsv_reader_param = getParam_().copy("Library:", true);
    if (use_emg_score)
    {
      feature_finder_param.setValue("Scores:use_elution_model_score", "true");
    }
    else
    {
      feature_finder_param.setValue("Scores:use_elution_model_score", "false");
    }
    if (use_ms1_traces)
    {
      feature_finder_param.setValue("Scores:use_ms1_correlation", "true");
      feature_finder_param.setValue("Scores:use_ms1_fullscan", "true");
    }
    if (enable_uis_scoring)
    {
      feature_finder_param.setValue("Scores:use_uis_scores", "true");
    }

    bool compute_peak_shape_metrics = feature_finder_param.getValue("TransitionGroupPicker:compute_peak_shape_metrics").toBool();
    if (compute_peak_shape_metrics)
    {
      feature_finder_param.setValue("Scores:use_peak_shape_metrics", "true");
    }

    ///////////////////////////////////
    // Load the transitions
    ///////////////////////////////////
    OpenSwath::LightTargetedExperiment transition_exp = loadTransitionList(tr_type, tr_file, tsv_reader_param);
    OPENMS_LOG_INFO << "Loaded " << transition_exp.getProteins().size() << " proteins, " <<
      transition_exp.getCompounds().size() << " compounds with " << transition_exp.getTransitions().size() << " transitions." << std::endl;

    if (out_features_type == FileTypes::OSW)
    {
      if (tr_type == FileTypes::PQP)
      {
         // copy the PQP file and name it OSW file
          std::ifstream  src(tr_file.c_str(), std::ios::binary);
          std::ofstream  dst(out_features.c_str(), std::ios::binary | std::ios::trunc);
          dst << src.rdbuf();
      }
      else if (tr_type == FileTypes::TSV)
      {
        // Convert TSV to .PQP 
        TransitionTSVFile tsv_reader;
        TargetedExperiment transition_exp_heavy;
        tsv_reader.setParameters(tsv_reader_param);
        tsv_reader.convertTSVToTargetedExperiment(tr_file.c_str(), tr_type, transition_exp_heavy);
        TransitionPQPFile().convertTargetedExperimentToPQP(out_features.c_str(), transition_exp_heavy);

        // instead of reloading - edit the already loaded transition_exp to be compatible with .pqp format
        // read the PQP to traMLID mapping
        auto precursor_traml_to_pqp = TransitionPQPFile().getPQPIDToTraMLIDMap(out_features.c_str(), "PRECURSOR");
        auto transition_traml_to_pqp = TransitionPQPFile().getPQPIDToTraMLIDMap(out_features.c_str(), "TRANSITION");

        // convert tramlID in transitionExp to PQP ID
        for (auto & prec : transition_exp.getCompounds())
        {
          if (auto id = precursor_traml_to_pqp.find(prec.id); id != precursor_traml_to_pqp.end())
          {
            prec.id = id->second;
          }
        }

        for (auto & tr : transition_exp.getTransitions())
        {
          // convert transition tramlID peptide reference in transitionExp to PQP ID 
          auto pep = precursor_traml_to_pqp.find(tr.getPeptideRef());
          if (pep != precursor_traml_to_pqp.end())
          {
            tr.peptide_ref = pep->second;
          }

          // Update transition id
          auto id = transition_traml_to_pqp.find(tr.transition_name);
          if (id != transition_traml_to_pqp.end())
          {
            tr.transition_name = id->second;
          }
        }
      }
      else if (tr_type == FileTypes::TRAML)
      {
        if (out_features_type == FileTypes::OSW)
        {
          throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Conversion from TraML to OSW is not supported."));
        }
      }
    }

    // If pasef flag is set, validate that IM is present
    if (pasef)
    {
      auto transitions = transition_exp.getTransitions();

      for ( Size k=0; k < (Size)transitions.size(); k++ )
      {
        if (transitions[k].precursor_im == -1)
        {
          throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Transition " + transitions[k].getNativeID() +  " does not have a valid IM value, this must be set to use the -pasef flag");
        }
      }
    }

    ///////////////////////////////////
    // Load the SWATH files
    ///////////////////////////////////
    std::shared_ptr<ExperimentalSettings> exp_meta(new ExperimentalSettings);
    std::vector< OpenSwath::SwathMap > swath_maps;

    // collect some QC data
    if (!out_qc.empty())
    {
      OpenSwath::SwathQC qc(30, 0.04);
      MSDataTransformingConsumer qc_consumer; // apply some transformation
      qc_consumer.setSpectraProcessingFunc(qc.getSpectraProcessingFunc());
      qc_consumer.setExperimentalSettingsFunc(qc.getExpSettingsFunc());
      if (!loadSwathFiles(file_list, exp_meta, swath_maps, split_file, tmp_dir, readoptions,
                          swath_windows_file, min_upper_edge_dist, force,
                          sort_swath_maps, prm, pasef, &qc_consumer))
      {
        return PARSE_ERROR;
      }
      qc.storeJSON(out_qc);
    }
    else
    {
      if (!loadSwathFiles(file_list, exp_meta, swath_maps, split_file, tmp_dir, readoptions,
                          swath_windows_file, min_upper_edge_dist, force,
                          sort_swath_maps, prm, pasef))
      {
        return PARSE_ERROR;
      }
    }


    ///////////////////////////////////
    // Get the transformation information (using iRT peptides)
    ///////////////////////////////////
    String irt_trafo_out = debug_params.getValue("irt_trafo").toString();
    String irt_mzml_out = debug_params.getValue("irt_mzml").toString();
    Param irt_detection_param = getParam_().copy("Calibration:RTNormalization:", true);
    Param calibration_param = getParam_().copy("Calibration:MassIMCorrection:", true);
    calibration_param.setValue("mz_extraction_window", cp_irt.mz_extraction_window);
    calibration_param.setValue("mz_extraction_window_ppm", cp_irt.ppm ? "true" : "false");
    calibration_param.setValue("im_extraction_window", cp_irt.im_extraction_window);
    calibration_param.setValue("im_estimation_padding_factor", getDoubleOption_("im_estimation_padding_factor"));
    calibration_param.setValue("mz_estimation_padding_factor", getDoubleOption_("mz_estimation_padding_factor"));
    calibration_param.setValue("mz_correction_function", mz_correction_function);

    // Load priority peptide sequences from irtkit and cirtkit if auto_irt is enabled
    std::unordered_set<std::string> priority_peptides;
    if (auto_irt)
    {
      String data_path = File::getOpenMSDataPath();
      std::vector<String> priority_files;
      
      String irtkit_path = data_path + "/CHEMISTRY/irtkit.tsv";
      String cirtkit_path = data_path + "/CHEMISTRY/cirtkit.tsv";
      
      if (File::exists(irtkit_path))
      {
        priority_files.push_back(irtkit_path);
      }
      else
      {
        OPENMS_LOG_WARN << "irtkit.tsv not found at: " << irtkit_path << std::endl;
      }
      
      if (File::exists(cirtkit_path))
      {
        priority_files.push_back(cirtkit_path);
      }
      else
      {
        OPENMS_LOG_WARN << "cirtkit.tsv not found at: " << cirtkit_path << std::endl;
      }
      
      // Add custom priority iRT file if provided
      if (!priority_sampling_irt_tr_file.empty())
      {
        if (File::exists(priority_sampling_irt_tr_file))
        {
          priority_files.push_back(priority_sampling_irt_tr_file);
          OPENMS_LOG_DEBUG << "Including custom priority iRT file: " << priority_sampling_irt_tr_file << std::endl;
        }
      }
      
      if (!priority_files.empty())
      {
        Param priority_tsv_param = TransitionTSVFile().getDefaults();
        priority_peptides = loadPriorityPeptideSequences(priority_files, priority_tsv_param);
      }
      else
      {
        OPENMS_LOG_WARN << "No priority peptide files found. Continuing without priority sampling." << std::endl;
      }
    }

    // 1) Prepare in‐memory iRT experiments for linear + nonlinear
    OpenSwath::LightTargetedExperiment lin_irt_exp;
    if (!irt_tr_file.empty())
    {
      // user‐supplied linear iRT file takes absolute precedence
      FileTypes::Type irt_tr_type = FileHandler::getType(irt_tr_file);
      Param irt_tsv_reader_param = TransitionTSVFile().getDefaults();
      lin_irt_exp = loadTransitionList(irt_tr_type, irt_tr_file, irt_tsv_reader_param);
    }
    else if (auto_irt)
    {
      OPENMS_LOG_INFO << "Linear iRT Calibration: Sampling input transition experiment for " << irt_bins_lin << " bins across the RT with " << irt_pep_lin << " peptides per bin" << std::endl;
      // sampled transtion_exp on‐the‐fly
      // Note1: We sort the targetedExperiment peptides by the aggregated total intensity (i.e. sum of fragment library intensities per peptide),
      //         in order to reduce the sampling space to the top N fraction of highly intense peptides
      // Note2: For linear iRTs we set top fraction to 40%, that is, we reduce the space of peptides to sample for 40% of the highest intense peptides.
      //          The reason for restricting the space a lot more for linear iRTs is to ensure that we sample the most intense peptides which
      //          are going to be more likely detected.
      lin_irt_exp = OpenSwathHelper::sampleExperiment(
        transition_exp,
        irt_bins_lin,
        irt_pep_lin,
        irt_seed,
        true,
        0.4,
        priority_peptides
      );
    }

    OpenSwath::LightTargetedExperiment nl_irt_exp;
    if (!nonlinear_irt_tr_file.empty())
    {
      // user‐supplied nonlinear iRT file
      FileTypes::Type irt_nl_tr_type = FileHandler::getType(nonlinear_irt_tr_file);
      Param irt_nl_tsv_reader_param = TransitionTSVFile().getDefaults();
      nl_irt_exp = loadTransitionList(irt_nl_tr_type, nonlinear_irt_tr_file, irt_nl_tsv_reader_param);
    }
    else if (auto_irt && irt_pep_nl > 0)
    {
      OPENMS_LOG_INFO << "NonLinear iRT Calibration: Sampling input transition experiment for " << irt_bins_nl << " bins across the RT with " << irt_pep_nl << " peptides per bin" << std::endl;
      // sampled transtion_exp on‐the‐fly for nonlinear (only if >0)
      // Note1: We sort the targetedExperiment peptides by the aggregated total intensity (i.e. sum of fragment library intensities per peptide),
      //         in order to reduce the sampling space to the top N fraction of highly intense peptides
      // Note2: For the additional nonlinear iRTs we set top fraction to 80%, that is, we reduce the space of peptides to sample for 80% of the highest intense peptides.
      //          The reason for being less restrictive of the sampling space for nonlinear iRTs, is because we can be more liberal with the quality of the
      //          nonlinear iRTs.
      nl_irt_exp = OpenSwathHelper::sampleExperiment(
        transition_exp,
        irt_bins_nl,
        irt_pep_nl,
        irt_seed,
        true,
        0.7,
        priority_peptides
      );
    }

    // 2) Launch either just linear or linear+nonlinear
    TransformationDescription trafo_rtnorm; double estimated_rt_extraction_window;
    double rt_estimation_padding_factor = getDoubleOption_("rt_estimation_padding_factor");
    if (nl_irt_exp.getTransitions().empty())
    {
      // --- single, linear calibration ---
      auto calibration_result = performCalibration(
        trafo_in,
        lin_irt_exp,
        swath_maps,
        min_rsq,
        min_coverage,
        feature_finder_param,
        cp_irt,
        irt_detection_param,
        calibration_param,
        debug_level,
        pasef,
        load_into_memory,
        irt_trafo_out,
        irt_mzml_out);
      // We need to set trafo_rtnorm to the calibration result
      trafo_rtnorm = calibration_result.rt_trafo;
      // Use the 0.99 quantile so the window covers ~99% of residuals, ignoring rare extremes (those that are potential outliers).
      estimated_rt_extraction_window = calibration_result.rt_trafo.estimateWindow(0.99, true, true, rt_estimation_padding_factor);
      // RT (seconds)
      apply_window("RT",
                   estimated_rt_extraction_window,
                   /*dst*/  cp.rt_extraction_window,
                   /*user*/ cp.rt_extraction_window,
                   /*applicable=*/true,
                   /*commit=*/use_est_window_choices.rt);

      // MS2 m/z (ppm)
      apply_window("MS2 m/z (ppm)",
                   calibration_result.ms2_mz_window_ppm,
                   cp.mz_extraction_window, cp.mz_extraction_window,
                   /*applicable=*/true,
                   /*commit=*/use_est_window_choices.mz && cp.ppm);
      if (use_est_window_choices.mz && !cp.ppm)
      {
        OPENMS_LOG_WARN
          << "[Auto-calibration] MS2 m/z window not applied: user selected Thomson (Th) as unit, "
          << "but the estimated window is " << calibration_result.ms2_mz_window_ppm << " ppm. "
          << "Keeping the user-set value " << cp.mz_extraction_window << " Th. "
          << std::endl;
      }

      // MS2 ion mobility (1/k0)
      apply_window("MS2 ion mobility (1/k0)",
                   calibration_result.ms2_im_window,
                   cp.im_extraction_window, cp.im_extraction_window,
                   /*applicable=*/pasef,
                   /*commit=*/use_est_window_choices.im);

      // MS1 m/z (ppm)
      apply_window("MS1 m/z (ppm)",
                   calibration_result.ms1_mz_window_ppm,
                   cp_ms1.mz_extraction_window, cp_ms1.mz_extraction_window,
                   /*applicable=*/true,
                   /*commit=*/use_est_window_choices.mz && cp_ms1.ppm);
      if (use_est_window_choices.mz && !cp_ms1.ppm)
      {
        OPENMS_LOG_WARN
          << "[Auto-calibration] MS1 m/z window not applied: user selected Thomson (Th) as unit, "
          << "but the estimated window is " << calibration_result.ms1_mz_window_ppm << " ppm. "
          << "Keeping the user-set value " << cp_ms1.mz_extraction_window << " Th. "
          << std::endl;
      }

      // MS1 ion mobility (1/k0)
      apply_window("MS1 ion mobility (1/k0)",
                   calibration_result.ms1_im_window,
                   cp_ms1.im_extraction_window, cp_ms1.im_extraction_window,
                   /*applicable=*/pasef && use_ms1_im,
                   /*commit=*/use_est_window_choices.im);
    }
    else
    {
      ///////////////////////////////////
      // First perform a simple linear transform, then do a second, nonlinear one
      ///////////////////////////////////
      OPENMS_LOG_INFO << "Performing iRT linear transform..." << std::endl;

      Param linear_irt = irt_detection_param;
      linear_irt.setValue("alignmentMethod", "linear");
      Param no_calibration = calibration_param;
      no_calibration.setValue("mz_correction_function", "none");
      auto calibration_result = performCalibration(trafo_in, lin_irt_exp, swath_maps,
                                        min_rsq, min_coverage, feature_finder_param,
                                        cp_irt, linear_irt, no_calibration,
                                        debug_level, pasef, load_into_memory,
                                        irt_trafo_out, irt_mzml_out);
      trafo_rtnorm = calibration_result.rt_trafo;

      cp_irt.rt_extraction_window = getDoubleOption_("irt_nonlinear_rt_extraction_window"); // extract some substantial part of the RT range (should be covered by linear correction)

      ///////////////////////////////////
      // Get the secondary transformation (nonlinear)
      ///////////////////////////////////
      OPENMS_LOG_INFO << "Performing additional iRT nonlinear transform..." << std::endl;

      OpenSwathCalibrationWorkflow wf;
      wf.setLogType(log_type_);
      std::vector<OpenMS::MSChromatogram> chroms;
      wf.simpleExtractChromatograms_(
        swath_maps,
        nl_irt_exp,
        chroms,
        trafo_rtnorm,
        cp_irt,
        pasef,
        load_into_memory);

      Param nl_param = irt_detection_param;
      nl_param.setValue("estimateBestPeptides", "true");

      TransformationDescription im_trafo;
      trafo_rtnorm = wf.doDataNormalization_(
        nl_irt_exp,
        chroms,
        im_trafo,
        swath_maps,
        min_rsq,
        min_coverage,
        feature_finder_param,
        nl_param,
        calibration_param,
        pasef);

      // apply IM‐correction back to the library
      if (!irt_trafo_out.empty())
      {
        String nonlinear_path = irt_trafo_out;

        const String ext = ".trafoXML";
        nonlinear_path = nonlinear_path.substr(0, nonlinear_path.size() - ext.size());
        nonlinear_path += "_nonlinear.trafoXML";

        FileHandler().storeTransformations(nonlinear_path, trafo_rtnorm, { FileTypes::TRANSFORMATIONXML });
      }

      // Use the 0.99 quantile so the window covers ~99% of residuals, ignoring rare extremes (those that are potential outliers).
      estimated_rt_extraction_window = trafo_rtnorm.estimateWindow(0.99, true, true, rt_estimation_padding_factor);

      TransformationDescription im_trafo_inv = im_trafo;
      im_trafo_inv.invert();
      for (auto & cmp : transition_exp.getCompounds())
      {
        cmp.drift_time = im_trafo_inv.apply(cmp.drift_time);
      }
      double estimated_mz_extraction_window = wf.getEstimatedMzWindow();
      double estimated_im_extraction_window = wf.getEstimatedImWindow();
      double estimated_ms1_mz_extraction_window = wf.getEstimatedMs1MzWindow();
      double estimated_ms1_im_extraction_window = wf.getEstimatedMs1ImWindow();

      // RT (seconds)
      apply_window("RT",
                   estimated_rt_extraction_window,
                   /*dst*/  cp.rt_extraction_window,
                   /*user*/ cp.rt_extraction_window,
                   /*applicable=*/true,
                   /*commit=*/use_est_window_choices.rt);

      // MS2 m/z (ppm)
      apply_window("MS2 m/z (ppm)",
                   estimated_mz_extraction_window,
                   cp.mz_extraction_window, cp.mz_extraction_window,
                   /*applicable=*/true,
                   /*commit=*/use_est_window_choices.mz && cp.ppm);
      if (use_est_window_choices.mz && !cp.ppm)
      {
        OPENMS_LOG_WARN
          << "[Auto-calibration] MS2 m/z window not applied: user selected Thomson (Th) as unit, "
          << "but the estimated window is " << calibration_result.ms2_mz_window_ppm << " ppm. "
          << "Keeping the user-set value " << cp.mz_extraction_window << " Th. "
          << std::endl;
      }

      // MS2 ion mobility (1/k0)
      apply_window("MS2 ion mobility (1/k0)",
                   estimated_im_extraction_window,
                   cp.im_extraction_window, cp.im_extraction_window,
                   /*applicable=*/pasef,
                   /*commit=*/use_est_window_choices.im);

      // MS1 m/z (ppm)
      apply_window("MS1 m/z (ppm)",
                   estimated_ms1_mz_extraction_window,
                   cp_ms1.mz_extraction_window, cp_ms1.mz_extraction_window,
                   /*applicable=*/true,
                   /*commit=*/use_est_window_choices.mz && cp_ms1.ppm);
      if (use_est_window_choices.mz && !cp_ms1.ppm)
      {
        OPENMS_LOG_WARN
          << "[Auto-calibration] MS1 m/z window not applied: user selected Thomson (Th) as unit, "
          << "but the estimated window is " << calibration_result.ms1_mz_window_ppm << " ppm. "
          << "Keeping the user-set value " << cp_ms1.mz_extraction_window << " Th. "
          << std::endl;
      }

      // MS1 ion mobility (1/k0)
      apply_window("MS1 ion mobility (1/k0)",
                   estimated_ms1_im_extraction_window,
                   cp_ms1.im_extraction_window, cp_ms1.im_extraction_window,
                   /*applicable=*/pasef && use_ms1_im,
                   /*commit=*/use_est_window_choices.im);
    }

    ///////////////////////////////////
    // Set up chromatogram output
    // Either use chrom.mzML or sqliteDB (sqMass)
    ///////////////////////////////////
    Interfaces::IMSDataConsumer* chromatogramConsumer;
    UInt64 run_id = OpenMS::UniqueIdGenerator::getUniqueId();
    prepareChromOutput(&chromatogramConsumer, exp_meta, transition_exp, out_chrom, run_id);

    ///////////////////////////////////
    // Set up peakgroup file output .osw file
    ///////////////////////////////////

    FeatureMap out_featureFile;
    // store features if not writing to .featureXML
    bool store_features = (out_features_type != FileTypes::FEATUREXML);
    String osw_out_filename = store_features ? out_features : "";
    OpenSwathOSWWriter oswwriter(osw_out_filename, run_id, file_list[0], enable_uis_scoring);

    OpenSwathWorkflow wf(use_ms1_traces, use_ms1_im, prm, pasef, outer_loop_threads);
    wf.setLogType(log_type_);
    wf.performExtraction(swath_maps, trafo_rtnorm, cp, cp_ms1, feature_finder_param, transition_exp,
        out_featureFile, true, oswwriter, chromatogramConsumer, batchSize, ms1_isotopes, load_into_memory);

    if ( out_features_type == FileTypes::FEATUREXML )
    {
      std::cout << "Writing features ..." << std::endl;
      addDataProcessing_(out_featureFile, getProcessingInfo_(DataProcessing::QUANTITATION));
      out_featureFile.ensureUniqueId();
      FileHandler().storeFeatures(out_features, out_featureFile, {FileTypes::FEATUREXML});
    }

    delete chromatogramConsumer;

    return EXECUTION_OK;
  }

};

int main(int argc, const char ** argv)
{
  TOPPOpenSwathWorkflow tool;
  return tool.main(argc, argv);
}

/// @endcond
