// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
// SPDX-License-Identifier: BSD-3-Clause
//
// --------------------------------------------------------------------------
// $Maintainer: Timo Sachsenberg $
// $Authors: Timo Sachsenberg $
// --------------------------------------------------------------------------

#pragma once

#include <OpenMS/CONCEPT/Exception.h>
#include <OpenMS/CONCEPT/LogStream.h>
#include <OpenMS/DATASTRUCTURES/String.h>
#include <OpenMS/DATASTRUCTURES/StringListUtils.h>
#include <OpenMS/SYSTEM/File.h>
#include <OpenMS/KERNEL/MSExperiment.h>
#include <OpenMS/FEATUREFINDER/MassTraceDetection.h>
#include <OpenMS/PROCESSING/FILTERING/ThresholdMower.h>
#include <OpenMS/PROCESSING/CALIBRATION/InternalCalibration.h>
#include <OpenMS/PROCESSING/CALIBRATION/MZTrafoModel.h>
#include <OpenMS/MATH/StatisticFunctions.h>
#include <OpenMS/FEATUREFINDER/FeatureFinderMultiplexAlgorithm.h>

#include <map>
#include <vector>

namespace OpenMS
{
    /**
        @brief Common functions for DDA workflows

        Collection of stateless helpers; every member declared here is a static function.

        @ingroup Analysis_ID
    */
    class OPENMS_DLLAPI DDAWorkflowCommons
    {
        public:
        /**
         * @brief Creates a map between mzML files and their corresponding id files.
         *
         * Checks implemented:
         *      - Check if the number of spectra and id files match.
         *      - If spectra and id files share common base names (without extension)
         *        but appear in different order, throw an error.
         *
         * @param[in] in <StringList> List of mzML file paths.
         * @param[in] in_ids <StringList> List of identification file paths.
         * @return <std::map<String, String>> A map where keys are mzML file paths and values are corresponding id file paths.
         * @throws Exception::InvalidParameter if the number of mzML files and identification files don't match
         * @throws Exception::MissingInformation if files share common base names but appear in different order
         * @note This mapping is crucial for linking raw data with identification results in DDA workflows.
         * @note NOTE(review): both lists are taken by non-const reference although they are documented as
         *       inputs; confirm against the implementation whether they are modified.
         */
        static std::map<String, String> mapMzML2Ids(StringList & in, StringList & in_ids);

        /**
         * @brief Small helper to get the mapping from id files to mzML files
         *
         * Basically just reverses the mapping produced by mapMzML2Ids().
         * Potential improvement: Could be combined into a single function exposed to the user.
         *
         * @param[in] m2i <const std::map<String, String>&> The mzML to id file mapping generated by mapMzML2Ids.
         * @return <std::map<String, String>> A map where keys are id file paths and values are corresponding mzML file paths.
        */
        static std::map<String, String> mapId2MzMLs(const std::map<String, String>& m2i);

        /**
         * @brief Estimates the median chromatographic full width at half maximum (FWHM) for a given MSExperiment.
         *
         * @param[in] ms_centroided The centroided MSExperiment for which to estimate the FWHM.
         * @return The estimated median chromatographic FWHM in retention time units.
         * @note FWHM is a measure of peak width and is crucial for chromatographic peak detection and feature finding.
         *       The estimation is based on the top 1000 intensity mass traces to focus on prominent chromatographic peaks.
         * @note NOTE(review): the experiment is taken by non-const reference although it is documented as
         *       an input; confirm against the implementation whether it is modified.
        */
        static double estimateMedianChromatographicFWHM(MSExperiment & ms_centroided);          
            
        /**
         * @brief Recalibrates the masses of the MSExperiment using peptide identifications.
         *
         * This function recalibrates the masses of the MSExperiment by applying a mass recalibration
         * based on the theoretical masses from identification data.
         *
         * @param[in,out] ms_centroided <MSExperiment&> The MSExperiment object containing the centroided spectra, which will be recalibrated in place.
         * @param[in] peptide_ids <PeptideIdentificationList&> The list of PeptideIdentification objects containing the peptide identifications.
         *                        NOTE(review): passed by non-const reference; confirm whether the implementation modifies it.
         * @param[in] id_file_abs_path <const String&> The absolute path of the identification file (default: empty string).
         *
         * @note Mass recalibration is essential to improve mass accuracy, which is critical for correct peptide identification and quantification.
         */
        static void recalibrateMS1(MSExperiment & ms_centroided,
            PeptideIdentificationList& peptide_ids,
            const String & id_file_abs_path = ""
        );

        /**
        * @brief Extracts seeding features from centroided MS data (e.g., for untargeted extraction).
        *
        * MS1 spectra are subjected to a threshold filter to remove low-intensity peaks,
        * and then the FeatureFinderMultiplex algorithm is used to identify potential seeding features.
        * The function also takes into account the median full width at half maximum (FWHM) of the peaks
        * to adjust the FeatureFinderMultiplex parameters for better seed detection.
        *
        * @param[in] ms_centroided <const MSExperiment&> The centroided MSExperiment object. Only MS1 level
        *                          spectra are considered for seed feature calculation.
        * @param[in] intensity_threshold Intensity threshold below which peaks are discarded.
        * @param[out] seeds The FeatureMap object where the identified seeding features will be stored.
        * @param[in] median_fwhm The median FWHM of the peaks, used to adjust the FeatureFinderMultiplex parameters for
        *                        seed detection.
        * @param[in] charge_min Minimum charge state to consider for feature seeds (default: 2).
        * @param[in] charge_max Maximum charge state to consider for feature seeds (default: 5).
        *
        * @note The function employs a ThresholdMower filter with hardcoded parameters (m/z tolerance: 20 ppm, intensity cutoff: intensity_threshold, below cutoff: remove)
        *       and the FeatureFinderMultiplex algorithm with parameters optimized for seed feature detection in DDA workflows.
        *       These parameters may be refined or exposed as function arguments in future implementations for more flexibility.
        */
        static void calculateSeeds(
            const MSExperiment & ms_centroided, 
            const double intensity_threshold,
            FeatureMap & seeds, 
            double median_fwhm,
            Size charge_min = 2,
            Size charge_max = 5
        );

    };

}
