package de.unijena.bioinf.fingerid;

import de.unijena.bioinf.ChemistryBase.chem.MolecularFormula;
import de.unijena.bioinf.ChemistryBase.ms.ft.FTree;
import de.unijena.bioinf.ChemistryBase.ms.ft.Fragment;
import de.unijena.bioinf.ChemistryBase.ms.utils.SimpleSpectrum;
import de.unijena.bioinf.FragmentationTreeConstruction.computation.scoring.CommonLossEdgeScorer;
import gnu.trove.function.TIntFunction;
import gnu.trove.map.hash.TObjectIntHashMap;
import gnu.trove.procedure.TObjectIntProcedure;
import gnu.trove.procedure.TObjectProcedure;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;

/* loaded from: input_file:de/unijena/bioinf/fingerid/FragmentSet.class */
public class FragmentSet {
    /** Weight per selected fragment formula, exactly as returned by {@link #findFragmentsInTrees(FTree[])}. */
    private final HashMap<MolecularFormula, Double> weightMap;
    /**
     * Keys of {@link #weightMap} copied into an array at construction time.
     * The order is whatever the map's key set iteration produced; it is not sorted here.
     */
    private final MolecularFormula[] fragments;
    static final String[] MATCHING_FORMULA_STRINGS = {"B", "BO", "C", "C10", "C10Cl", "C10ClO", "C10F", "C10F3", "C10FN", "C10FO", "C10N", "C10N2", "C10N5O4", "C10NO", "C10NS", "C10O", "C10O2", "C10O3", "C11", "C11Cl2", "C11F", "C11F3S", "C11N", "C11N2", "C11NS", "C11O", "C11O2", "C11O3", "C11OS", "C12", "C12ClNS", "C12F3S", "C12N", "C12N2", "C12NS", "C12NS2", "C12O", "C12O2", "C12O2S", "C12O3", "C12OS", "C13", "C13Cl2", "C13ClNS", "C13F3NS", "C13F3S", "C13N2", "C13NOS", "C13NS", "C13NS2", "C14", "C14ClNS", "C14F3NS", "C14F3S", "C14NOS", "C14NS", "C14NS2", "C15", "C15ClN", "C15ClNS", "C15F3NS", "C15F3S", "C15NOS", "C15NS", "C15NS2", "C16ClN", "C16F3NS", "C16NS", "C17ClN", "C17N2", "C18N2", "C18NO", "C2", "C2Cl", "C2Cl2", "C2Cl2F", "C2Cl2FS", "C2Cl2N3", "C2Cl2S", "C2Cl3", "C2Cl3N", "C2Cl3O", "C2Cl3O2", "C2Cl4", "C2ClN", "C2ClN2", "C2ClN3", "C2ClN4", "C2ClNO", "C2ClNO2S", "C2ClNS", "C2ClO", "C2ClO2", "C2ClO2P", "C2ClO2S", "C2ClO3P", "C2F", "C2F2", "C2F2O", "C2F3", "C2F3N", "C2F3NS", "C2F3O", "C2F3O2S", "C2F3OS", "C2F4O", "C2F5", "C2FN", "C2FNO", "C2FO", "C2FO2", "C2FO3", "C2FS", "C2I", "C2I2", "C2IN", "C2IO", "C2IO2", "C2N", "C2N2", "C2N2O", "C2N2O2", "C2N2O2P", "C2N2O2S", "C2N2O3", "C2N2O3S", "C2N2OP", "C2N2S", "C2N2S2", "C2N3", "C2N3O", "C2N3S", "C2N4", "C2N4S", "C2N5", "C2NO", "C2NO2", "C2NO2P", "C2NO2PS", "C2NO2S", "C2NO2S2", "C2NO3", "C2NO3P", "C2NO3S", "C2NOP", "C2NOPS", "C2NOS", "C2NOS2", "C2NS", "C2NS2", "C2O", "C2O2", "C2O2P", "C2O2PS2", "C2O2S", "C2O2S2", "C2O3", "C2O3P", "C2O3PS", "C2O3S", "C2O4", "C2O4P", "C2O4S", "C2O5", "C2O5P", "C2O5S", "C2OP", "C2OPS2", "C2OS", "C2P", "C2PS", "C2S", "C2S2", "C3", "C3B", "C3Cl", "C3Cl2", "C3Cl2N3", "C3Cl2O", "C3Cl3", "C3Cl3N", "C3Cl3O", "C3Cl3O2", "C3Cl4", "C3ClFNO", "C3ClN", "C3ClN2", "C3ClN3", "C3ClN4", "C3ClNO", "C3ClNS", "C3ClO", "C3ClO2", "C3ClO3P", "C3ClS2", "C3F", "C3F2", "C3F2O", "C3F3", "C3F3N", "C3F3NO2S", "C3F3NS", "C3F3O", "C3F3O2S", "C3F3O3S", "C3F5", "C3FN", "C3FNO", "C3FO", "C3FO2", "C3FO3", "C3FS", 
"C3I", "C3IO", "C3IO2", "C3N", "C3N2", "C3N2O", "C3N2O2", "C3N2O2P", "C3N2O2S", "C3N2O2S2", "C3N2O3", "C3N2OS", "C3N2S", "C3N2S2", "C3N3", "C3N3O", "C3N3S", "C3N4", "C3NO", "C3NO2", "C3NO2S", "C3NO2S2", "C3NO3", "C3NO3P", "C3NO3S", "C3NO3S2", "C3NO4", "C3NOP", "C3NOS", "C3NOS2", "C3NS", "C3NS2", "C3O", "C3O2", "C3O2S", "C3O2S2", "C3O3", "C3O3P", "C3O3PS", "C3O3S", "C3O4", "C3O4P", "C3O4S", "C3O5", "C3O5P", "C3O5S", "C3O6P", "C3OP", "C3OS", "C3P", "C3S", "C3S2", "C4", "C4Cl", "C4Cl2", "C4Cl2N", "C4Cl2N2", "C4Cl2NO", "C4Cl2O", "C4Cl3", "C4Cl3O", "C4ClFN", "C4ClFNO", "C4ClN", "C4ClN2", "C4ClN2S", "C4ClN3", "C4ClNO", "C4ClO", "C4ClO2", "C4ClO3", "C4ClS", "C4ClS2", "C4F", "C4F2", "C4F2O", "C4F3", "C4F3N3O", "C4F3O", "C4F3O2S", "C4F4O", "C4F5", "C4FN", "C4FO", "C4FO3", "C4I", "C4IN", "C4IO", "C4N", "C4N2", "C4N2O", "C4N2O2", "C4N2O2S", "C4N2O2S2", "C4N2O3", "C4N2OS", "C4N2S", "C4N2S2", "C4N3", "C4N3O", "C4N3S", "C4N4", "C4N4S", "C4N5", "C4NO", "C4NO2", "C4NO2S", "C4NO2S2", "C4NO3", "C4NO3P", "C4NO3S", "C4NO3S2", "C4NO4P", "C4NOP", "C4NOS", "C4NS", "C4NS2", "C4O", "C4O2", "C4O2P", "C4O2S", "C4O2S2", "C4O3", "C4O3PS", "C4O3S", "C4O4", "C4O4P", "C4O4S", "C4O5", "C4OS", "C4OS2", "C4P", "C4S", "C4S2", "C5", "C5Cl", "C5Cl2", "C5Cl2N", "C5Cl2NO", "C5Cl2O", "C5Cl2O2", "C5Cl2OS", "C5Cl3", "C5Cl3O", "C5ClFN", "C5ClFNO", "C5ClN", "C5ClN2", "C5ClN2S", "C5ClN3", "C5ClN4", "C5ClNO", "C5ClO", "C5ClO2", "C5ClO3", "C5ClOS", "C5ClS", "C5F", "C5F2", "C5F3", "C5F3N", "C5F3O", "C5F3O2S", "C5F5", "C5FN", "C5FN2S", "C5FO", "C5FO3", "C5I", "C5IN", "C5INO", "C5IO", "C5N", "C5N2", "C5N2O", "C5N2O2", "C5N2O2S", "C5N2O2S2", "C5N2OS", "C5N2S", "C5N3", "C5N3O", "C5N3S", "C5N4", "C5N4S", "C5N5", "C5NO", "C5NO2", "C5NO2S", "C5NO2S2", "C5NO3", "C5NO3S2", "C5NOS", "C5NOS2", "C5NS", "C5NS2", "C5O", "C5O2", "C5O2P", "C5O2S", "C5O2S2", "C5O3", "C5O3S2", "C5O4", "C5O4P", "C5O5", "C5OS", "C5OS2", "C5S", "C5S2", "C6", "C6Cl", "C6Cl2", "C6Cl2N", "C6Cl2N2", "C6Cl2O", "C6Cl3", "C6Cl3O", "C6ClFNO", "C6ClN", 
"C6ClN2", "C6ClNO", "C6ClNO2S", "C6ClO", "C6ClO2", "C6ClO2S", "C6ClO3", "C6ClO3S", "C6ClOS", "C6ClS", "C6F", "C6F2", "C6F2N", "C6F3", "C6F3N", "C6F3N2", "C6F3O", "C6FN", "C6FN2S", "C6FNS", "C6FO", "C6FO3", "C6FS", "C6I", "C6IN", "C6INO", "C6IO", "C6N", "C6N2", "C6N2O", "C6N2O2", "C6N2O2S", "C6N2OS", "C6N2S2", "C6N3", "C6N3O2S", "C6N3OS", "C6N3S", "C6N4", "C6N4S", "C6N5", "C6NO", "C6NO2", "C6NO2S", "C6NO3", "C6NO3S", "C6NO4", "C6NOS", "C6NS", "C6NS2", "C6O", "C6O2", "C6O2S", "C6O2S2", "C6O3", "C6O3S", "C6O4", "C6O5", "C6O6", "C6OS", "C6OS2", "C6S", "C6S2", "C7", "C7Cl", "C7Cl2", "C7Cl2N", "C7Cl2NO", "C7Cl2O", "C7Cl3", "C7Cl3O", "C7ClN", "C7ClN2", "C7ClN2O", "C7ClN4O2", "C7ClNO", "C7ClNS", "C7ClO", "C7ClO2", "C7ClO3S", "C7ClS", "C7F", "C7F2", "C7F2N", "C7F2O", "C7F3", "C7F3N", "C7F3O", "C7FN", "C7FN2S", "C7FNO", "C7FNS", "C7FO", "C7I", "C7IN", "C7INO", "C7IO", "C7N", "C7N2", "C7N2O", "C7N2O2S", "C7N2O3S", "C7N2OS", "C7N2S", "C7N3", "C7N3S", "C7N4S", "C7NO", "C7NO2", "C7NO2S", "C7NO3", "C7NO3S", "C7NO4S", "C7NS", "C7NS2", "C7O", "C7O2", "C7O2S", "C7O2S2", "C7O3", "C7O3PS", "C7O3S", "C7O4", "C7O4S", "C7O5", "C7O6", "C7OS", "C7S", "C7S2", "C8", "C8Cl", "C8Cl2", "C8Cl2N", "C8Cl2N2", "C8Cl2NO", "C8Cl2O", "C8Cl3", "C8Cl3O", "C8Cl4", "C8ClFN", "C8ClN", "C8ClN2", "C8ClNO", "C8ClNO2S", "C8ClNS", "C8ClO", "C8ClO2", "C8ClS", "C8F", "C8F2", "C8F2N", "C8F2O", "C8F3", "C8F3N", "C8F3N2", "C8F3O", "C8F3O2S", "C8FN", "C8FNO", "C8FO", "C8FS", "C8IN", "C8IO", "C8N", "C8N2", "C8N2O", "C8N2O2S", "C8N2OS", "C8N2S", "C8N2S2", "C8NO", "C8NO2", "C8NO2S", "C8NO3", "C8NO3S", "C8NOS", "C8NS", "C8NS2", "C8O", "C8O2", "C8O2S", "C8O3", "C8O3PS", "C8O3S", "C8O4", "C8O5", "C8O6", "C8OS", "C8S", "C8S2", "C9", "C9Cl", "C9Cl2", "C9ClINO", "C9ClN", "C9ClNO", "C9ClO", "C9ClO2", "C9ClS", "C9F", "C9F2", "C9F3", "C9F3N", "C9FN", "C9FN2S", "C9FNO", "C9FNS", "C9I", "C9IO", "C9N", "C9N2", "C9N2OS", "C9NO", "C9NO2", "C9NO2S", "C9O", "C9O2", "C9O3", "C9O4", "C9S", "CBO", "CCl", "CCl2", "CCl2F", "CCl2FS", "CCl3", 
"CCl3O", "CCl4", "CClF3S", "CClN", "CClN2", "CClN4", "CClNO", "CClNO2P", "CClNS", "CClO", "CClO2", "CClO2P", "CClO2S", "CF", "CF2", "CF2O", "CF3", "CF3N", "CF3O", "CF3O2S", "CF3O3S", "CF3OS", "CF3S", "CF5", "CFN", "CFNO", "CFO", "CFO2", "CFO2S", "CFS", "CI", "CI2", "CIN", "CIO", "CIO2", "CN", "CN2", "CN2O", "CN2O2", "CN2O2P", "CN2O2S", "CN2O3S", "CN2OS", "CN2S", "CN3", "CN3O", "CN3S", "CN4", "CNO", "CNO2", "CNO2P", "CNO2PS", "CNO2S", "CNO2S2", "CNO3", "CNO3P", "CNO3S", "CNO3S2", "CNOP", "CNOS", "CNP", "CNS", "CO", "CO2", "CO2P", "CO2S", "CO2S2", "CO3", "CO3P", "CO3PS", "CO3S", "CO4", "CO4P", "CO4S", "CO5S", "COP", "COS", "CP", "CS", "CS2", "Cl", "Cl2", "Cl2S", "Cl3", "ClF", "ClN", "ClNO", "ClO", "ClO2", "ClO2P", "ClO2S", "ClO3P", "F", "F2", "F2O", "F3", "F3N", "F3O", "FN", "FO", "FO2", "FO2S", "FP", "FS", "I", "I2", "IN", "IO", "N", "N2", "N2O", "N2O2", "N2O2S", "N2OP", "N2S", "N3", "N3O", "N4S", "NO", "NO2", "NO2PS", "NO2S", "NO2S2", "NO3", "NO3P", "NO3S", "NO3S2", "NO4S", "NOP", "NOS", "NP", "NS", "O", "O2", "O2P", "O2S", "O3", "O3P", "O3PS", "O3S", "O4", "O4P", "O4S", "O5P", "O5S", "OP", "OP2", "OS", "P", "PS", "S", "S2"};
    /**
     * Parsed counterparts of {@link #MATCHING_FORMULA_STRINGS}, index-aligned with that array.
     * Allocated with all slots {@code null}; the slots are filled on first use by
     * {@link #getMatchingFormulas()}, so read it through that method rather than directly.
     */
    static final MolecularFormula[] MATCHING_FORMULAS = new MolecularFormula[MATCHING_FORMULA_STRINGS.length];

    public FragmentSet(FTree[] fTreeArr, SimpleSpectrum[] simpleSpectrumArr, double[] dArr) {
        this.weightMap = findFragmentsInTrees(fTreeArr);
        this.fragments = (MolecularFormula[]) this.weightMap.keySet().toArray(new MolecularFormula[this.weightMap.size()]);
    }

    /**
     * Returns the selected fragment formulas.
     *
     * @return the internal array itself (no defensive copy is made), so changes made by the
     *         caller are visible to this object
     */
    public MolecularFormula[] getFragments() {
        return fragments;
    }

    /**
     * Returns the weight assigned to every selected fragment formula.
     *
     * @return the internal map itself (no defensive copy is made), so changes made by the
     *         caller are visible to this object
     */
    public HashMap<MolecularFormula, Double> getWeightMap() {
        return weightMap;
    }

    /**
     * Earlier variant of the fragment selection that returns only the chosen formulas,
     * without weights.
     *
     * <p>Steps:
     * <ol>
     *   <li>Collect every fragment formula occurring at least 5 times over all trees.</li>
     *   <li>Add the formulas of {@code CommonLossEdgeScorer.literature_list} and of
     *       {@link #getMatchingFormulas()}.</li>
     *   <li>Reset every counter to 1, then for each tree whose root formula allows the
     *       candidate to be subtracted: count the tree, and count every fragment of that
     *       tree from which the candidate can be subtracted.</li>
     *   <li>Keep a candidate when it was counted for at least 20 trees (or at least 5 trees
     *       if it is not CHNO) and its match counter divided by its tree counter is at
     *       most 10.</li>
     *   <li>Sort the survivors and walk them in order, dropping a candidate when it exceeds an
     *       already accepted formula by fewer than 4 atoms that are all carbon or hydrogen.</li>
     * </ol>
     *
     * @param fTreeArr trees to analyse
     * @return the accepted formulas in the order they were accepted
     */
    public static MolecularFormula[] findFragmentsInTreesOld(FTree[] fTreeArr) {
        final TObjectIntHashMap<MolecularFormula> matchCounts = new TObjectIntHashMap<MolecularFormula>();
        final TObjectIntHashMap<MolecularFormula> treeCounts = new TObjectIntHashMap<MolecularFormula>();

        // Step 1: how often does each formula appear as a fragment?
        for (FTree tree : fTreeArr) {
            for (Fragment fragment : tree.getFragments()) {
                matchCounts.adjustOrPutValue(fragment.getFormula(), 1, 1);
            }
        }
        matchCounts.retainEntries(new TObjectIntProcedure<MolecularFormula>() {
            public boolean execute(MolecularFormula candidate, int occurrences) {
                return occurrences >= 5;
            }
        });

        // Step 2: candidates that are considered regardless of their fragment frequency.
        for (String literatureLoss : CommonLossEdgeScorer.literature_list) {
            matchCounts.put(MolecularFormula.parse(literatureLoss), 1);
        }
        for (MolecularFormula predefined : getMatchingFormulas()) {
            matchCounts.put(predefined, 1);
        }

        // Step 3: from here on the map is only a key set; all counters restart at 1.
        matchCounts.transformValues(new TIntFunction() {
            public int execute(int ignored) {
                return 1;
            }
        });
        for (final FTree tree : fTreeArr) {
            final MolecularFormula rootFormula = tree.getRoot().getFormula();
            matchCounts.forEachKey(new TObjectProcedure<MolecularFormula>() {
                public boolean execute(MolecularFormula candidate) {
                    if (rootFormula.isSubtractable(candidate)) {
                        treeCounts.adjustOrPutValue(candidate, 1, 1);
                        for (Fragment fragment : tree.getFragments()) {
                            if (fragment.getFormula().isSubtractable(candidate)) {
                                // Only the value of an existing key changes, so iteration stays valid.
                                matchCounts.adjustOrPutValue(candidate, 1, 1);
                            }
                        }
                    }
                    return true;
                }
            });
        }

        // Step 4: frequency and ratio thresholds.
        matchCounts.retainEntries(new TObjectIntProcedure<MolecularFormula>() {
            public boolean execute(MolecularFormula candidate, int matches) {
                final int trees = treeCounts.get(candidate);
                final boolean frequentEnough = trees >= 20 || (!candidate.isCHNO() && trees >= 5);
                if (!frequentEnough) {
                    return false;
                }
                return ((float) matches) / ((float) trees) <= 10.0f;
            }
        });

        // Step 5: greedy pass over the sorted survivors.
        final MolecularFormula[] sorted = matchCounts.keys(new MolecularFormula[matchCounts.size()]);
        Arrays.sort(sorted);
        final ArrayList<MolecularFormula> accepted = new ArrayList<MolecularFormula>();
        nextCandidate:
        for (MolecularFormula candidate : sorted) {
            for (MolecularFormula kept : accepted) {
                final MolecularFormula rest = candidate.subtract(kept);
                if (rest.isAllPositiveOrZero()
                        && rest.atomCount() < 4
                        && rest.atomCount() == rest.numberOfCarbons() + rest.numberOfHydrogens()) {
                    continue nextCandidate;
                }
            }
            accepted.add(candidate);
        }
        return accepted.toArray(new MolecularFormula[accepted.size()]);
    }

    /**
     * Selects fragment formulas from the given trees and assigns each a weight.
     *
     * <p>Steps:
     * <ol>
     *   <li>Collect every fragment formula occurring at least 5 times over all trees.</li>
     *   <li>Add a set of always-kept formulas: C, O, N, CO2, CN, S, P, every entry of
     *       {@code CommonLossEdgeScorer.literature_list} and every entry of
     *       {@link #getMatchingFormulas()}.</li>
     *   <li>Reset every counter to 1, then for each tree whose root formula allows the
     *       candidate to be subtracted: count the tree, add the tree's vertex count to the
     *       candidate's normalisation total, and count every fragment of that tree from which
     *       the candidate can be subtracted.</li>
     *   <li>Keep always-kept formulas unconditionally. Keep any other candidate when it was
     *       counted for at least {@code min(1% of the trees, 50)} trees (or at least 10 trees
     *       if it is not CHNO) and its match counter divided by its tree counter is at
     *       most 10.</li>
     *   <li>Sort the survivors and walk them in order, dropping a candidate when it exceeds an
     *       already accepted formula by fewer than 4 atoms that are all carbon or hydrogen.</li>
     *   <li>Weight each accepted formula by {@code matchCounter / normalisationTotal}.</li>
     * </ol>
     *
     * <p>The weight is computed in floating point. An always-kept formula that could not be
     * subtracted from any tree root has a normalisation total of 0 and therefore receives
     * {@link Double#POSITIVE_INFINITY} instead of triggering an integer division by zero.
     *
     * @param fTreeArr trees to analyse
     * @return a new map from accepted formula to its weight
     */
    public static HashMap<MolecularFormula, Double> findFragmentsInTrees(FTree[] fTreeArr) {
        final TObjectIntHashMap<MolecularFormula> matchCounts = new TObjectIntHashMap<MolecularFormula>();
        final TObjectIntHashMap<MolecularFormula> treeCounts = new TObjectIntHashMap<MolecularFormula>();

        // Step 1: how often does each formula appear as a fragment?
        for (FTree tree : fTreeArr) {
            for (Fragment fragment : tree.getFragments()) {
                matchCounts.adjustOrPutValue(fragment.getFormula(), 1, 1);
            }
        }
        matchCounts.retainEntries(new TObjectIntProcedure<MolecularFormula>() {
            public boolean execute(MolecularFormula candidate, int occurrences) {
                return occurrences >= 5;
            }
        });

        // Step 2: formulas that bypass the frequency thresholds further down.
        final HashSet<MolecularFormula> alwaysKept = new HashSet<MolecularFormula>();
        for (String seed : new String[] {"C", "O", "N", "CO2", "CN", "S", "P"}) {
            alwaysKept.add(MolecularFormula.parse(seed));
        }
        for (String literatureLoss : CommonLossEdgeScorer.literature_list) {
            alwaysKept.add(MolecularFormula.parse(literatureLoss));
        }
        for (MolecularFormula predefined : getMatchingFormulas()) {
            alwaysKept.add(predefined);
        }
        for (MolecularFormula forced : alwaysKept) {
            matchCounts.put(forced, 1);
        }

        // Step 3: from here on the map is only a key set; all counters restart at 1.
        matchCounts.transformValues(new TIntFunction() {
            public int execute(int ignored) {
                return 1;
            }
        });
        final TObjectIntHashMap<MolecularFormula> vertexTotals =
                new TObjectIntHashMap<MolecularFormula>(matchCounts.size());
        for (final FTree tree : fTreeArr) {
            final MolecularFormula rootFormula = tree.getRoot().getFormula();
            matchCounts.forEachKey(new TObjectProcedure<MolecularFormula>() {
                public boolean execute(MolecularFormula candidate) {
                    if (!rootFormula.isSubtractable(candidate)) {
                        return true;
                    }
                    final int vertices = tree.numberOfVertices();
                    vertexTotals.adjustOrPutValue(candidate, vertices, vertices);
                    treeCounts.adjustOrPutValue(candidate, 1, 1);
                    for (Fragment fragment : tree.getFragments()) {
                        if (fragment.getFormula().isSubtractable(candidate)) {
                            // Only the value of an existing key changes, so iteration stays valid.
                            matchCounts.adjustOrPutValue(candidate, 1, 1);
                        }
                    }
                    return true;
                }
            });
        }

        // Step 4: frequency and ratio thresholds; the tree threshold scales with the input size.
        final int minTrees = Math.min((int) (fTreeArr.length * 0.01d), 50);
        matchCounts.retainEntries(new TObjectIntProcedure<MolecularFormula>() {
            public boolean execute(MolecularFormula candidate, int matches) {
                if (alwaysKept.contains(candidate)) {
                    return true;
                }
                final int trees = treeCounts.get(candidate);
                final boolean frequentEnough = trees >= minTrees || (!candidate.isCHNO() && trees >= 10);
                return frequentEnough && ((float) matches) / ((float) trees) <= 10.0f;
            }
        });

        // Step 5: greedy pass over the sorted survivors.
        final MolecularFormula[] sorted = matchCounts.keys(new MolecularFormula[matchCounts.size()]);
        Arrays.sort(sorted);
        final ArrayList<MolecularFormula> accepted = new ArrayList<MolecularFormula>();
        nextCandidate:
        for (MolecularFormula candidate : sorted) {
            for (MolecularFormula kept : accepted) {
                final MolecularFormula rest = candidate.subtract(kept);
                if (rest.isAllPositiveOrZero()
                        && rest.atomCount() < 4
                        && rest.atomCount() == rest.numberOfCarbons() + rest.numberOfHydrogens()) {
                    continue nextCandidate;
                }
            }
            accepted.add(candidate);
        }

        // Step 6: weights. Both operands are widened to double so the ratio is not truncated.
        final HashMap<MolecularFormula, Double> weights = new HashMap<MolecularFormula, Double>(accepted.size());
        for (MolecularFormula formula : accepted) {
            final double matches = matchCounts.get(formula);
            final double normalisation = vertexTotals.get(formula);
            weights.put(formula, Double.valueOf(matches / normalisation));
        }
        return weights;
    }

    /**
     * Returns the parsed form of {@link #MATCHING_FORMULA_STRINGS}, parsing it on first use.
     *
     * <p>All strings are parsed into a scratch array first and copied into
     * {@link #MATCHING_FORMULAS} only after every one of them parsed. If parsing throws, the
     * cache is left untouched and the next call retries, rather than handing out an array
     * that is filled only up to the failing index. The cache is considered filled when its
     * last slot is non-null.
     *
     * <p>This method is not synchronized.
     *
     * @return the shared cache array itself (not a copy); callers must not modify it
     */
    public static MolecularFormula[] getMatchingFormulas() {
        final int count = MATCHING_FORMULAS.length;
        // count > 0 guards the index below in case the string table is ever emptied.
        if (count > 0 && MATCHING_FORMULAS[count - 1] == null) {
            final MolecularFormula[] parsed = new MolecularFormula[count];
            for (int i = 0; i < count; i++) {
                parsed[i] = MolecularFormula.parse(MATCHING_FORMULA_STRINGS[i]);
            }
            System.arraycopy(parsed, 0, MATCHING_FORMULAS, 0, count);
        }
        return MATCHING_FORMULAS;
    }
}
