package uk.ac.ebi.pride.jaxb.xml.extractor;

import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import psidev.psi.tools.xxindex.index.IndexElement;
import uk.ac.ebi.pride.jaxb.xml.PrideXmlXpath;
import uk.ac.ebi.pride.jaxb.xml.xxindex.XmlIndexer;
import uk.ac.ebi.pride.jaxb.xml.xxindex.XmlIndexerFactory;

/* loaded from: input_file:uk/ac/ebi/pride/jaxb/xml/extractor/PrideXmlExtractor.class */
/**
 * Extracts XML fragments from a PRIDE XML file through an {@link XmlIndexer}.
 *
 * <p>On construction the file is indexed for the xpaths returned by
 * {@link PrideXmlXpath#getXpaths()} and several lookup caches are built:
 * <ul>
 *   <li>spectrum id (value of the {@code id} attribute) to index element;</li>
 *   <li>identification id to index element, separately for the {@code GELFREE} and
 *       {@code TWOD} xpaths. Identification ids are NOT read from the file: they are
 *       consecutive integers (as strings) assigned in index order, starting at 0 for the
 *       first {@code GELFREE} element and continuing through the {@code TWOD} elements;</li>
 *   <li>identification id to the peptide index elements located inside it;</li>
 *   <li>the list of spectrum references found inside peptide elements.</li>
 * </ul>
 */
public class PrideXmlExtractor {
    private static final Logger logger = LoggerFactory.getLogger(PrideXmlExtractor.class.getName());

    /** Captures the quoted value of an {@code id} attribute (case-insensitive). */
    private static final Pattern ID_PATTERN =
            Pattern.compile("\\sid\\s*=\\s*['\"]([^'\"]*)['\"]", Pattern.CASE_INSENSITIVE);
    /** Captures the quoted value of a {@code version} attribute (case-insensitive). */
    private static final Pattern VERSION_PATTERN =
            Pattern.compile("\\sversion\\s*=\\s*['\"]([^'\"]*)['\"]", Pattern.CASE_INSENSITIVE);
    /** Captures the text found between an opening and a closing tag. */
    private static final Pattern TAG_PATTERN =
            Pattern.compile("<[^>]+>\\s*([^<>]+)\\s*</[^>]+>", Pattern.CASE_INSENSITIVE);
    /** Captures the quoted value of an {@code mslevel} attribute (case-insensitive). */
    private static final Pattern MS_LEVEL_PATTERN =
            Pattern.compile("\\smslevel\\s*=\\s*['\"]([^'\"]*)['\"]", Pattern.CASE_INSENSITIVE);
    /** Offset added to an element's start position when only its beginning is scanned. */
    private static final int XML_CHAR_INCREMENT = 300;

    private final XmlIndexer indexer;
    private final File prideXmlFile;
    private Map<String, IndexElement> spectrumIdMap = null;
    private Map<String, IndexElement> gelFreeAccMap = null;
    private Map<String, IndexElement> twoDimAccMap = null;
    private Map<String, List<IndexElement>> identToPeptideMap = null;
    private List<String> identifiedSpectrumList = null;
    /** Running counter used to generate identification ids across both identification maps. */
    private int identificationId = 0;

    /**
     * Indexes the given file and builds all lookup caches.
     *
     * @param file the PRIDE XML file to index
     * @throws IllegalArgumentException if {@code file} is null or does not exist
     */
    public PrideXmlExtractor(File file) {
        if (file == null) {
            throw new IllegalArgumentException("Xml file to be indexed must not be null");
        }
        if (!file.exists()) {
            throw new IllegalArgumentException("Xml file to be indexed does not exist: " + file.getAbsolutePath());
        }
        this.prideXmlFile = file;
        this.indexer = XmlIndexerFactory.getInstance().buildIndex(this.prideXmlFile, PrideXmlXpath.getXpaths());
        initializeCaches();
    }

    /**
     * @return the file this extractor was created for
     */
    public File getSourceFile() {
        return this.prideXmlFile;
    }

    /**
     * Builds every cache. Order matters: the identification maps must exist before the
     * peptide cache is built, because peptides are assigned to identifications by position.
     */
    private void initializeCaches() {
        this.spectrumIdMap = initCacheMap(PrideXmlXpath.MZDATA_SPECTRUM.getXpath(), ID_PATTERN);
        this.gelFreeAccMap = initIdentificationCacheMap(PrideXmlXpath.GELFREE.getXpath());
        this.twoDimAccMap = initIdentificationCacheMap(PrideXmlXpath.TWOD.getXpath());
        this.identToPeptideMap = initPeptideCacheMap();
        this.identifiedSpectrumList = initIdentifiedSpectrumList();
    }

    /**
     * Maps the id extracted with {@code idPattern} to each element indexed under {@code xpath},
     * preserving index order. When an id occurs more than once only the first element is kept
     * and an error is logged.
     */
    private Map<String, IndexElement> initCacheMap(String xpath, Pattern idPattern) {
        Map<String, IndexElement> cache = new LinkedHashMap<String, IndexElement>();
        for (IndexElement element : this.indexer.getIndexElements(xpath)) {
            String id = getIDByPattern(element, idPattern, true);
            if (cache.containsKey(id)) {
                logger.error("Ambiguous ID Exception: {} \nID: {}", xpath, id);
            } else {
                cache.put(id, element);
            }
        }
        return cache;
    }

    /**
     * Maps a generated id to each element indexed under {@code xpath}. The id is the current
     * value of the shared {@link #identificationId} counter, which is incremented per element.
     */
    private Map<String, IndexElement> initIdentificationCacheMap(String xpath) {
        Map<String, IndexElement> cache = new LinkedHashMap<String, IndexElement>();
        for (IndexElement element : this.indexer.getIndexElements(xpath)) {
            String id = String.valueOf(this.identificationId);
            if (cache.containsKey(id)) {
                logger.error("Ambiguous ID Exception: {} \nID: {}", xpath, id);
            } else {
                cache.put(id, element);
            }
            this.identificationId++;
        }
        return cache;
    }

    /**
     * Groups peptide elements by the id of the identification element that encloses them.
     * Peptides for which no enclosing identification is found are grouped under a null key.
     */
    private Map<String, List<IndexElement>> initPeptideCacheMap() {
        Map<String, List<IndexElement>> peptidesByIdent = new LinkedHashMap<String, List<IndexElement>>();
        for (IndexElement peptide : this.indexer.getIndexElements(PrideXmlXpath.GELFREE_PEPTIDE.getXpath())) {
            addIdentPeptide(searchForId(peptide, this.gelFreeAccMap), peptide, peptidesByIdent);
        }
        for (IndexElement peptide : this.indexer.getIndexElements(PrideXmlXpath.TWOD_PEPTIDE.getXpath())) {
            addIdentPeptide(searchForId(peptide, this.twoDimAccMap), peptide, peptidesByIdent);
        }
        return peptidesByIdent;
    }

    /**
     * Collects the tag content of every peptide spectrum-reference element (gel-free xpath
     * first, then two-dimensional xpath). Entries may be null when the content cannot be matched.
     */
    private List<String> initIdentifiedSpectrumList() {
        List<String> spectrumRefs = new ArrayList<String>();
        for (IndexElement ref : this.indexer.getIndexElements(PrideXmlXpath.GELFREE_PEPTIDE_SPEC_REF.getXpath())) {
            spectrumRefs.add(getIDByPattern(ref, TAG_PATTERN, false));
        }
        for (IndexElement ref : this.indexer.getIndexElements(PrideXmlXpath.TWOD_PEPTIDE_SPEC_REF.getXpath())) {
            spectrumRefs.add(getIDByPattern(ref, TAG_PATTERN, false));
        }
        return spectrumRefs;
    }

    /**
     * Finds the key of the first entry whose element range fully contains {@code inner}.
     *
     * @return the matching key, or null when no entry encloses {@code inner}
     */
    private String searchForId(IndexElement inner, Map<String, IndexElement> candidates) {
        for (Map.Entry<String, IndexElement> entry : candidates.entrySet()) {
            IndexElement outer = entry.getValue();
            if (outer.getStart() <= inner.getStart() && outer.getStop() >= inner.getStop()) {
                return entry.getKey();
            }
        }
        return null;
    }

    /** Appends {@code peptide} to the list stored under {@code identId}, creating the list on first use. */
    private void addIdentPeptide(String identId, IndexElement peptide, Map<String, List<IndexElement>> target) {
        List<IndexElement> peptides = target.get(identId);
        if (peptides == null) {
            peptides = new ArrayList<IndexElement>();
            target.put(identId, peptides);
        }
        peptides.add(peptide);
    }

    /**
     * @return the value of the {@code version} attribute of the first element indexed under the
     *         {@code EXP_COLLECTION} xpath, or null when there is no such element or attribute
     */
    public String getExpCollectionVersionString() {
        Iterator<IndexElement> elements = this.indexer.getIndexElements(PrideXmlXpath.EXP_COLLECTION.getXpath()).iterator();
        if (!elements.hasNext()) {
            return null;
        }
        return getIDByPattern(elements.next(), VERSION_PATTERN, true);
    }

    /** @return the first XML string for the {@code EXP_ACC} xpath, or null if there is none */
    public String getExpAccXmlString() {
        return getFirstXmlString(PrideXmlXpath.EXP_ACC.getXpath());
    }

    /** @return the first XML string for the {@code EXP_TITLE} xpath, or null if there is none */
    public String getExpTitleXmlString() {
        return getFirstXmlString(PrideXmlXpath.EXP_TITLE.getXpath());
    }

    /** @return the XML strings the indexer provides for the {@code EXP_REF} xpath */
    public List<String> getReferenceXmlStrings() {
        return this.indexer.getXmlStringList(PrideXmlXpath.EXP_REF.getXpath());
    }

    /** @return the first XML string for the {@code EXP_SHORTLABEL} xpath, or null if there is none */
    public String getExpShortLabelXmlString() {
        return getFirstXmlString(PrideXmlXpath.EXP_SHORTLABEL.getXpath());
    }

    /** @return the first XML string for the {@code EXP_PROTOCOL} xpath, or null if there is none */
    public String getProtocolXmlString() {
        return getFirstXmlString(PrideXmlXpath.EXP_PROTOCOL.getXpath());
    }

    /** @return the first XML string for the {@code EXP_ADDITIONAL} xpath, or null if there is none */
    public String getAdditionalParamXmlString() {
        return getFirstXmlString(PrideXmlXpath.EXP_ADDITIONAL.getXpath());
    }

    /** @return the XML strings the indexer provides for the {@code MZDATA_CVLOOKUP} xpath */
    public List<String> getCvLookupXmlStrings() {
        return this.indexer.getXmlStringList(PrideXmlXpath.MZDATA_CVLOOKUP.getXpath());
    }

    /** @return the first XML string for the {@code MZDATA_DESC} xpath, or null if there is none */
    public String getDescriptionXmlString() {
        return getFirstXmlString(PrideXmlXpath.MZDATA_DESC.getXpath());
    }

    /** @return the first XML string for the {@code MZDATA_DESC_AMDIN} xpath, or null if there is none */
    public String getAdminXmlString() {
        return getFirstXmlString(PrideXmlXpath.MZDATA_DESC_AMDIN.getXpath());
    }

    /** @return the first XML string for the {@code MZDATA_DESC_INSTRUMENT} xpath, or null if there is none */
    public String getInstrumentXmlString() {
        return getFirstXmlString(PrideXmlXpath.MZDATA_DESC_INSTRUMENT.getXpath());
    }

    /** @return the first XML string for the {@code MZDATA_DESC_DATAPROCESSING} xpath, or null if there is none */
    public String getDataProcessingXmlString() {
        return getFirstXmlString(PrideXmlXpath.MZDATA_DESC_DATAPROCESSING.getXpath());
    }

    /** @return a new list holding the cached spectrum ids in index order */
    public List<String> getSpectrumIds() {
        return new ArrayList<String>(this.spectrumIdMap.keySet());
    }

    /**
     * @param str the spectrum id
     * @return the XML of the spectrum with that id, or null if the id is unknown
     */
    public String getSpectrumXmlString(String str) {
        return getXmlById(this.spectrumIdMap, str);
    }

    /**
     * @return a copy of the spectrum id to index element mapping; the copy keeps the
     *         iteration order of the cache
     */
    public Map<String, IndexElement> getSpectrumIndices() {
        return new LinkedHashMap<String, IndexElement>(this.spectrumIdMap);
    }

    /**
     * Reads the {@code mslevel} attribute from the XML of the given spectrum.
     *
     * @param str the spectrum id
     * @return the parsed level, or -1 if the id is unknown or no such attribute is found
     * @throws NumberFormatException if the attribute value is not a valid integer
     */
    public int getSpectrumMsLevel(String str) {
        IndexElement spectrum = this.spectrumIdMap.get(str);
        if (spectrum == null) {
            return -1;
        }
        String msLevel = getIDByPattern(spectrum, MS_LEVEL_PATTERN, false);
        // A missing attribute is reported with the same sentinel as an unknown id
        // instead of letting Integer.parseInt fail on null.
        return msLevel == null ? -1 : Integer.parseInt(msLevel);
    }

    /**
     * @param str the spectrum id
     * @return true if the id was found among the peptide spectrum references
     */
    public boolean isIdentifiedSpectrum(String str) {
        return this.identifiedSpectrumList.contains(str);
    }

    /** @return a new list with the gel-free identification ids followed by the two-dimensional ones */
    public List<String> getIdentIds() {
        List<String> ids = new ArrayList<String>(this.gelFreeAccMap.keySet());
        ids.addAll(this.twoDimAccMap.keySet());
        return ids;
    }

    /**
     * Looks the id up among the gel-free identifications first and falls back to the
     * two-dimensional ones.
     *
     * @param str the identification id
     * @return the identification XML, or null if the id is unknown in both caches
     */
    public String getIdentXmlString(String str) {
        String xml = getGelFreeIdentXmlString(str);
        return xml != null ? xml : getTwoDimIdentXmlString(str);
    }

    /**
     * @param str the identification id
     * @return true if the id belongs to a gel-free identification
     */
    public boolean hasGelFreeIdentId(String str) {
        return this.gelFreeAccMap.containsKey(str);
    }

    /** @return a new list holding the gel-free identification ids in index order */
    public List<String> getGelFreeIdentIds() {
        return new ArrayList<String>(this.gelFreeAccMap.keySet());
    }

    /**
     * @param str the identification id
     * @return the XML of the gel-free identification with that id, or null if the id is unknown
     */
    public String getGelFreeIdentXmlString(String str) {
        return getXmlById(this.gelFreeAccMap, str);
    }

    /**
     * @param str the identification id
     * @return true if the id belongs to a two-dimensional identification
     */
    public boolean hasTwoDimIdentId(String str) {
        return this.twoDimAccMap.containsKey(str);
    }

    /** @return a new list holding the two-dimensional identification ids in index order */
    public List<String> getTwoDimIdentIds() {
        return new ArrayList<String>(this.twoDimAccMap.keySet());
    }

    /**
     * @param str the identification id
     * @return the XML of the two-dimensional identification with that id, or null if the id is unknown
     */
    public String getTwoDimIdentXmlString(String str) {
        return getXmlById(this.twoDimAccMap, str);
    }

    /**
     * @return the number of elements indexed under the gel-free and two-dimensional peptide
     *         xpaths combined; a null collection from the indexer counts as zero
     */
    public int getNumberOfPeptides() {
        Collection<IndexElement> gelFreePeptides = this.indexer.getIndexElements(PrideXmlXpath.GELFREE_PEPTIDE.getXpath());
        Collection<IndexElement> twoDimPeptides = this.indexer.getIndexElements(PrideXmlXpath.TWOD_PEPTIDE.getXpath());
        int total = gelFreePeptides == null ? 0 : gelFreePeptides.size();
        if (twoDimPeptides != null) {
            total += twoDimPeptides.size();
        }
        return total;
    }

    /**
     * @param str the identification id
     * @return the number of peptides cached for that identification, 0 if there are none
     */
    public int getNumberOfPeptides(String str) {
        List<IndexElement> peptides = this.identToPeptideMap.get(str);
        return peptides == null ? 0 : peptides.size();
    }

    /**
     * @param str the identification id
     * @param i   zero-based position of the peptide within the identification
     * @return the peptide XML, or null if the id has no peptides or {@code i} is out of range
     */
    public String getPeptideXmlString(String str, int i) {
        List<IndexElement> peptides = this.identToPeptideMap.get(str);
        if (peptides == null || i < 0 || i >= peptides.size()) {
            return null;
        }
        return this.indexer.getXmlByIndexElement(peptides.get(i));
    }

    /**
     * @param str the identification id
     * @return the XML of every peptide cached for that identification; an empty list when
     *         the identification has no peptides or the id is unknown
     */
    public List<String> getPeptideXmlStrings(String str) {
        List<String> xmlStrings = new ArrayList<String>();
        List<IndexElement> peptides = this.identToPeptideMap.get(str);
        if (peptides == null) {
            // No peptides cached for this id: mirror getNumberOfPeptides(String) returning 0
            // rather than failing with a NullPointerException.
            return xmlStrings;
        }
        for (IndexElement peptide : peptides) {
            xmlStrings.add(this.indexer.getXmlByIndexElement(peptide));
        }
        return xmlStrings;
    }

    /**
     * @param str the xpath to look up
     * @return the indexer's XML string iterator for that xpath
     */
    public Iterator<String> getPrideXmlEntries(String str) {
        return this.indexer.getXmlStringIterator(str);
    }

    /** Returns the first XML string the indexer yields for {@code xpath}, or null if there is none. */
    private String getFirstXmlString(String xpath) {
        Iterator<String> xmlStrings = this.indexer.getXmlStringIterator(xpath);
        if (xmlStrings == null || !xmlStrings.hasNext()) {
            return null;
        }
        return xmlStrings.next();
    }

    /** Returns the XML of the element stored under {@code id} in {@code index}, or null if absent. */
    private String getXmlById(Map<String, IndexElement> index, String id) {
        IndexElement element = index.get(id);
        return element == null ? null : this.indexer.getXmlByIndexElement(element);
    }

    /**
     * Applies {@code pattern} to the XML of {@code element} and returns its first capture group.
     *
     * @param element      the element whose XML is searched
     * @param pattern      a pattern with at least one capturing group
     * @param startTagOnly when true the snippet requested from the indexer ends at
     *                     {@code start + XML_CHAR_INCREMENT} instead of the element's stop position
     * @return the first captured group, or null when the pattern does not match
     */
    private String getIDByPattern(IndexElement element, Pattern pattern, boolean startTagOnly) {
        long start = element.getStart();
        long stop = startTagOnly ? start + XML_CHAR_INCREMENT : element.getStop();
        String snippet = this.indexer.getXmlSnippet(start, stop);
        if (snippet == null) {
            return null;
        }
        Matcher matcher = pattern.matcher(snippet);
        return matcher.find() ? matcher.group(1) : null;
    }
}
