# Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
# SPDX-License-Identifier: Apache-2.0
# Human-readable name and one-sentence summary of the wrapped tool.
label: DecoyDatabase
doc: >-
  Creates combined target+decoy sequence database from forward sequence
  database.
# Tool parameters. A trailing '?' on a type marks the input as optional;
# only 'in' and 'out' are required. Long descriptions use folded scalars
# ('>-'), which join the wrapped lines with single spaces and drop the final
# newline, so every 'doc' value is a single-line string.
inputs:
  # --- Required input/output -------------------------------------------------
  in:
    doc: >-
      Input FASTA file(s), each containing a database. It is recommended to
      include a contaminant database as well.
    type: File[]
  # 'out' is a plain string (the file name to write), not a File; the
  # 'outputs' section globs for this name.
  out:
    doc: >-
      Output FASTA file where the decoy database (target + decoy or only decoy,
      see 'only_decoy') will be written to.
    type: string

  # --- Decoy generation ------------------------------------------------------
  decoy_string:
    doc: >-
      String that is combined with the accession of the protein identifier to
      indicate a decoy protein.
    type: string?
  decoy_string_position:
    doc: >-
      Should the 'decoy_string' be prepended (prefix) or appended (suffix) to
      the protein accession?
    type: string?
  only_decoy:
    doc: >-
      Write only decoy proteins to the output database instead of a combined
      database.
    type: boolean?
  # NOTE: this is an input that happens to be named 'type'; the nested
  # 'type:' key below it is the CWL type of that input.
  type:
    doc: >-
      Type of sequence. RNA sequences may contain modification codes, which
      will be handled correctly if this is set to 'RNA'.
    type: string?
  method:
    doc: >-
      Method by which decoy sequences are generated from target sequences. Note
      that all sequences are shuffled using the same random seed, ensuring that
      identical sequences produce the same shuffled decoy sequences. Shuffled
      sequences that produce highly similar output sequences are shuffled again
      (see shuffle_sequence_identity_threshold).
    type: string?
  shuffle_max_attempts:
    doc: >-
      shuffle: maximum attempts to lower the amino acid sequence identity
      between target and decoy for the shuffle algorithm
    type: long?
  shuffle_sequence_identity_threshold:
    doc: >-
      shuffle: target-decoy amino acid sequence identity threshold for the
      shuffle algorithm. If the sequence identity is above this threshold,
      shuffling is repeated. In case of repeated failure, individual amino
      acids are 'mutated' to produce a different amino acid sequence.
    type: double?
  # Typed as string (not a number) because the doc allows the value 'time'.
  seed:
    doc: Random number seed (use 'time' for system time)
    type: string?
  enzyme:
    doc: >-
      Enzyme used for the digestion of the sample. Only applicable if parameter
      'type' is 'protein'.
    type: string?

  # --- General options (logging, threading, test mode) ------------------------
  log:
    doc: Name of log file (created only when specified)
    type: string?
  debug:
    doc: Sets the debug level
    type: long?
  threads:
    doc: Sets the number of threads allowed to be used by the TOPP tool
    type: long?
  no_progress:
    doc: Disables progress logging to command line
    type: boolean?
  force:
    doc: Overrides tool-specific checks
    type: boolean?
  test:
    doc: Enables the test mode (needed for internal use only)
    type: boolean?

  # --- Parameters prefixed 'NeighborSearch__' ---------------------------------
  NeighborSearch__in_relevant_proteins:
    doc: These are the relevant proteins, for which we seek neighbors
    type: File?
  # The two 'out_*' entries are file names (strings); matching File outputs
  # with the same names are declared in the 'outputs' section.
  NeighborSearch__out_neighbor:
    doc: >-
      Output FASTA file with neighbors of relevant peptides (given in
      'in_relevant_proteins').
    type: string?
  NeighborSearch__out_relevant:
    doc: >-
      Output FASTA file with target+decoy of relevant peptides (given in
      'in_relevant_proteins'). Required for downstream filtering of search
      results via IDFilter and subsequent FDR.
    type: string?
  NeighborSearch__missed_cleavages:
    doc: Number of missed cleavages for relevant and neighbor peptides.
    type: long?
  NeighborSearch__mz_bin_size:
    doc: >-
      Bin size for spectra m/z comparison (the original study suggests 0.05 Th
      for high-res and 1.0005079 Th for low-res spectra).
    type: double?
  NeighborSearch__pc_mass_tolerance:
    doc: >-
      Maximal precursor mass difference (in Da or ppm; see
      'pc_mass_tolerance_unit') between neighbor and relevant peptide.
    type: double?
  NeighborSearch__pc_mass_tolerance_unit:
    doc: Is 'pc_mass_tolerance' in Da or ppm?
    type: string?
  NeighborSearch__min_peptide_length:
    doc: Minimum peptide length (relevant and neighbor peptides)
    type: long?
  NeighborSearch__min_shared_ion_fraction:
    doc: >-
      Minimal required overlap 't_i' of b/y ions shared between neighbor
      candidate and a relevant peptide (t_i <= 2*B12/(B1+B2)). Higher values
      result in fewer neighbors.
    type: double?

  # --- Parameters prefixed 'Decoy__' ------------------------------------------
  Decoy__non_shuffle_pattern:
    doc: >-
      Residues to not shuffle (keep at a constant position when shuffling).
      Separate by comma, e.g. use 'K,P,R' here.
    type: string?
  # The two 'keepPeptide*Term' switches are declared as strings here, not as
  # CWL booleans.
  Decoy__keepPeptideNTerm:
    doc: Whether to keep peptide N terminus constant when shuffling / reversing.
    type: string?
  Decoy__keepPeptideCTerm:
    doc: Whether to keep peptide C terminus constant when shuffling / reversing.
    type: string?
# Files collected from the working directory after the tool has run. Each
# output is located by globbing for the file name passed in through the input
# of the same name.
outputs:
  # 'out' is a required input, so the glob always evaluates to a string.
  out:
    type: File
    outputBinding:
      glob: $(inputs.out)
  # The NeighborSearch file names are optional inputs. When one is omitted the
  # bare reference evaluates to null, but a glob expression has to return a
  # string or an array of strings. Falling back to an empty pattern list keeps
  # the expression within the spec; nothing is matched and the optional
  # output resolves to null. The '||' makes this a JavaScript expression,
  # which relies on the InlineJavascriptRequirement declared under
  # 'requirements'.
  NeighborSearch__out_neighbor:
    type: File?
    outputBinding:
      glob: '$(inputs.NeighborSearch__out_neighbor || [])'
  NeighborSearch__out_relevant:
    type: File?
    outputBinding:
      glob: '$(inputs.NeighborSearch__out_relevant || [])'
# CWL v1.2 command-line tool whose executable is 'DecoyDatabase'.
cwlVersion: v1.2
class: CommandLineTool
baseCommand: [DecoyDatabase]
requirements:
  # Enables JavaScript in $(...) expressions; JSON.stringify below needs it.
  InlineJavascriptRequirement: {}
  # Stages a file named 'cwl_inputs.json' in the working directory before the
  # tool starts. Its content is the complete 'inputs' object serialized as
  # JSON. The 'arguments' section passes this file to the tool.
  InitialWorkDirRequirement:
    listing:
      - entryname: cwl_inputs.json
        entry: '$(JSON.stringify(inputs))'
# Fixed command-line tail: '-ini cwl_inputs.json'. No input in this file
# declares an inputBinding, so parameter values presumably reach the tool only
# through this staged JSON file (confirm against the tool's -ini handling).
arguments: ['-ini', cwl_inputs.json]
