rustms/algorithm/
peptide.rs

1use std::collections::HashMap;
2use regex::Regex;
3use rayon::prelude::*;
4use rayon::ThreadPoolBuilder;
5use crate::chemistry::constants::{MASS_CO, MASS_NH3, MASS_PROTON, MASS_WATER};
6use crate::chemistry::formula::calculate_mz;
7use crate::chemistry::unimod::{modification_atomic_composition, unimod_modifications_mass_numerical};
8use crate::chemistry::utility::{find_unimod_patterns, unimod_sequence_to_tokens};
9use crate::proteomics::amino_acid::{amino_acid_composition, amino_acid_masses};
10use crate::proteomics::peptide::{FragmentType, PeptideProductIon, PeptideSequence};
11
12/// calculate the monoisotopic mass of a peptide sequence
13///
14/// Arguments:
15///
16/// * `sequence` - peptide sequence
17///
18/// Returns:
19///
20/// * `mass` - monoisotopic mass of the peptide
21///
22/// # Examples
23///
24/// ```
25/// use rustms::algorithm::peptide::calculate_peptide_mono_isotopic_mass;
26/// use rustms::proteomics::peptide::PeptideSequence;
27///
28/// let peptide_sequence = PeptideSequence::new("PEPTIDEH".to_string(), Some(1));
29/// let mass = calculate_peptide_mono_isotopic_mass(&peptide_sequence);
30/// let mass_quantized = (mass * 1e6).round() as i32;
31/// assert_eq!(mass_quantized, 936418877);
32/// ```
33pub fn calculate_peptide_mono_isotopic_mass(peptide_sequence: &PeptideSequence) -> f64 {
34    let amino_acid_masses = amino_acid_masses();
35    let modifications_mz_numerical = unimod_modifications_mass_numerical();
36    let pattern = Regex::new(r"\[UNIMOD:(\d+)]").unwrap();
37
38    let sequence = peptide_sequence.sequence.as_str();
39
40    // Find all occurrences of the pattern
41    let modifications: Vec<u32> = pattern
42        .find_iter(sequence)
43        .filter_map(|mat| mat.as_str()[8..mat.as_str().len() - 1].parse().ok())
44        .collect();
45
46    // Remove the modifications from the sequence
47    let sequence = pattern.replace_all(sequence, "");
48
49    // Count occurrences of each amino acid
50    let mut aa_counts = HashMap::new();
51    for char in sequence.chars() {
52        *aa_counts.entry(char).or_insert(0) += 1;
53    }
54
55    // Mass of amino acids and modifications
56    let mass_sequence: f64 = aa_counts.iter().map(|(aa, &count)| amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0) * count as f64).sum();
57    let mass_modifications: f64 = modifications.iter().map(|&mod_id| modifications_mz_numerical.get(&mod_id).unwrap_or(&0.0)).sum();
58
59    mass_sequence + mass_modifications + MASS_WATER
60}
61
62/// calculate the monoisotopic mass of a peptide product ion for a given fragment type
63///
64/// Arguments:
65///
66/// * `sequence` - peptide sequence
67/// * `kind` - fragment type
68///
69/// Returns:
70///
71/// * `mass` - monoisotopic mass of the peptide
72///
73/// # Examples
74/// ```
75/// use rustms::algorithm::peptide::calculate_peptide_product_ion_mono_isotopic_mass;
76/// use rustms::proteomics::peptide::FragmentType;
77/// let sequence = "PEPTIDEH";
78/// let mass = calculate_peptide_product_ion_mono_isotopic_mass(sequence, FragmentType::Y);
79/// assert_eq!(mass, 936.4188766862999);
80/// ```
81pub fn calculate_peptide_product_ion_mono_isotopic_mass(sequence: &str, kind: FragmentType) -> f64 {
82
83    let (sequence, modifications) = find_unimod_patterns(sequence);
84
85    // Return mz of empty sequence
86    if sequence.is_empty() {
87        return 0.0;
88    }
89
90    let amino_acid_masses = amino_acid_masses();
91
92    // Add up raw amino acid masses and potential modifications
93    let mass_sequence: f64 = sequence.chars()
94        .map(|aa| amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0))
95        .sum();
96
97    let mass_modifications: f64 = modifications.iter().sum();
98
99    // Calculate total mass
100    let mass = mass_sequence + mass_modifications + MASS_WATER;
101
102    let mass = match kind {
103        FragmentType::A => mass - MASS_CO - MASS_WATER,
104        FragmentType::B => mass - MASS_WATER,
105        FragmentType::C => mass + MASS_NH3 - MASS_WATER,
106        FragmentType::X => mass + MASS_CO - 2.0 * MASS_PROTON,
107        FragmentType::Y => mass,
108        FragmentType::Z => mass - MASS_NH3,
109    };
110
111    mass
112}
113
114/// calculate the monoisotopic m/z of a peptide product ion for a given fragment type and charge
115///
116/// Arguments:
117///
118/// * `sequence` - peptide sequence
119/// * `kind` - fragment type
120/// * `charge` - charge
121///
122/// Returns:
123///
124/// * `mz` - monoisotopic mass of the peptide
125///
126/// # Examples
127/// ```
128/// use rustms::algorithm::peptide::calculate_product_ion_mz;
129/// use rustms::chemistry::constants::MASS_PROTON;
130/// use rustms::proteomics::peptide::FragmentType;
131/// let sequence = "PEPTIDEH";
132/// let mz = calculate_product_ion_mz(sequence, FragmentType::Y, Some(1));
133/// assert_eq!(mz, 936.4188766862999 + MASS_PROTON);
134/// ```
135pub fn calculate_product_ion_mz(sequence: &str, kind: FragmentType, charge: Option<i32>) -> f64 {
136    let mass = calculate_peptide_product_ion_mono_isotopic_mass(sequence, kind);
137    calculate_mz(mass, charge.unwrap_or(1))
138}
139
140/// get a count dictionary of the amino acid composition of a peptide sequence
141///
142/// Arguments:
143///
144/// * `sequence` - peptide sequence
145///
146/// Returns:
147///
148/// * `composition` - a dictionary of amino acid composition
149///
150/// # Examples
151///
152/// ```
153/// use rustms::algorithm::peptide::calculate_amino_acid_composition;
154///
155/// let sequence = "PEPTIDEH";
156/// let composition = calculate_amino_acid_composition(sequence);
157/// assert_eq!(composition.get("P"), Some(&2));
158/// assert_eq!(composition.get("E"), Some(&2));
159/// assert_eq!(composition.get("T"), Some(&1));
160/// assert_eq!(composition.get("I"), Some(&1));
161/// assert_eq!(composition.get("D"), Some(&1));
162/// assert_eq!(composition.get("H"), Some(&1));
163/// ```
164pub fn calculate_amino_acid_composition(sequence: &str) -> HashMap<String, i32> {
165    let mut composition = HashMap::new();
166    for char in sequence.chars() {
167        *composition.entry(char.to_string()).or_insert(0) += 1;
168    }
169    composition
170}
171
172/// calculate the atomic composition of a peptide sequence
173pub fn peptide_sequence_to_atomic_composition(peptide_sequence: &PeptideSequence) -> HashMap<&'static str, i32> {
174
175    let token_sequence = unimod_sequence_to_tokens(peptide_sequence.sequence.as_str(), false);
176    let mut collection: HashMap<&'static str, i32> = HashMap::new();
177
178    // Assuming amino_acid_composition and modification_composition return appropriate mappings...
179    let aa_compositions = amino_acid_composition();
180    let mod_compositions = modification_atomic_composition();
181
182    // No need for conversion to HashMap<String, ...> as long as you're directly accessing
183    // the HashMap provided by modification_composition() if it uses String keys.
184    for token in token_sequence {
185        if token.len() == 1 {
186            let char = token.chars().next().unwrap();
187            if let Some(composition) = aa_compositions.get(&char) {
188                for (key, value) in composition.iter() {
189                    *collection.entry(key).or_insert(0) += *value;
190                }
191            }
192        } else {
193            // Directly use &token without .as_str() conversion
194            if let Some(composition) = mod_compositions.get(&token) {
195                for (key, value) in composition.iter() {
196                    *collection.entry(key).or_insert(0) += *value;
197                }
198            }
199        }
200    }
201
202    // Add water
203    *collection.entry("H").or_insert(0) += 2; //
204    *collection.entry("O").or_insert(0) += 1; //
205
206    collection
207}
208
209/// calculate the atomic composition of a product ion
210///
211/// Arguments:
212///
213/// * `product_ion` - a PeptideProductIon instance
214///
215/// Returns:
216///
217/// * `Vec<(&str, i32)>` - a vector of tuples representing the atomic composition of the product ion
218pub fn atomic_product_ion_composition(product_ion: &PeptideProductIon) -> Vec<(&str, i32)> {
219
220    let mut composition = peptide_sequence_to_atomic_composition(&product_ion.ion.sequence);
221
222    match product_ion.kind {
223        FragmentType::A => {
224            // A: peptide_mass - CO - Water
225            *composition.entry("H").or_insert(0) -= 2;
226            *composition.entry("O").or_insert(0) -= 2;
227            *composition.entry("C").or_insert(0) -= 1;
228        },
229        FragmentType::B => {
230            // B: peptide_mass - Water
231            *composition.entry("H").or_insert(0) -= 2;
232            *composition.entry("O").or_insert(0) -= 1;
233        },
234        FragmentType::C => {
235            // C: peptide_mass + NH3 - Water
236            *composition.entry("H").or_insert(0) += 1;
237            *composition.entry("N").or_insert(0) += 1;
238            *composition.entry("O").or_insert(0) -= 1;
239        },
240        FragmentType::X => {
241            // X: peptide_mass + CO
242            *composition.entry("C").or_insert(0) += 1; // Add 1 for CO
243            *composition.entry("O").or_insert(0) += 1; // Add 1 for CO
244            *composition.entry("H").or_insert(0) -= 2; // Subtract 2 for 2 protons
245        },
246        FragmentType::Y => {
247            ()
248        },
249        FragmentType::Z => {
250            *composition.entry("H").or_insert(0) -= 3;
251            *composition.entry("N").or_insert(0) -= 1;
252        },
253    }
254
255    composition.iter().map(|(k, v)| (*k, *v)).collect()
256}
257
258/// calculate the atomic composition of a peptide product ion series
259/// Arguments:
260///
261/// * `product_ions` - a vector of PeptideProductIon instances
262/// * `num_threads` - an usize representing the number of threads to use
263/// Returns:
264///
265/// * `Vec<Vec<(String, i32)>>` - a vector of vectors of tuples representing the atomic composition of each product ion
266///
267pub fn fragments_to_composition(product_ions: Vec<PeptideProductIon>, num_threads: usize) -> Vec<Vec<(String, i32)>> {
268    let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
269    let result = thread_pool.install(|| {
270        product_ions.par_iter().map(|ion| atomic_product_ion_composition(ion)).map(|composition| {
271            composition.iter().map(|(k, v)| (k.to_string(), *v)).collect()
272        }).collect()
273    });
274    result
275}
276
277/// count the number of protonizable sites in a peptide sequence
278///
279/// # Arguments
280///
281/// * `sequence` - a string representing the peptide sequence
282///
283/// # Returns
284///
285/// * `usize` - the number of protonizable sites in the peptide sequence
286///
287/// # Example
288///
289/// ```
290/// use rustms::algorithm::peptide::get_num_protonizable_sites;
291///
292/// let sequence = "PEPTIDEH";
293/// let num_protonizable_sites = get_num_protonizable_sites(sequence);
294/// assert_eq!(num_protonizable_sites, 2);
295/// ```
296pub fn get_num_protonizable_sites(sequence: &str) -> usize {
297    let mut sites = 1; // n-terminus
298    for s in sequence.chars() {
299        match s {
300            'H' | 'R' | 'K' => sites += 1,
301            _ => {}
302        }
303    }
304    sites
305}