rustms/algorithm/peptide.rs
1use std::collections::HashMap;
2use regex::Regex;
3use rayon::prelude::*;
4use rayon::ThreadPoolBuilder;
5use crate::chemistry::constants::{MASS_CO, MASS_NH3, MASS_PROTON, MASS_WATER};
6use crate::chemistry::formula::calculate_mz;
7use crate::chemistry::unimod::{modification_atomic_composition, unimod_modifications_mass_numerical};
8use crate::chemistry::utility::{find_unimod_patterns, unimod_sequence_to_tokens};
9use crate::proteomics::amino_acid::{amino_acid_composition, amino_acid_masses};
10use crate::proteomics::peptide::{FragmentType, PeptideProductIon, PeptideSequence};
11
12/// calculate the monoisotopic mass of a peptide sequence
13///
14/// Arguments:
15///
16/// * `sequence` - peptide sequence
17///
18/// Returns:
19///
20/// * `mass` - monoisotopic mass of the peptide
21///
22/// # Examples
23///
24/// ```
25/// use rustms::algorithm::peptide::calculate_peptide_mono_isotopic_mass;
26/// use rustms::proteomics::peptide::PeptideSequence;
27///
28/// let peptide_sequence = PeptideSequence::new("PEPTIDEH".to_string(), Some(1));
29/// let mass = calculate_peptide_mono_isotopic_mass(&peptide_sequence);
30/// let mass_quantized = (mass * 1e6).round() as i32;
31/// assert_eq!(mass_quantized, 936418877);
32/// ```
33pub fn calculate_peptide_mono_isotopic_mass(peptide_sequence: &PeptideSequence) -> f64 {
34 let amino_acid_masses = amino_acid_masses();
35 let modifications_mz_numerical = unimod_modifications_mass_numerical();
36 let pattern = Regex::new(r"\[UNIMOD:(\d+)]").unwrap();
37
38 let sequence = peptide_sequence.sequence.as_str();
39
40 // Find all occurrences of the pattern
41 let modifications: Vec<u32> = pattern
42 .find_iter(sequence)
43 .filter_map(|mat| mat.as_str()[8..mat.as_str().len() - 1].parse().ok())
44 .collect();
45
46 // Remove the modifications from the sequence
47 let sequence = pattern.replace_all(sequence, "");
48
49 // Count occurrences of each amino acid
50 let mut aa_counts = HashMap::new();
51 for char in sequence.chars() {
52 *aa_counts.entry(char).or_insert(0) += 1;
53 }
54
55 // Mass of amino acids and modifications
56 let mass_sequence: f64 = aa_counts.iter().map(|(aa, &count)| amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0) * count as f64).sum();
57 let mass_modifications: f64 = modifications.iter().map(|&mod_id| modifications_mz_numerical.get(&mod_id).unwrap_or(&0.0)).sum();
58
59 mass_sequence + mass_modifications + MASS_WATER
60}
61
62/// calculate the monoisotopic mass of a peptide product ion for a given fragment type
63///
64/// Arguments:
65///
66/// * `sequence` - peptide sequence
67/// * `kind` - fragment type
68///
69/// Returns:
70///
71/// * `mass` - monoisotopic mass of the peptide
72///
73/// # Examples
74/// ```
75/// use rustms::algorithm::peptide::calculate_peptide_product_ion_mono_isotopic_mass;
76/// use rustms::proteomics::peptide::FragmentType;
77/// let sequence = "PEPTIDEH";
78/// let mass = calculate_peptide_product_ion_mono_isotopic_mass(sequence, FragmentType::Y);
79/// assert_eq!(mass, 936.4188766862999);
80/// ```
81pub fn calculate_peptide_product_ion_mono_isotopic_mass(sequence: &str, kind: FragmentType) -> f64 {
82
83 let (sequence, modifications) = find_unimod_patterns(sequence);
84
85 // Return mz of empty sequence
86 if sequence.is_empty() {
87 return 0.0;
88 }
89
90 let amino_acid_masses = amino_acid_masses();
91
92 // Add up raw amino acid masses and potential modifications
93 let mass_sequence: f64 = sequence.chars()
94 .map(|aa| amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0))
95 .sum();
96
97 let mass_modifications: f64 = modifications.iter().sum();
98
99 // Calculate total mass
100 let mass = mass_sequence + mass_modifications + MASS_WATER;
101
102 let mass = match kind {
103 FragmentType::A => mass - MASS_CO - MASS_WATER,
104 FragmentType::B => mass - MASS_WATER,
105 FragmentType::C => mass + MASS_NH3 - MASS_WATER,
106 FragmentType::X => mass + MASS_CO - 2.0 * MASS_PROTON,
107 FragmentType::Y => mass,
108 FragmentType::Z => mass - MASS_NH3,
109 };
110
111 mass
112}
113
114/// calculate the monoisotopic m/z of a peptide product ion for a given fragment type and charge
115///
116/// Arguments:
117///
118/// * `sequence` - peptide sequence
119/// * `kind` - fragment type
120/// * `charge` - charge
121///
122/// Returns:
123///
124/// * `mz` - monoisotopic mass of the peptide
125///
126/// # Examples
127/// ```
128/// use rustms::algorithm::peptide::calculate_product_ion_mz;
129/// use rustms::chemistry::constants::MASS_PROTON;
130/// use rustms::proteomics::peptide::FragmentType;
131/// let sequence = "PEPTIDEH";
132/// let mz = calculate_product_ion_mz(sequence, FragmentType::Y, Some(1));
133/// assert_eq!(mz, 936.4188766862999 + MASS_PROTON);
134/// ```
135pub fn calculate_product_ion_mz(sequence: &str, kind: FragmentType, charge: Option<i32>) -> f64 {
136 let mass = calculate_peptide_product_ion_mono_isotopic_mass(sequence, kind);
137 calculate_mz(mass, charge.unwrap_or(1))
138}
139
140/// get a count dictionary of the amino acid composition of a peptide sequence
141///
142/// Arguments:
143///
144/// * `sequence` - peptide sequence
145///
146/// Returns:
147///
148/// * `composition` - a dictionary of amino acid composition
149///
150/// # Examples
151///
152/// ```
153/// use rustms::algorithm::peptide::calculate_amino_acid_composition;
154///
155/// let sequence = "PEPTIDEH";
156/// let composition = calculate_amino_acid_composition(sequence);
157/// assert_eq!(composition.get("P"), Some(&2));
158/// assert_eq!(composition.get("E"), Some(&2));
159/// assert_eq!(composition.get("T"), Some(&1));
160/// assert_eq!(composition.get("I"), Some(&1));
161/// assert_eq!(composition.get("D"), Some(&1));
162/// assert_eq!(composition.get("H"), Some(&1));
163/// ```
164pub fn calculate_amino_acid_composition(sequence: &str) -> HashMap<String, i32> {
165 let mut composition = HashMap::new();
166 for char in sequence.chars() {
167 *composition.entry(char.to_string()).or_insert(0) += 1;
168 }
169 composition
170}
171
172/// calculate the atomic composition of a peptide sequence
173pub fn peptide_sequence_to_atomic_composition(peptide_sequence: &PeptideSequence) -> HashMap<&'static str, i32> {
174
175 let token_sequence = unimod_sequence_to_tokens(peptide_sequence.sequence.as_str(), false);
176 let mut collection: HashMap<&'static str, i32> = HashMap::new();
177
178 // Assuming amino_acid_composition and modification_composition return appropriate mappings...
179 let aa_compositions = amino_acid_composition();
180 let mod_compositions = modification_atomic_composition();
181
182 // No need for conversion to HashMap<String, ...> as long as you're directly accessing
183 // the HashMap provided by modification_composition() if it uses String keys.
184 for token in token_sequence {
185 if token.len() == 1 {
186 let char = token.chars().next().unwrap();
187 if let Some(composition) = aa_compositions.get(&char) {
188 for (key, value) in composition.iter() {
189 *collection.entry(key).or_insert(0) += *value;
190 }
191 }
192 } else {
193 // Directly use &token without .as_str() conversion
194 if let Some(composition) = mod_compositions.get(&token) {
195 for (key, value) in composition.iter() {
196 *collection.entry(key).or_insert(0) += *value;
197 }
198 }
199 }
200 }
201
202 // Add water
203 *collection.entry("H").or_insert(0) += 2; //
204 *collection.entry("O").or_insert(0) += 1; //
205
206 collection
207}
208
209/// calculate the atomic composition of a product ion
210///
211/// Arguments:
212///
213/// * `product_ion` - a PeptideProductIon instance
214///
215/// Returns:
216///
217/// * `Vec<(&str, i32)>` - a vector of tuples representing the atomic composition of the product ion
218pub fn atomic_product_ion_composition(product_ion: &PeptideProductIon) -> Vec<(&str, i32)> {
219
220 let mut composition = peptide_sequence_to_atomic_composition(&product_ion.ion.sequence);
221
222 match product_ion.kind {
223 FragmentType::A => {
224 // A: peptide_mass - CO - Water
225 *composition.entry("H").or_insert(0) -= 2;
226 *composition.entry("O").or_insert(0) -= 2;
227 *composition.entry("C").or_insert(0) -= 1;
228 },
229 FragmentType::B => {
230 // B: peptide_mass - Water
231 *composition.entry("H").or_insert(0) -= 2;
232 *composition.entry("O").or_insert(0) -= 1;
233 },
234 FragmentType::C => {
235 // C: peptide_mass + NH3 - Water
236 *composition.entry("H").or_insert(0) += 1;
237 *composition.entry("N").or_insert(0) += 1;
238 *composition.entry("O").or_insert(0) -= 1;
239 },
240 FragmentType::X => {
241 // X: peptide_mass + CO
242 *composition.entry("C").or_insert(0) += 1; // Add 1 for CO
243 *composition.entry("O").or_insert(0) += 1; // Add 1 for CO
244 *composition.entry("H").or_insert(0) -= 2; // Subtract 2 for 2 protons
245 },
246 FragmentType::Y => {
247 ()
248 },
249 FragmentType::Z => {
250 *composition.entry("H").or_insert(0) -= 3;
251 *composition.entry("N").or_insert(0) -= 1;
252 },
253 }
254
255 composition.iter().map(|(k, v)| (*k, *v)).collect()
256}
257
258/// calculate the atomic composition of a peptide product ion series
259/// Arguments:
260///
261/// * `product_ions` - a vector of PeptideProductIon instances
262/// * `num_threads` - an usize representing the number of threads to use
263/// Returns:
264///
265/// * `Vec<Vec<(String, i32)>>` - a vector of vectors of tuples representing the atomic composition of each product ion
266///
267pub fn fragments_to_composition(product_ions: Vec<PeptideProductIon>, num_threads: usize) -> Vec<Vec<(String, i32)>> {
268 let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
269 let result = thread_pool.install(|| {
270 product_ions.par_iter().map(|ion| atomic_product_ion_composition(ion)).map(|composition| {
271 composition.iter().map(|(k, v)| (k.to_string(), *v)).collect()
272 }).collect()
273 });
274 result
275}
276
277/// count the number of protonizable sites in a peptide sequence
278///
279/// # Arguments
280///
281/// * `sequence` - a string representing the peptide sequence
282///
283/// # Returns
284///
285/// * `usize` - the number of protonizable sites in the peptide sequence
286///
287/// # Example
288///
289/// ```
290/// use rustms::algorithm::peptide::get_num_protonizable_sites;
291///
292/// let sequence = "PEPTIDEH";
293/// let num_protonizable_sites = get_num_protonizable_sites(sequence);
294/// assert_eq!(num_protonizable_sites, 2);
295/// ```
296pub fn get_num_protonizable_sites(sequence: &str) -> usize {
297 let mut sites = 1; // n-terminus
298 for s in sequence.chars() {
299 match s {
300 'H' | 'R' | 'K' => sites += 1,
301 _ => {}
302 }
303 }
304 sites
305}